From 7424a59fa161a22483ea7898f2c5c00bcceaacf1 Mon Sep 17 00:00:00 2001 From: kayomn Date: Wed, 4 Jan 2023 16:04:31 +0000 Subject: [PATCH] Initial commit --- .cvsignore | 16 + .dir-locals.el | 4 + .gdbinit.tmpl | 27 + .gitignore | 19 + .vscode/launch.json | 28 + .vscode/tasks.json | 35 + LICENSE | 24 + Makefile | 246 ++++++ asm.h | 18 + bio.c | 140 ++++ bootasm.S | 81 ++ bootmain.c | 93 +++ buf.h | 14 + cat.c | 39 + config.xlaunch | 2 + console.c | 332 ++++++++ cuth | 48 ++ date.h | 8 + defs.h | 191 +++++ echo.c | 12 + elf.h | 42 + entry.S | 68 ++ entryother.S | 89 +++ exec.c | 142 ++++ fcntl.h | 4 + file.c | 155 ++++ file.h | 37 + forktest.c | 52 ++ fs.c | 649 ++++++++++++++++ fs.h | 57 ++ gdbutil | 291 +++++++ gensyscalls.pl | 102 +++ grep.c | 109 +++ ide.c | 171 +++++ init.c | 35 + initcode.S | 32 + ioapic.c | 68 ++ kalloc.c | 93 +++ kbd.c | 51 ++ kbd.h | 112 +++ kernel.ld | 64 ++ kill.c | 16 + lapic.c | 218 ++++++ ln.c | 14 + log.c | 234 ++++++ ls.c | 85 +++ main.c | 111 +++ memide.c | 60 ++ memlayout.h | 15 + mkdir.c | 21 + mkfs.c | 283 +++++++ mmu.h | 181 +++++ mp.c | 141 ++++ mp.h | 56 ++ param.h | 14 + picirq.c | 17 + pipe.c | 122 +++ printf.c | 91 +++ proc.c | 527 +++++++++++++ proc.h | 57 ++ rm.c | 21 + sh.c | 482 ++++++++++++ sign.pl | 19 + sleeplock.c | 48 ++ sleeplock.h | 10 + spinlock.c | 121 +++ spinlock.h | 11 + stat.h | 11 + stressfs.c | 51 ++ string.c | 103 +++ swtch.S | 29 + syscall.c | 95 +++ sysfile.c | 450 +++++++++++ sysproc.c | 79 ++ trap.c | 111 +++ trapasm.S | 32 + traps.h | 38 + types.h | 4 + uart.c | 75 ++ ulib.c | 106 +++ umalloc.c | 95 +++ user.h | 43 ++ usertests.c | 1779 +++++++++++++++++++++++++++++++++++++++++++ vectors.pl | 47 ++ vm.c | 392 ++++++++++ wc.c | 52 ++ x86.h | 148 ++++ zombie.c | 13 + 88 files changed, 10528 insertions(+) create mode 100644 .cvsignore create mode 100644 .dir-locals.el create mode 100644 .gdbinit.tmpl create mode 100644 .gitignore create mode 100644 .vscode/launch.json create mode 100644 
.vscode/tasks.json create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 asm.h create mode 100644 bio.c create mode 100644 bootasm.S create mode 100644 bootmain.c create mode 100644 buf.h create mode 100644 cat.c create mode 100644 config.xlaunch create mode 100644 console.c create mode 100644 cuth create mode 100644 date.h create mode 100644 defs.h create mode 100644 echo.c create mode 100644 elf.h create mode 100644 entry.S create mode 100644 entryother.S create mode 100644 exec.c create mode 100644 fcntl.h create mode 100644 file.c create mode 100644 file.h create mode 100644 forktest.c create mode 100644 fs.c create mode 100644 fs.h create mode 100644 gdbutil create mode 100755 gensyscalls.pl create mode 100644 grep.c create mode 100644 ide.c create mode 100644 init.c create mode 100644 initcode.S create mode 100644 ioapic.c create mode 100644 kalloc.c create mode 100644 kbd.c create mode 100644 kbd.h create mode 100644 kernel.ld create mode 100644 kill.c create mode 100644 lapic.c create mode 100644 ln.c create mode 100644 log.c create mode 100644 ls.c create mode 100644 main.c create mode 100644 memide.c create mode 100644 memlayout.h create mode 100644 mkdir.c create mode 100644 mkfs.c create mode 100644 mmu.h create mode 100644 mp.c create mode 100644 mp.h create mode 100644 param.h create mode 100644 picirq.c create mode 100644 pipe.c create mode 100644 printf.c create mode 100644 proc.c create mode 100644 proc.h create mode 100644 rm.c create mode 100644 sh.c create mode 100755 sign.pl create mode 100644 sleeplock.c create mode 100644 sleeplock.h create mode 100644 spinlock.c create mode 100644 spinlock.h create mode 100644 stat.h create mode 100644 stressfs.c create mode 100644 string.c create mode 100644 swtch.S create mode 100644 syscall.c create mode 100644 sysfile.c create mode 100644 sysproc.c create mode 100644 trap.c create mode 100644 trapasm.S create mode 100644 traps.h create mode 100644 types.h create mode 100644 uart.c 
create mode 100644 ulib.c create mode 100644 umalloc.c create mode 100644 user.h create mode 100644 usertests.c create mode 100755 vectors.pl create mode 100644 vm.c create mode 100644 wc.c create mode 100644 x86.h create mode 100644 zombie.c diff --git a/.cvsignore b/.cvsignore new file mode 100644 index 0000000..081a43c --- /dev/null +++ b/.cvsignore @@ -0,0 +1,16 @@ +*.asm +*.d +*.sym +_* +kernel +user1 +userfs +usertests +xv6.img +vectors.S +bochsout.txt +bootblock +bootother +bootother.out +parport.out +fmt diff --git a/.dir-locals.el b/.dir-locals.el new file mode 100644 index 0000000..da72247 --- /dev/null +++ b/.dir-locals.el @@ -0,0 +1,4 @@ +((c-mode + (indent-tabs-mode . nil) + (c-file-style . "bsd") + (c-basic-offset . 2))) diff --git a/.gdbinit.tmpl b/.gdbinit.tmpl new file mode 100644 index 0000000..f71681a --- /dev/null +++ b/.gdbinit.tmpl @@ -0,0 +1,27 @@ +set $lastcs = -1 + +define hook-stop + # There doesn't seem to be a good way to detect if we're in 16- or + # 32-bit mode, but in 32-bit mode we always run with CS == 8 in the + # kernel and CS == 35 in user space + if $cs == 8 || $cs == 35 + if $lastcs != 8 && $lastcs != 35 + set architecture i386 + end + x/i $pc + else + if $lastcs == -1 || $lastcs == 8 || $lastcs == 35 + set architecture i8086 + end + # Translate the segment:offset into a physical address + printf "[%4x:%4x] ", $cs, $eip + x/i $cs*16+$eip + end + set $lastcs = $cs +end + +echo + target remote localhost:1234\n +target remote localhost:1234 + +echo + symbol-file kernel\n +symbol-file kernel diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..85b11d0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,19 @@ +*~ +_* +*.o +*.d +*.asm +*.sym +*.img +vectors.S +bootblock +entryother +initcode +initcode.out +kernel +kernelmemfs +mkfs +.gdbinit +syscalltable.h +syscall.h +usys.S diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..8653e61 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 
+1,28 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "(gdb) Launch", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/kernel", + "args": [], + "stopAtEntry": true, + "cwd": "${workspaceFolder}", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "miDebuggerPath": "/usr/bin/gdb" + } + ] +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..1471a31 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,35 @@ +{ + // See https://go.microsoft.com/fwlink/?LinkId=733558 + // for the documentation about the tasks.json format + "version": "2.0.0", + "tasks": [ + { + "label": "Build Xv6 operating system", + "type": "shell", + "command": "make", + "problemMatcher": [], + "group": { + "kind": "build", + "isDefault": true + } + }, + { + "label": "Run Xv6 under QEMU", + "type": "shell", + "command": "make qemu", + "problemMatcher": [] + }, + { + "label": "Run Xv6 under QEMU for debugging", + "type": "shell", + "command": "make qemu-gdb", + "problemMatcher": [] + }, + { + "label": "Clean non-source files", + "type": "shell", + "command": "make clean", + "problemMatcher": [] + } +] +} \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..422c0cc --- /dev/null +++ b/LICENSE @@ -0,0 +1,24 @@ +The xv6 software is: + +Copyright (c) 2006-2018 Frans Kaashoek, Robert Morris, Russ Cox, + Massachusetts Institute of Technology + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the 
Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..6a41d37 --- /dev/null +++ b/Makefile @@ -0,0 +1,246 @@ +OBJS = \ + bio.o\ + console.o\ + exec.o\ + file.o\ + fs.o\ + ide.o\ + ioapic.o\ + kalloc.o\ + kbd.o\ + lapic.o\ + log.o\ + main.o\ + mp.o\ + picirq.o\ + pipe.o\ + proc.o\ + sleeplock.o\ + spinlock.o\ + string.o\ + swtch.o\ + syscall.o\ + sysfile.o\ + sysproc.o\ + trapasm.o\ + trap.o\ + uart.o\ + vectors.o\ + vm.o\ + +# Cross-compiling (e.g., on Mac OS X) +# TOOLPREFIX = i386-jos-elf + +# Using native tools (e.g., on X86 Linux) +#TOOLPREFIX = + +# Try to infer the correct TOOLPREFIX if not set +ifndef TOOLPREFIX +TOOLPREFIX := $(shell if i386-jos-elf-objdump -i 2>&1 | grep '^elf32-i386$$' >/dev/null 2>&1; \ + then echo 'i386-jos-elf-'; \ + elif objdump -i 2>&1 | grep 'elf32-i386' >/dev/null 2>&1; \ + then echo ''; \ + else echo "***" 1>&2; \ + echo "*** Error: Couldn't find an i386-*-elf version of GCC/binutils." 1>&2; \ + echo "*** Is the directory with i386-jos-elf-gcc in your PATH?" 
1>&2; \ + echo "*** If your i386-*-elf toolchain is installed with a command" 1>&2; \ + echo "*** prefix other than 'i386-jos-elf-', set your TOOLPREFIX" 1>&2; \ + echo "*** environment variable to that prefix and run 'make' again." 1>&2; \ + echo "*** To turn off this error, run 'gmake TOOLPREFIX= ...'." 1>&2; \ + echo "***" 1>&2; exit 1; fi) +endif + +# If the makefile can't find QEMU, specify its path here +# QEMU = qemu-system-i386 + +# Try to infer the correct QEMU +ifndef QEMU +QEMU = $(shell if which qemu > /dev/null; \ + then echo qemu; exit; \ + elif which qemu-system-i386 > /dev/null; \ + then echo qemu-system-i386; exit; \ + elif which qemu-system-x86_64 > /dev/null; \ + then echo qemu-system-x86_64; exit; \ + else \ + qemu=/Applications/Q.app/Contents/MacOS/i386-softmmu.app/Contents/MacOS/i386-softmmu; \ + if test -x $$qemu; then echo $$qemu; exit; fi; fi; \ + echo "***" 1>&2; \ + echo "*** Error: Couldn't find a working QEMU executable." 1>&2; \ + echo "*** Is the directory containing the qemu binary in your PATH" 1>&2; \ + echo "*** or have you tried setting the QEMU variable in Makefile?" 
1>&2; \ + echo "***" 1>&2; exit 1) +endif + +CC = $(TOOLPREFIX)gcc +AS = $(TOOLPREFIX)gas +LD = $(TOOLPREFIX)ld +OBJCOPY = $(TOOLPREFIX)objcopy +OBJDUMP = $(TOOLPREFIX)objdump +CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 -Werror -fno-omit-frame-pointer +CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector) +ASFLAGS = -m32 -gdwarf-2 -Wa,-divide +# FreeBSD ld wants ``elf_i386_fbsd'' +LDFLAGS += -m $(shell $(LD) -V | grep elf_i386 2>/dev/null | head -n 1) + +# Disable PIE when possible (for Ubuntu 16.10 toolchain) +ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]no-pie'),) +CFLAGS += -fno-pie -no-pie +endif +ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]nopie'),) +CFLAGS += -fno-pie -nopie +endif + +xv6.img: bootblock kernel + dd if=/dev/zero of=xv6.img count=10000 + dd if=bootblock of=xv6.img conv=notrunc + dd if=kernel of=xv6.img seek=1 conv=notrunc + +xv6memfs.img: bootblock kernelmemfs + dd if=/dev/zero of=xv6memfs.img count=10000 + dd if=bootblock of=xv6memfs.img conv=notrunc + dd if=kernelmemfs of=xv6memfs.img seek=1 conv=notrunc + +bootblock: bootasm.S bootmain.c + $(CC) $(CFLAGS) -fno-pic -O -nostdinc -I. -c bootmain.c + $(CC) $(CFLAGS) -fno-pic -nostdinc -I. -c bootasm.S + $(LD) $(LDFLAGS) -N -e start -Ttext 0x7C00 -o bootblock.o bootasm.o bootmain.o + $(OBJDUMP) -S bootblock.o > bootblock.asm + $(OBJCOPY) -S -O binary -j .text bootblock.o bootblock + # The following line is here since it has been noticed that if you use Explorer to + # copy folders on wsl, sometimes the execute permissions can be removed from perl scripts. + # Uncomment if needed, but it will flag as a change for git. 
+ # chmod +x sign.pl + ./sign.pl bootblock + +syscall.h: gensyscalls.pl + ./gensyscalls.pl -h > syscall.h + +syscalltable.h: gensyscalls.pl + ./gensyscalls.pl -c > syscalltable.h + +usys.S: gensyscalls.pl + ./gensyscalls.pl -a > usys.S + +entryother: entryother.S + $(CC) $(CFLAGS) -fno-pic -nostdinc -I. -c entryother.S + $(LD) $(LDFLAGS) -N -e start -Ttext 0x7000 -o bootblockother.o entryother.o + $(OBJCOPY) -S -O binary -j .text bootblockother.o entryother + $(OBJDUMP) -S bootblockother.o > entryother.asm + +initcode: initcode.S + $(CC) $(CFLAGS) -nostdinc -I. -c initcode.S + $(LD) $(LDFLAGS) -N -e start -Ttext 0 -o initcode.out initcode.o + $(OBJCOPY) -S -O binary initcode.out initcode + $(OBJDUMP) -S initcode.o > initcode.asm + +kernel: syscall.h syscalltable.h $(OBJS) entry.o entryother initcode kernel.ld + $(LD) $(LDFLAGS) -T kernel.ld -o kernel entry.o $(OBJS) -b binary initcode entryother + $(OBJDUMP) -S kernel > kernel.asm + $(OBJDUMP) -t kernel | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernel.sym + +# kernelmemfs is a copy of kernel that maintains the +# disk image in memory instead of writing to a disk. +# This is not so useful for testing persistent storage or +# exploring disk buffering implementations, but it is +# great for testing the kernel on real hardware without +# needing a scratch disk. 
+MEMFSOBJS = $(filter-out ide.o,$(OBJS)) memide.o +kernelmemfs: $(MEMFSOBJS) entry.o entryother initcode kernel.ld fs.img + $(LD) $(LDFLAGS) -T kernel.ld -o kernelmemfs entry.o $(MEMFSOBJS) -b binary initcode entryother fs.img + $(OBJDUMP) -S kernelmemfs > kernelmemfs.asm + $(OBJDUMP) -t kernelmemfs | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernelmemfs.sym + +tags: $(OBJS) entryother.S _init + etags *.S *.c + +vectors.S: vectors.pl + # chmod +x vectors.pl + ./vectors.pl > vectors.S + +ULIB = ulib.o usys.o printf.o umalloc.o + +_%: %.o $(ULIB) + $(LD) $(LDFLAGS) -N -e _init -Ttext 0 -o $@ $^ + $(OBJDUMP) -S $@ > $*.asm + $(OBJDUMP) -t $@ | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > $*.sym + +_forktest: forktest.o $(ULIB) + # forktest has less library code linked in - needs to be small + # in order to be able to max out the proc table. + $(LD) $(LDFLAGS) -N -e _init -Ttext 0 -o _forktest forktest.o ulib.o usys.o + $(OBJDUMP) -S _forktest > forktest.asm + +mkfs: mkfs.c fs.h + gcc -Werror -Wall -o mkfs mkfs.c + +# Prevent deletion of intermediate files, e.g. cat.o, after first build, so +# that disk image changes after first build are persistent until clean. 
More +# details: +# http://www.gnu.org/software/make/manual/html_node/Chained-Rules.html +.PRECIOUS: %.o + +UPROGS=\ + _cat\ + _echo\ + _forktest\ + _grep\ + _init\ + _kill\ + _ln\ + _ls\ + _mkdir\ + _rm\ + _sh\ + _stressfs\ + _usertests\ + _wc\ + _zombie\ + +fs.img: mkfs $(UPROGS) + ./mkfs fs.img $(UPROGS) + +-include *.d + +clean: + rm -f *.tex *.dvi *.idx *.aux *.log *.ind *.ilg \ + *.o *.d *.asm *.sym vectors.S bootblock entryother \ + initcode initcode.out kernel xv6.img fs.img kernelmemfs \ + xv6memfs.img mkfs .gdbinit \ + syscall.h syscalltable.h usys.S \ + $(UPROGS) + +# run in emulators + +# try to generate a unique GDB port +GDBPORT = $(shell expr `id -u` % 5000 + 25000) +# QEMU's gdb stub command line changed in 0.11 +QEMUGDB = $(shell if $(QEMU) -help | grep -q '^-gdb'; \ + then echo "-gdb tcp::$(GDBPORT)"; \ + else echo "-s -p $(GDBPORT)"; fi) +ifndef CPUS +CPUS := 2 +endif +QEMUOPTS = -drive file=fs.img,index=1,media=disk,format=raw -drive file=xv6.img,index=0,media=disk,format=raw -smp $(CPUS) -m 512 $(QEMUEXTRA) + +qemu: fs.img xv6.img + $(QEMU) -vga std -serial mon:stdio $(QEMUOPTS) + +qemu-memfs: xv6memfs.img + $(QEMU) -vga std -drive file=xv6memfs.img,index=0,media=disk,format=raw -smp $(CPUS) -m 256 + +qemu-nox: fs.img xv6.img + $(QEMU) -nographic $(QEMUOPTS) + +qemu-curses: fs.img xv6.img + $(QEMU) -curses $(QEMUOPTS) + +.gdbinit: .gdbinit.tmpl + sed "s/localhost:1234/localhost:$(GDBPORT)/" < $^ > $@ + +qemu-gdb: fs.img xv6.img .gdbinit + @echo "*** Now run 'gdb'." 1>&2 + $(QEMU) -vga std -serial mon:stdio $(QEMUOPTS) -S $(QEMUGDB) + +qemu-nox-gdb: fs.img xv6.img .gdbinit + @echo "*** Now run 'gdb'." 
1>&2 + $(QEMU) -nographic $(QEMUOPTS) -S $(QEMUGDB) diff --git a/asm.h b/asm.h new file mode 100644 index 0000000..4a92aad --- /dev/null +++ b/asm.h @@ -0,0 +1,18 @@ +// +// assembler macros to create x86 segments +// + +#define SEG_NULLASM \ + .word 0, 0; \ + .byte 0, 0, 0, 0 + +// The 0xC0 means the limit is in 4096-byte units +// and (for executable segments) 32-bit mode. +#define SEG_ASM(type, base, lim) \ + .word(((lim) >> 12) & 0xffff), ((base) & 0xffff); \ + .byte(((base) >> 16) & 0xff), (0x90 | (type)), \ + (0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff) + +#define STA_X 0x8 // Executable segment +#define STA_W 0x2 // Writeable (non-executable segments) +#define STA_R 0x2 // Readable (executable segments) diff --git a/bio.c b/bio.c new file mode 100644 index 0000000..8faaf8c --- /dev/null +++ b/bio.c @@ -0,0 +1,140 @@ +// Buffer cache. +// +// The buffer cache is a linked list of buf structures holding +// cached copies of disk block contents. Caching disk blocks +// in memory reduces the number of disk reads and also provides +// a synchronization point for disk blocks used by multiple processes. +// +// Interface: +// * To get a buffer for a particular disk block, call bread. +// * After changing buffer data, call bwrite to write it to disk. +// * When done with the buffer, call brelse. +// * Do not use the buffer after calling brelse. +// * Only one process at a time can use a buffer, +// so do not keep them longer than necessary. +// +// The implementation uses two state flags internally: +// * B_VALID: the buffer data has been read from the disk. +// * B_DIRTY: the buffer data has been modified +// and needs to be written to disk. + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" + +struct { + struct spinlock lock; + struct buf buf[NBUF]; + + // Linked list of all buffers, through prev/next. + // head.next is most recently used. 
+ struct buf head; +} bcache; + +void binit(void) { + struct buf *b; + + initlock(&bcache.lock, "bcache"); + + // Create linked list of buffers + bcache.head.prev = &bcache.head; + bcache.head.next = &bcache.head; + for (b = bcache.buf; b < bcache.buf + NBUF; b++) { + b->next = bcache.head.next; + b->prev = &bcache.head; + initsleeplock(&b->lock, "buffer"); + bcache.head.next->prev = b; + bcache.head.next = b; + } +} + +// Look through buffer cache for block on device dev. +// If not found, allocate a buffer. +// In either case, return locked buffer. + +static struct buf* bget(uint dev, uint blockno) { + struct buf *b; + + acquire(&bcache.lock); + + // Is the block already cached? + for (b = bcache.head.next; b != &bcache.head; b = b->next) { + if (b->dev == dev && b->blockno == blockno) { + b->refcnt++; + release(&bcache.lock); + acquiresleep(&b->lock); + return b; + } + } + + // Not cached; recycle an unused buffer. + // Even if refcnt==0, B_DIRTY indicates a buffer is in use + // because log.c has modified it but not yet committed it. + + for (b = bcache.head.prev; b != &bcache.head; b = b->prev) { + if (b->refcnt == 0 && (b->flags & B_DIRTY) == 0) { + b->dev = dev; + b->blockno = blockno; + b->flags = 0; + b->refcnt = 1; + release(&bcache.lock); + acquiresleep(&b->lock); + return b; + } + } + panic("bget: no buffers"); +} + +// Return a locked buf with the contents of the indicated block. + +struct buf*bread(uint dev, uint blockno) { + struct buf *b; + + b = bget(dev, blockno); + if ((b->flags & B_VALID) == 0) { + iderw(b); + } + return b; +} + +// Write b's contents to disk. Must be locked. + +void bwrite(struct buf *b) { + if (!holdingsleep(&b->lock)) { + panic("bwrite"); + } + b->flags |= B_DIRTY; + iderw(b); +} + +// Release a locked buffer. +// Move to the head of the MRU list. 
+ +void brelse(struct buf *b) { + if (!holdingsleep(&b->lock)) { + panic("brelse"); + } + + releasesleep(&b->lock); + + acquire(&bcache.lock); + b->refcnt--; + if (b->refcnt == 0) { + // no one is waiting for it. + b->next->prev = b->prev; + b->prev->next = b->next; + b->next = bcache.head.next; + b->prev = &bcache.head; + bcache.head.next->prev = b; + bcache.head.next = b; + } + + release(&bcache.lock); +} + + + diff --git a/bootasm.S b/bootasm.S new file mode 100644 index 0000000..260a156 --- /dev/null +++ b/bootasm.S @@ -0,0 +1,81 @@ +#include "asm.h" +#include "memlayout.h" +#include "mmu.h" + +# Start the first CPU: switch to 32-bit protected mode, jump into C. +# The BIOS loads this code from the first sector of the hard disk into +# memory at physical address 0x7c00 and starts executing in real mode +# with %cs=0 %ip=7c00. + +.code16 # Assemble for 16-bit mode +.globl start +start: + cli # BIOS enabled interrupts; disable + + # Zero data segment registers DS, ES, and SS. + xorw %ax,%ax # Set %ax to zero + movw %ax,%ds # -> Data Segment + movw %ax,%es # -> Extra Segment + movw %ax,%ss # -> Stack Segment + + # Physical address line A20 is tied to zero so that the first PCs + # with 2 MB would run software that assumed 1 MB. Undo that. +seta20.1: + inb $0x64,%al # Wait for not busy + testb $0x2,%al + jnz seta20.1 + + movb $0xd1,%al # 0xd1 -> port 0x64 + outb %al,$0x64 + +seta20.2: + inb $0x64,%al # Wait for not busy + testb $0x2,%al + jnz seta20.2 + + movb $0xdf,%al # 0xdf -> port 0x60 + outb %al,$0x60 + + # Switch from real to protected mode. Use a bootstrap GDT that makes + # virtual addresses map directly to physical addresses so that the + # effective memory map doesn't change during the transition. + lgdt gdtdesc + movl %cr0, %eax + orl $CR0_PE, %eax + movl %eax, %cr0 + + # Complete the transition to 32-bit protected mode by using a long jmp + # to reload %cs and %eip. 
The segment descriptors are set up with no + # translation, so that the mapping is still the identity mapping. + ljmp $(SEG_KCODE<<3), $start32 + +.code32 # Tell assembler to generate 32-bit code now. +start32: + # Set up the protected-mode data segment registers + movw $(SEG_KDATA<<3), %ax # Our data segment selector + movw %ax, %ds # -> DS: Data Segment + movw %ax, %es # -> ES: Extra Segment + movw %ax, %ss # -> SS: Stack Segment + movw $0, %ax # Zero segments not ready for use + movw %ax, %fs # -> FS + movw %ax, %gs # -> GS + + # Set up the stack pointer and call into C. + movl $start, %esp + call bootmain + + # If bootmain returns (it shouldn't), loop. +spin: + jmp spin + + # Bootstrap GDT +.p2align 2 # force 4 byte alignment +gdt: + SEG_NULLASM # null seg + SEG_ASM(STA_X|STA_R, 0x0, 0xffffffff) # code seg + SEG_ASM(STA_W, 0x0, 0xffffffff) # data seg + +gdtdesc: + .word (gdtdesc - gdt - 1) # sizeof(gdt) - 1 + .long gdt # address gdt + diff --git a/bootmain.c b/bootmain.c new file mode 100644 index 0000000..7efd295 --- /dev/null +++ b/bootmain.c @@ -0,0 +1,93 @@ +// Boot loader. +// +// Part of the boot block, along with bootasm.S, which calls bootmain(). +// bootasm.S has put the processor into protected 32-bit mode. +// bootmain() loads an ELF kernel image from the disk starting at +// sector 1 and then jumps to the kernel entry routine. + +#include "types.h" +#include "elf.h" +#include "x86.h" +#include "memlayout.h" + +#define SECTSIZE 512 + +void readseg(uchar*, uint, uint); + +void bootmain(void) { + struct elfhdr *elf; + struct proghdr *ph, *eph; + void (*entry)(void); + uchar* pa; + + elf = (struct elfhdr*)0x10000; // scratch space + + // Read 1st page off disk + readseg((uchar*)elf, 4096, 0); + + // Is this an ELF executable? + if (elf->magic != ELF_MAGIC) { + return; // let bootasm.S handle error + + } + // Load each program segment (ignores ph flags). 
+ ph = (struct proghdr*)((uchar*)elf + elf->phoff); + eph = ph + elf->phnum; + for (; ph < eph; ph++) { + pa = (uchar*)ph->paddr; + readseg(pa, ph->filesz, ph->off); + if (ph->memsz > ph->filesz) { + stosb(pa + ph->filesz, 0, ph->memsz - ph->filesz); + } + } + + // Call the entry point from the ELF header. + // Does not return! + entry = (void (*)(void))(elf->entry); + entry(); +} + +void waitdisk(void) { + // Wait for disk ready. + while ((inb(0x1F7) & 0xC0) != 0x40) { + ; + } +} + +// Read a single sector at offset into dst. +void readsect(void *dst, uint offset) { + // Issue command. + waitdisk(); + outb(0x1F2, 1); // count = 1 + outb(0x1F3, offset); + outb(0x1F4, offset >> 8); + outb(0x1F5, offset >> 16); + outb(0x1F6, (offset >> 24) | 0xE0); + outb(0x1F7, 0x20); // cmd 0x20 - read sectors + + // Read data. + waitdisk(); + insl(0x1F0, dst, SECTSIZE / 4); +} + +// Read 'count' bytes at 'offset' from kernel into physical address 'pa'. +// Might copy more than asked. + +void readseg(uchar* pa, uint count, uint offset) { + uchar* epa; + + epa = pa + count; + + // Round down to sector boundary. + pa -= offset % SECTSIZE; + + // Translate from bytes to sectors; kernel starts at sector 1. + offset = (offset / SECTSIZE) + 1; + + // If this is too slow, we could read lots of sectors at a time. + // We'd write more to memory than asked, but it doesn't matter -- + // we load in increasing order. 
+ for (; pa < epa; pa += SECTSIZE, offset++) { + readsect(pa, offset); + } +} diff --git a/buf.h b/buf.h new file mode 100644 index 0000000..d5d8d4d --- /dev/null +++ b/buf.h @@ -0,0 +1,14 @@ +struct buf { + int flags; + uint dev; + uint blockno; + struct sleeplock lock; + uint refcnt; + struct buf *prev; // LRU cache list + struct buf *next; + struct buf *qnext; // disk queue + uchar data[BSIZE]; +}; +#define B_VALID 0x2 // buffer has been read from disk +#define B_DIRTY 0x4 // buffer needs to be written to disk + diff --git a/cat.c b/cat.c new file mode 100644 index 0000000..b523487 --- /dev/null +++ b/cat.c @@ -0,0 +1,39 @@ +#include "types.h" +#include "stat.h" +#include "user.h" + +char buf[512]; + +void cat(int fd) { + int n; + + while ((n = read(fd, buf, sizeof(buf))) > 0) { + if (write(1, buf, n) != n) { + printf(1, "cat: write error\n"); + exit(); + } + } + if (n < 0) { + printf(1, "cat: read error\n"); + exit(); + } +} + +int main(int argc, char *argv[]) { + int fd, i; + + if (argc <= 1) { + cat(0); + exit(); + } + + for (i = 1; i < argc; i++) { + if ((fd = open(argv[i], 0)) < 0) { + printf(1, "cat: cannot open %s\n", argv[i]); + exit(); + } + cat(fd); + close(fd); + } + exit(); +} diff --git a/config.xlaunch b/config.xlaunch new file mode 100644 index 0000000..63548ab --- /dev/null +++ b/config.xlaunch @@ -0,0 +1,2 @@ + + diff --git a/console.c b/console.c new file mode 100644 index 0000000..a330c97 --- /dev/null +++ b/console.c @@ -0,0 +1,332 @@ +// Console input and output. +// Input is from the keyboard or serial port. +// Output is written to the screen and serial port. 
+ +#include "types.h" +#include "defs.h" +#include "param.h" +#include "traps.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "file.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" +#include "x86.h" + +#define INPUT_BUF 128 + +struct kbdbuffer { + char buf[INPUT_BUF]; + uint r; // Read index + uint w; // Write index + uint e; // Edit index +}; + +struct kbdbuffer inputBuffer; + +struct kbdbuffer * input = 0; + +#define C(x) ((x) - '@') // Control-x + + + +static void consputc(int); + +static int panicked = 0; + +static struct { + struct spinlock lock; + int locking; +} cons; + +static void printint(int xx, int base, int sign) { + static char digits[] = "0123456789abcdef"; + char buf[16]; + int i; + uint x; + + if (sign && (sign = xx < 0)) { + x = -(uint)xx; // negate as unsigned: -xx overflows for the most negative int + } + else { + x = xx; + } + + i = 0; + do { + buf[i++] = digits[x % base]; + } + while ((x /= base) != 0); + + if (sign) { + buf[i++] = '-'; + } + + while (--i >= 0) { + consputc(buf[i]); + } +} + +// Print to the console. only understands %d, %x, %p, %s. +void cprintf(char *fmt, ...) { + int i, c, locking; + uint *argp; + char *s; + + locking = cons.locking; + if (locking) { + acquire(&cons.lock); + } + + if (fmt == 0) { + panic("null fmt"); + } + + argp = (uint*)(void*)(&fmt + 1); + for (i = 0; (c = fmt[i] & 0xff) != 0; i++) { + if (c != '%') { + consputc(c); + continue; + } + c = fmt[++i] & 0xff; + if (c == 0) { + break; + } + switch (c) { + case 'd': + printint(*argp++, 10, 1); + break; + case 'x': + case 'p': + printint(*argp++, 16, 0); + break; + case 's': + if ((s = (char*)*argp++) == 0) { + s = "(null)"; + } + for (; *s; s++) { + consputc(*s); + } + break; + case '%': + consputc('%'); + break; + default: + // Print unknown % sequence to draw attention. 
+ consputc('%'); + consputc(c); + break; + } + } + + if (locking) { + release(&cons.lock); + } +} + +void panic(char *s) { + int i; + uint pcs[10]; + + cli(); + cons.locking = 0; + // use lapicid() so that we can call panic from mycpu() + cprintf("lapicid %d: panic: ", lapicid()); + cprintf("%s", s); // print s verbatim, never as a format string + cprintf("\n"); + getcallerpcs(&s, pcs); + for (i = 0; i < 10; i++) { + cprintf(" %p", pcs[i]); + } + panicked = 1; // freeze other CPU + for (;;) { + ; + } +} + +#define BACKSPACE 0x100 +#define CRTPORT 0x3d4 +static ushort *crt = (ushort*)P2V(0xb8000); // CGA memory + +static void cgaputc(int c) { + int pos; + + // Cursor position: col + 80*row. + outb(CRTPORT, 14); + pos = inb(CRTPORT + 1) << 8; + outb(CRTPORT, 15); + pos |= inb(CRTPORT + 1); + + if (c == '\n') { + pos += 80 - pos % 80; + } + else if (c == BACKSPACE) { + if (pos > 0) { + --pos; + } + } + else { + crt[pos++] = (c & 0xff) | 0x0700; // light grey on black + + } + if (pos < 0 || pos > 25 * 80) { + panic("pos under/overflow"); + } + + if ((pos / 80) >= 24) { // Scroll up. + memmove(crt, crt + 80, sizeof(crt[0]) * 23 * 80); + pos -= 80; + memset(crt + pos, 0, sizeof(crt[0]) * (24 * 80 - pos)); + } + + outb(CRTPORT, 14); + outb(CRTPORT + 1, pos >> 8); + outb(CRTPORT, 15); + outb(CRTPORT + 1, pos); + crt[pos] = ' ' | 0x0700; +} + +void consputc(int c) { + if (panicked) { + cli(); + for (;;) { + ; + } + } + + if (c == BACKSPACE) { + uartputc('\b'); + uartputc(' '); + uartputc('\b'); + } + else { + uartputc(c); + } + cgaputc(c); +} + +int consoleget(void) { + int c; + + acquire(&cons.lock); + + // Spin until kbdgetc() returns a positive value. Every result is tested by + // the loop condition; a second read in the body would be thrown away. + while ((c = kbdgetc()) <= 0) { + ; + } + + release(&cons.lock); + + return c; +} + +void consoleintr(int (*getc)(void)) { + int c, doprocdump = 0; + + acquire(&cons.lock); + while ((c = getc()) >= 0) { + switch (c) { + case C('P'): // Process listing. + // procdump() locks cons.lock indirectly; invoke later + doprocdump = 1; + break; + case C('U'): // Kill line. 
+ while (input->e != input->w && + input->buf[(input->e - 1) % INPUT_BUF] != '\n') { + input->e--; + consputc(BACKSPACE); + } + break; + case C('H'): + case '\x7f': // Backspace + if (input->e != input->w) { + input->e--; + consputc(BACKSPACE); + } + break; + default: + if (c != 0 && input->e - input->r < INPUT_BUF) { + c = (c == '\r') ? '\n' : c; + input->buf[input->e++ % INPUT_BUF] = c; + consputc(c); + if (c == '\n' || c == C('D') || input->e == input->r + INPUT_BUF) { + input->w = input->e; + wakeup(&(input->r)); + } + } + break; + } + } + release(&cons.lock); + if (doprocdump) { + procdump(); // now call procdump() wo. cons.lock held + } +} + +int consoleread(struct inode *ip, char *dst, int n) { + uint target; + int c; + + iunlock(ip); + target = n; + acquire(&cons.lock); + while (n > 0) { + while (input->r == input->w) { + if (myproc()->killed) { + release(&cons.lock); + ilock(ip); + return -1; + } + sleep(&(input->r), &cons.lock); + } + c = input->buf[input->r++ % INPUT_BUF]; + if (c == C('D')) { // EOF + if (n < target) { + // Save ^D for next time, to make sure + // caller gets a 0-byte result. 
+ input->r--; + } + break; + } + *dst++ = c; + --n; + if (c == '\n') { + break; + } + } + release(&cons.lock); + ilock(ip); + + return target - n; +} + +int consolewrite(struct inode *ip, char *buf, int n) { + int i; + + iunlock(ip); + acquire(&cons.lock); + for (i = 0; i < n; i++) { + consputc(buf[i] & 0xff); + } + release(&cons.lock); + ilock(ip); + + return n; +} + +void consoleinit(void) { + initlock(&cons.lock, "console"); + + // Initialise pointer to point to our console input buffer + input = &inputBuffer; + + devsw[CONSOLE].write = consolewrite; + devsw[CONSOLE].read = consoleread; + cons.locking = 1; + + ioapicenable(IRQ_KBD, 0); +} + diff --git a/cuth b/cuth new file mode 100644 index 0000000..cce8c0c --- /dev/null +++ b/cuth @@ -0,0 +1,48 @@ +#!/usr/bin/perl + +$| = 1; + +sub writefile($@){ + my ($file, @lines) = @_; + + sleep(1); + open(F, ">$file") || die "open >$file: $!"; + print F @lines; + close(F); +} + +# Cut out #include lines that don't contribute anything. +for($i=0; $i<@ARGV; $i++){ + $file = $ARGV[$i]; + if(!open(F, $file)){ + print STDERR "open $file: $!\n"; + next; + } + @lines = ; + close(F); + + $obj = "$file.o"; + $obj =~ s/\.c\.o$/.o/; + system("touch $file"); + + if(system("make CC='gcc -Werror' $obj >/dev/null 2>\&1") != 0){ + print STDERR "make $obj failed: $rv\n"; + next; + } + + system("cp $file =$file"); + for($j=@lines-1; $j>=0; $j--){ + if($lines[$j] =~ /^#include/){ + $old = $lines[$j]; + $lines[$j] = "/* CUT-H */\n"; + writefile($file, @lines); + if(system("make CC='gcc -Werror' $obj >/dev/null 2>\&1") != 0){ + $lines[$j] = $old; + }else{ + print STDERR "$file $old"; + } + } + } + writefile($file, grep {!/CUT-H/} @lines); + system("rm =$file"); +} diff --git a/date.h b/date.h new file mode 100644 index 0000000..f2e81b0 --- /dev/null +++ b/date.h @@ -0,0 +1,8 @@ +struct rtcdate { + uint second; + uint minute; + uint hour; + uint day; + uint month; + uint year; +}; diff --git a/defs.h b/defs.h new file mode 100644 index 
0000000..02710a1 --- /dev/null +++ b/defs.h @@ -0,0 +1,191 @@ +struct buf; +struct context; +struct file; +struct inode; +struct pipe; +struct proc; +struct rtcdate; +struct spinlock; +struct sleeplock; +struct stat; +struct superblock; + +// bio.c +void binit(void); +struct buf* bread(uint, uint); +void brelse(struct buf*); +void bwrite(struct buf*); + +// console.c +void consoleinit(void); +void cprintf(char*, ...); +void consoleintr(int (*)(void)); +int consoleget(void); +void panic(char*) __attribute__((noreturn)); + +// exec.c +int exec(char*, char**); + +// file.c +struct file* filealloc(void); +void fileclose(struct file*); +struct file* filedup(struct file*); +void fileinit(void); +int fileread(struct file*, char*, int n); +int filestat(struct file*, struct stat*); +int filewrite(struct file*, char*, int n); + +// fs.c +void readsb(int dev, struct superblock *sb); +int dirlink(struct inode*, char*, uint); +struct inode* dirlookup(struct inode*, char*, uint*); +struct inode* ialloc(uint, short); +struct inode* idup(struct inode*); +void iinit(int dev); +void ilock(struct inode*); +void iput(struct inode*); +void iunlock(struct inode*); +void iunlockput(struct inode*); +void iupdate(struct inode*); +int namecmp(const char*, const char*); +struct inode* namei(char*); +struct inode* nameiparent(char*, char*); +int readi(struct inode*, char*, uint, uint); +void stati(struct inode*, struct stat*); +int writei(struct inode*, char*, uint, uint); + +// ide.c +void ideinit(void); +void ideintr(void); +void iderw(struct buf*); + +// ioapic.c +void ioapicenable(int irq, int cpu); +extern uchar ioapicid; +void ioapicinit(void); + +// kalloc.c +char* kalloc(void); +void kfree(char*); +void kinit1(void*, void*); +void kinit2(void*, void*); + +// kbd.c +void kbdintr(void); +int kbdgetc(void); + +// lapic.c +void cmostime(struct rtcdate *r); +int lapicid(void); +extern volatile uint* lapic; +void lapiceoi(void); +void lapicinit(void); +void lapicstartap(uchar, uint); +void 
microdelay(int); + +// log.c +void initlog(int dev); +void log_write(struct buf*); +void begin_op(); +void end_op(); + +// mp.c +extern int ismp; +void mpinit(void); + +// picirq.c +void picenable(int); +void picinit(void); + +// pipe.c +int pipealloc(struct file**, struct file**); +void pipeclose(struct pipe*, int); +int piperead(struct pipe*, char*, int); +int pipewrite(struct pipe*, char*, int); + +// proc.c +int cpuid(void); +void exit(void); +int fork(void); +int growproc(int); +int kill(int); +struct cpu* mycpu(void); +struct proc* myproc(); +void pinit(void); +void procdump(void); +void scheduler(void) __attribute__((noreturn)); +void sched(void); +void setproc(struct proc*); +void sleep(void*, struct spinlock*); +void userinit(void); +int wait(void); +void wakeup(void*); +void yield(void); + +// swtch.S +void swtch(struct context**, struct context*); + +// spinlock.c +void acquire(struct spinlock*); +void getcallerpcs(void*, uint*); +int holding(struct spinlock*); +void initlock(struct spinlock*, char*); +void release(struct spinlock*); +void pushcli(void); +void popcli(void); + +// sleeplock.c +void acquiresleep(struct sleeplock*); +void releasesleep(struct sleeplock*); +int holdingsleep(struct sleeplock*); +void initsleeplock(struct sleeplock*, char*); + +// string.c +int memcmp(const void*, const void*, uint); +void* memmove(void*, const void*, uint); +void* memset(void*, int, uint); +char* safestrcpy(char*, const char*, int); +int strlen(const char*); +int strncmp(const char*, const char*, uint); +char* strncpy(char*, const char*, int); + +// syscall.c +int argint(int, int*); +int argptr(int, char**, int); +int argstr(int, char**); +int fetchint(uint, int*); +int fetchstr(uint, char**); +void syscall(void); + +// timer.c +void timerinit(void); + +// trap.c +void idtinit(void); +extern uint ticks; +void tvinit(void); +extern struct spinlock tickslock; + +// uart.c +void uartinit(void); +void uartintr(void); +void uartputc(int); + +// vm.c +void 
seginit(void); +void kvmalloc(void); +pde_t* setupkvm(void); +char* uva2ka(pde_t*, char*); +int allocuvm(pde_t*, uint, uint); +int deallocuvm(pde_t*, uint, uint); +void freevm(pde_t*); +void inituvm(pde_t*, char*, uint); +int loaduvm(pde_t*, char*, struct inode*, uint, uint); +pde_t* copyuvm(pde_t*, uint); +void switchuvm(struct proc*); +void switchkvm(void); +int copyout(pde_t*, uint, void*, uint); +void clearpteu(pde_t *pgdir, char *uva); + +// number of elements in fixed-size array +#define NELEM(x) (sizeof(x) / sizeof((x)[0])) diff --git a/echo.c b/echo.c new file mode 100644 index 0000000..89b105c --- /dev/null +++ b/echo.c @@ -0,0 +1,12 @@ +#include "types.h" +#include "stat.h" +#include "user.h" + +int main(int argc, char *argv[]) { + int i; + + for (i = 1; i < argc; i++) { + printf(1, "%s%s", argv[i], i + 1 < argc ? " " : "\n"); + } + exit(); +} diff --git a/elf.h b/elf.h new file mode 100644 index 0000000..0a312a5 --- /dev/null +++ b/elf.h @@ -0,0 +1,42 @@ +// Format of an ELF executable file + +#define ELF_MAGIC 0x464C457FU // "\x7FELF" in little endian + +// File header +struct elfhdr { + uint magic; // must equal ELF_MAGIC + uchar elf[12]; + ushort type; + ushort machine; + uint version; + uint entry; + uint phoff; + uint shoff; + uint flags; + ushort ehsize; + ushort phentsize; + ushort phnum; + ushort shentsize; + ushort shnum; + ushort shstrndx; +}; + +// Program section header +struct proghdr { + uint type; + uint off; + uint vaddr; + uint paddr; + uint filesz; + uint memsz; + uint flags; + uint align; +}; + +// Values for Proghdr type +#define ELF_PROG_LOAD 1 + +// Flag bits for Proghdr flags +#define ELF_PROG_FLAG_EXEC 1 +#define ELF_PROG_FLAG_WRITE 2 +#define ELF_PROG_FLAG_READ 4 diff --git a/entry.S b/entry.S new file mode 100644 index 0000000..2d9504d --- /dev/null +++ b/entry.S @@ -0,0 +1,68 @@ +# The xv6 kernel starts executing in this file. This file is linked with +# the kernel C code, so it can refer to kernel symbols such as main(). 
+# The boot block (bootasm.S and bootmain.c) jumps to entry below. + +# Multiboot header, for multiboot boot loaders like GNU Grub. +# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html +# +# Using GRUB 2, you can boot xv6 from a file stored in a +# Linux file system by copying kernel or kernelmemfs to /boot +# and then adding this menu entry: +# +# menuentry "xv6" { +# insmod ext2 +# set root='(hd0,msdos1)' +# set kernel='/boot/kernel' +# echo "Loading ${kernel}..." +# multiboot ${kernel} ${kernel} +# boot +# } + +#include "asm.h" +#include "memlayout.h" +#include "mmu.h" +#include "param.h" + +# Multiboot header. Data to direct multiboot loader. +.p2align 2 +.text +.globl multiboot_header +multiboot_header: + #define magic 0x1badb002 + #define flags 0 + .long magic + .long flags + .long (-magic-flags) + +# By convention, the _start symbol specifies the ELF entry point. +# Since we haven't set up virtual memory yet, our entry point is +# the physical address of 'entry'. +.globl _start +_start = V2P_WO(entry) + +# Entering xv6 on boot processor, with paging off. +.globl entry +entry: + # Turn on page size extension for 4Mbyte pages + movl %cr4, %eax + orl $(CR4_PSE), %eax + movl %eax, %cr4 + # Set page directory + movl $(V2P_WO(entrypgdir)), %eax + movl %eax, %cr3 + # Turn on paging. + movl %cr0, %eax + orl $(CR0_PG|CR0_WP), %eax + movl %eax, %cr0 + + # Set up the stack pointer. + movl $(stack + KSTACKSIZE), %esp + + # Jump to main(), and switch to executing at + # high addresses. The indirect call is needed because + # the assembler produces a PC-relative instruction + # for a direct jump. + mov $main, %eax + jmp *%eax + +.comm stack, KSTACKSIZE diff --git a/entryother.S b/entryother.S new file mode 100644 index 0000000..93aea8e --- /dev/null +++ b/entryother.S @@ -0,0 +1,89 @@ +#include "asm.h" +#include "memlayout.h" +#include "mmu.h" + +# Each non-boot CPU ("AP") is started up in response to a STARTUP +# IPI from the boot CPU. 
Section B.4.2 of the Multi-Processor +# Specification says that the AP will start in real mode with CS:IP +# set to XY00:0000, where XY is an 8-bit value sent with the +# STARTUP. Thus this code must start at a 4096-byte boundary. +# +# Because this code sets DS to zero, it must sit +# at an address in the low 2^16 bytes. +# +# Startothers (in main.c) sends the STARTUPs one at a time. +# It copies this code (start) at 0x7000. It puts the address of +# a newly allocated per-core stack in start-4,the address of the +# place to jump to (mpenter) in start-8, and the physical address +# of entrypgdir in start-12. +# +# This code combines elements of bootasm.S and entry.S. + +.code16 +.globl start +start: + cli + + # Zero data segment registers DS, ES, and SS. + xorw %ax,%ax + movw %ax,%ds + movw %ax,%es + movw %ax,%ss + + # Switch from real to protected mode. Use a bootstrap GDT that makes + # virtual addresses map directly to physical addresses so that the + # effective memory map doesn't change during the transition. + lgdt gdtdesc + movl %cr0, %eax + orl $CR0_PE, %eax + movl %eax, %cr0 + + # Complete the transition to 32-bit protected mode by using a long jmp + # to reload %cs and %eip. The segment descriptors are set up with no + # translation, so that the mapping is still the identity mapping. + ljmpl $(SEG_KCODE<<3), $(start32) + + +.code32 # Tell assembler to generate 32-bit code now. +start32: + # Set up the protected-mode data segment registers + movw $(SEG_KDATA<<3), %ax # Our data segment selector + movw %ax, %ds # -> DS: Data Segment + movw %ax, %es # -> ES: Extra Segment + movw %ax, %ss # -> SS: Stack Segment + movw $0, %ax # Zero segments not ready for use + movw %ax, %fs # -> FS + movw %ax, %gs # -> GS + + # Turn on page size extension for 4Mbyte pages + movl %cr4, %eax + orl $(CR4_PSE), %eax + movl %eax, %cr4 + # Use entrypgdir as our initial page table + movl (start-12), %eax + movl %eax, %cr3 + # Turn on paging. 
+ movl %cr0, %eax + orl $(CR0_PE|CR0_PG|CR0_WP), %eax + movl %eax, %cr0 + + # Switch to the stack allocated by startothers() + movl (start-4), %esp + # Call mpenter() + call *(start-8) + + # We should never return. +spin: + jmp spin + +.p2align 2 +gdt: + SEG_NULLASM + SEG_ASM(STA_X|STA_R, 0, 0xffffffff) + SEG_ASM(STA_W, 0, 0xffffffff) + + +gdtdesc: + .word (gdtdesc - gdt - 1) + .long gdt + diff --git a/exec.c b/exec.c new file mode 100644 index 0000000..73c54b1 --- /dev/null +++ b/exec.c @@ -0,0 +1,142 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" +#include "defs.h" +#include "x86.h" +#include "elf.h" + +void cleanupexec(pde_t * pgdir, struct inode *ip) { + if (pgdir) { + freevm(pgdir); + } + if (ip) { + iunlockput(ip); + end_op(); + } +} + +int exec(char *path, char **argv) { + char *s, *last; + int i, off; + uint argc, sz, sp, ustack[3 + MAXARG + 1]; + struct elfhdr elf; + struct inode *ip; + struct proghdr ph; + pde_t *pgdir, *oldpgdir; + struct proc *curproc = myproc(); + + begin_op(); + + if ((ip = namei(path)) == 0) { + end_op(); + cprintf("exec: fail\n"); + return -1; + } + ilock(ip); + pgdir = 0; + + // Check ELF header + if (readi(ip, (char*)&elf, 0, sizeof(elf)) != sizeof(elf)) { + cleanupexec(pgdir, ip); + return -1; + } + if (elf.magic != ELF_MAGIC) { + cleanupexec(pgdir, ip); + return -1; + } + + if ((pgdir = setupkvm()) == 0) { + cleanupexec(pgdir, ip); + return -1; + } + + // Load program into memory. 
+ sz = 0; + for (i = 0, off = elf.phoff; i < elf.phnum; i++, off += sizeof(ph)) { + if (readi(ip, (char*)&ph, off, sizeof(ph)) != sizeof(ph)) { + cleanupexec(pgdir, ip); + return -1; + } + if (ph.type != ELF_PROG_LOAD) { + continue; + } + if (ph.memsz < ph.filesz) { + cleanupexec(pgdir, ip); + return -1; + } + if (ph.vaddr + ph.memsz < ph.vaddr) { + cleanupexec(pgdir, ip); + return -1; + } + if ((sz = allocuvm(pgdir, sz, ph.vaddr + ph.memsz)) == 0) { + cleanupexec(pgdir, ip); + return -1; + } + if (ph.vaddr % PGSIZE != 0) { + cleanupexec(pgdir, ip); + return -1; + } + if (loaduvm(pgdir, (char*)ph.vaddr, ip, ph.off, ph.filesz) < 0) { + cleanupexec(pgdir, ip); + return -1; + } + } + iunlockput(ip); + end_op(); + ip = 0; + + // Allocate two pages at the next page boundary. + // Make the first inaccessible. Use the second as the user stack. + sz = PGROUNDUP(sz); + if ((sz = allocuvm(pgdir, sz, sz + 2 * PGSIZE)) == 0) { + cleanupexec(pgdir, ip); + return -1; + } + clearpteu(pgdir, (char*)(sz - 2 * PGSIZE)); + sp = sz; + + // Push argument strings, prepare rest of stack in ustack. + for (argc = 0; argv[argc]; argc++) { + if (argc >= MAXARG) { + cleanupexec(pgdir, ip); + return -1; + } + sp = (sp - (strlen(argv[argc]) + 1)) & ~3; + if (copyout(pgdir, sp, argv[argc], strlen(argv[argc]) + 1) < 0) { + cleanupexec(pgdir, ip); + return -1; + } + ustack[3 + argc] = sp; + } + ustack[3 + argc] = 0; + + ustack[0] = 0xffffffff; // fake return PC + ustack[1] = argc; + ustack[2] = sp - (argc + 1) * 4; // argv pointer + + sp -= (3 + argc + 1) * 4; + if (copyout(pgdir, sp, ustack, (3 + argc + 1) * 4) < 0) { + cleanupexec(pgdir, ip); + return -1; + } + + // Save program name for debugging. + for (last = s = path; *s; s++) { + if (*s == '/') { + last = s + 1; + } + } + safestrcpy(curproc->name, last, sizeof(curproc->name)); + + // Commit to the user image. 
+ oldpgdir = curproc->pgdir; + curproc->pgdir = pgdir; + curproc->sz = sz; + curproc->tf->eip = elf.entry; // main + curproc->tf->esp = sp; + switchuvm(curproc); + freevm(oldpgdir); + return 0; +} diff --git a/fcntl.h b/fcntl.h new file mode 100644 index 0000000..d565483 --- /dev/null +++ b/fcntl.h @@ -0,0 +1,4 @@ +#define O_RDONLY 0x000 +#define O_WRONLY 0x001 +#define O_RDWR 0x002 +#define O_CREATE 0x200 diff --git a/file.c b/file.c new file mode 100644 index 0000000..bbd83ab --- /dev/null +++ b/file.c @@ -0,0 +1,155 @@ +// +// File descriptors +// + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "fs.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "file.h" + +struct devsw devsw[NDEV]; +struct { + struct spinlock lock; + struct file file[NFILE]; +} ftable; + +void fileinit(void) { + initlock(&ftable.lock, "ftable"); +} + +// Allocate a file structure. +struct file* filealloc(void) { + struct file *f; + + acquire(&ftable.lock); + for (f = ftable.file; f < ftable.file + NFILE; f++) { + if (f->ref == 0) { + f->ref = 1; + release(&ftable.lock); + return f; + } + } + release(&ftable.lock); + return 0; +} + +// Increment ref count for file f. +struct file* filedup(struct file *f) { + acquire(&ftable.lock); + if (f->ref < 1) { + panic("filedup"); + } + f->ref++; + release(&ftable.lock); + return f; +} + +// Close file f. (Decrement ref count, close when reaches 0.) +void fileclose(struct file *f) { + struct file ff; + + acquire(&ftable.lock); + if (f->ref < 1) { + panic("fileclose"); + } + if (--f->ref > 0) { + release(&ftable.lock); + return; + } + ff = *f; + f->ref = 0; + f->type = FD_NONE; + release(&ftable.lock); + + if (ff.type == FD_PIPE) { + pipeclose(ff.pipe, ff.writable); + } + else if (ff.type == FD_INODE) { + begin_op(); + iput(ff.ip); + end_op(); + } +} + +// Get metadata about file f. 
+int filestat(struct file *f, struct stat *st) { + if (f->type == FD_INODE) { + ilock(f->ip); + stati(f->ip, st); + iunlock(f->ip); + return 0; + } + return -1; +} + +// Read from file f. +int fileread(struct file *f, char *addr, int n) { + int r; + + if (f->readable == 0) { + return -1; + } + if (f->type == FD_PIPE) { + return piperead(f->pipe, addr, n); + } + if (f->type == FD_INODE) { + ilock(f->ip); + if ((r = readi(f->ip, addr, f->off, n)) > 0) { + f->off += r; + } + iunlock(f->ip); + return r; + } + panic("fileread"); +} + + +// Write to file f. +int filewrite(struct file *f, char *addr, int n) { + int r; + + if (f->writable == 0) { + return -1; + } + if (f->type == FD_PIPE) { + return pipewrite(f->pipe, addr, n); + } + if (f->type == FD_INODE) { + // write a few blocks at a time to avoid exceeding + // the maximum log transaction size, including + // i-node, indirect block, allocation blocks, + // and 2 blocks of slop for non-aligned writes. + // this really belongs lower down, since writei() + // might be writing a device like the console. + int max = ((MAXOPBLOCKS - 1 - 1 - 2) / 2) * 512; + int i = 0; + while (i < n) { + int n1 = n - i; + if (n1 > max) { + n1 = max; + } + + begin_op(); + ilock(f->ip); + if ((r = writei(f->ip, addr + i, f->off, n1)) > 0) { + f->off += r; + } + iunlock(f->ip); + end_op(); + + if (r < 0) { + break; + } + if (r != n1) { + panic("short filewrite"); + } + i += r; + } + return i == n ? 
n : -1; + } + panic("filewrite"); +} + diff --git a/file.h b/file.h new file mode 100644 index 0000000..7ffe18b --- /dev/null +++ b/file.h @@ -0,0 +1,37 @@ +struct file { + enum { FD_NONE, FD_PIPE, FD_INODE } type; + int ref; // reference count + char readable; + char writable; + struct pipe *pipe; + struct inode *ip; + uint off; +}; + + +// in-memory copy of an inode +struct inode { + uint dev; // Device number + uint inum; // Inode number + int ref; // Reference count + struct sleeplock lock; // protects everything below here + int valid; // inode has been read from disk? + + short type; // copy of disk inode + short major; + short minor; + short nlink; + uint size; + uint addrs[NDIRECT + 1]; +}; + +// table mapping major device number to +// device functions +struct devsw { + int (*read)(struct inode*, char*, int); + int (*write)(struct inode*, char*, int); +}; + +extern struct devsw devsw[]; + +#define CONSOLE 1 diff --git a/forktest.c b/forktest.c new file mode 100644 index 0000000..89c6d91 --- /dev/null +++ b/forktest.c @@ -0,0 +1,52 @@ +// Test that fork fails gracefully. +// Tiny executable so that the limit can be filling the proc table. + +#include "types.h" +#include "stat.h" +#include "user.h" + +#define N 1000 + +void printf(int fd, const char *s, ...) 
{ + write(fd, s, strlen(s)); +} + +void forktest(void) { + int n, pid; + + printf(1, "fork test\n"); + + for (n = 0; n < N; n++) { + pid = fork(); + if (pid < 0) { + break; + } + if (pid == 0) { + exit(); + } + } + + if (n == N) { + printf(1, "fork claimed to work N times!\n", N); + exit(); + } + + for (; n > 0; n--) { + if (wait() < 0) { + printf(1, "wait stopped early\n"); + exit(); + } + } + + if (wait() != -1) { + printf(1, "wait got too many\n"); + exit(); + } + + printf(1, "fork test OK\n"); +} + +int main(int argc, char* argv[]) { + forktest(); + exit(); +} diff --git a/fs.c b/fs.c new file mode 100644 index 0000000..9ada0f2 --- /dev/null +++ b/fs.c @@ -0,0 +1,649 @@ +// File system implementation. Five layers: +// + Blocks: allocator for raw disk blocks. +// + Log: crash recovery for multi-step updates. +// + Files: inode allocator, reading, writing, metadata. +// + Directories: inode with special contents (list of other inodes!) +// + Names: paths like /usr/rtm/xv6/fs.c for convenient naming. +// +// This file contains the low-level file system manipulation +// routines. The (higher-level) system call implementations +// are in sysfile.c. + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "stat.h" +#include "mmu.h" +#include "proc.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" +#include "file.h" + +#define min(a, b) ((a) < (b) ? (a) : (b)) + +static void itrunc(struct inode*); +// there should be one superblock per disk device, but we run with +// only one device +struct superblock sb; + +// Read the super block. +void readsb(int dev, struct superblock *sb) { + struct buf *bp; + + bp = bread(dev, 1); + memmove(sb, bp->data, sizeof(*sb)); + brelse(bp); +} + +// Zero a block. +static void bzero(int dev, int bno) { + struct buf *bp; + + bp = bread(dev, bno); + memset(bp->data, 0, BSIZE); + log_write(bp); + brelse(bp); +} + +// Blocks. + +// Allocate a zeroed disk block. 
+static uint balloc(uint dev) { + int b, bi, m; + struct buf *bp; + + bp = 0; + for (b = 0; b < sb.size; b += BPB) { + bp = bread(dev, BBLOCK(b, sb)); + for (bi = 0; bi < BPB && b + bi < sb.size; bi++) { + m = 1 << (bi % 8); + if ((bp->data[bi / 8] & m) == 0) { // Is block free? + bp->data[bi / 8] |= m; // Mark block in use. + log_write(bp); + brelse(bp); + bzero(dev, b + bi); + return b + bi; + } + } + brelse(bp); + } + panic("balloc: out of blocks"); +} + +// Free a disk block. +static void bfree(int dev, uint b) { + struct buf *bp; + int bi, m; + + bp = bread(dev, BBLOCK(b, sb)); + bi = b % BPB; + m = 1 << (bi % 8); + if ((bp->data[bi / 8] & m) == 0) { + panic("freeing free block"); + } + bp->data[bi / 8] &= ~m; + log_write(bp); + brelse(bp); +} + +// Inodes. +// +// An inode describes a single unnamed file. +// The inode disk structure holds metadata: the file's type, +// its size, the number of links referring to it, and the +// list of blocks holding the file's content. +// +// The inodes are laid out sequentially on disk at +// sb.startinode. Each inode has a number, indicating its +// position on the disk. +// +// The kernel keeps a cache of in-use inodes in memory +// to provide a place for synchronizing access +// to inodes used by multiple processes. The cached +// inodes include book-keeping information that is +// not stored on disk: ip->ref and ip->valid. +// +// An inode and its in-memory representation go through a +// sequence of states before they can be used by the +// rest of the file system code. +// +// * Allocation: an inode is allocated if its type (on disk) +// is non-zero. ialloc() allocates, and iput() frees if +// the reference and link counts have fallen to zero. +// +// * Referencing in cache: an entry in the inode cache +// is free if ip->ref is zero. Otherwise ip->ref tracks +// the number of in-memory pointers to the entry (open +// files and current directories). 
iget() finds or +// creates a cache entry and increments its ref; iput() +// decrements ref. +// +// * Valid: the information (type, size, &c) in an inode +// cache entry is only correct when ip->valid is 1. +// ilock() reads the inode from +// the disk and sets ip->valid, while iput() clears +// ip->valid if ip->ref has fallen to zero. +// +// * Locked: file system code may only examine and modify +// the information in an inode and its content if it +// has first locked the inode. +// +// Thus a typical sequence is: +// ip = iget(dev, inum) +// ilock(ip) +// ... examine and modify ip->xxx ... +// iunlock(ip) +// iput(ip) +// +// ilock() is separate from iget() so that system calls can +// get a long-term reference to an inode (as for an open file) +// and only lock it for short periods (e.g., in read()). +// The separation also helps avoid deadlock and races during +// pathname lookup. iget() increments ip->ref so that the inode +// stays cached and pointers to it remain valid. +// +// Many internal file system functions expect the caller to +// have locked the inodes involved; this lets callers create +// multi-step atomic operations. +// +// The icache.lock spin-lock protects the allocation of icache +// entries. Since ip->ref indicates whether an entry is free, +// and ip->dev and ip->inum indicate which i-node an entry +// holds, one must hold icache.lock while using any of those fields. +// +// An ip->lock sleep-lock protects all ip-> fields other than ref, +// dev, and inum. One must hold ip->lock in order to +// read or write that inode's ip->valid, ip->size, ip->type, &c. 
+ +struct { + struct spinlock lock; + struct inode inode[NINODE]; +} icache; + +void iinit(int dev) { + int i = 0; + + initlock(&icache.lock, "icache"); + for (i = 0; i < NINODE; i++) { + initsleeplock(&icache.inode[i].lock, "inode"); + } + + readsb(dev, &sb); + cprintf("sb: size %d nblocks %d ninodes %d nlog %d logstart %d\ + inodestart %d bmap start %d\n", sb.size, sb.nblocks, + sb.ninodes, sb.nlog, sb.logstart, sb.inodestart, + sb.bmapstart); +} + +static struct inode* iget(uint dev, uint inum); + + +// Allocate an inode on device dev. +// Mark it as allocated by giving it type type. +// Returns an unlocked but allocated and referenced inode. +struct inode* ialloc(uint dev, short type) { + int inum; + struct buf *bp; + struct dinode *dip; + + for (inum = 1; inum < sb.ninodes; inum++) { + bp = bread(dev, IBLOCK(inum, sb)); + dip = (struct dinode*)bp->data + inum % IPB; + if (dip->type == 0) { // a free inode + memset(dip, 0, sizeof(*dip)); + dip->type = type; + log_write(bp); // mark it allocated on the disk + brelse(bp); + return iget(dev, inum); + } + brelse(bp); + } + panic("ialloc: no inodes"); +} + +// Copy a modified in-memory inode to disk. +// Must be called after every change to an ip->xxx field +// that lives on disk, since i-node cache is write-through. +// Caller must hold ip->lock. +void iupdate(struct inode *ip) { + struct buf *bp; + struct dinode *dip; + + bp = bread(ip->dev, IBLOCK(ip->inum, sb)); + dip = (struct dinode*)bp->data + ip->inum % IPB; + dip->type = ip->type; + dip->major = ip->major; + dip->minor = ip->minor; + dip->nlink = ip->nlink; + dip->size = ip->size; + memmove(dip->addrs, ip->addrs, sizeof(ip->addrs)); + log_write(bp); + brelse(bp); +} + +// Find the inode with number inum on device dev +// and return the in-memory copy. Does not lock +// the inode and does not read it from disk. +static struct inode* iget(uint dev, uint inum) { + struct inode *ip, *empty; + + acquire(&icache.lock); + + // Is the inode already cached? 
+ empty = 0; + for (ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++) { + if (ip->ref > 0 && ip->dev == dev && ip->inum == inum) { + ip->ref++; + release(&icache.lock); + return ip; + } + if (empty == 0 && ip->ref == 0) { // Remember empty slot. + empty = ip; + } + } + + // Recycle an inode cache entry. + if (empty == 0) { + panic("iget: no inodes"); + } + + ip = empty; + ip->dev = dev; + ip->inum = inum; + ip->ref = 1; + ip->valid = 0; + release(&icache.lock); + + return ip; +} + +// Increment reference count for ip. +// Returns ip to enable ip = idup(ip1) idiom. +struct inode* idup(struct inode *ip) { + acquire(&icache.lock); + ip->ref++; + release(&icache.lock); + return ip; +} + +// Lock the given inode. +// Reads the inode from disk if necessary. +void ilock(struct inode *ip) { + struct buf *bp; + struct dinode *dip; + + if (ip == 0 || ip->ref < 1) { + panic("ilock"); + } + + acquiresleep(&ip->lock); + + if (ip->valid == 0) { + bp = bread(ip->dev, IBLOCK(ip->inum, sb)); + dip = (struct dinode*)bp->data + ip->inum % IPB; + ip->type = dip->type; + ip->major = dip->major; + ip->minor = dip->minor; + ip->nlink = dip->nlink; + ip->size = dip->size; + memmove(ip->addrs, dip->addrs, sizeof(ip->addrs)); + brelse(bp); + ip->valid = 1; + if (ip->type == 0) { + panic("ilock: no type"); + } + } +} + +// Unlock the given inode. +void iunlock(struct inode *ip) { + if (ip == 0 || !holdingsleep(&ip->lock) || ip->ref < 1) { + panic("iunlock"); + } + + releasesleep(&ip->lock); +} + +// Drop a reference to an in-memory inode. +// If that was the last reference, the inode cache entry can +// be recycled. +// If that was the last reference and the inode has no links +// to it, free the inode (and its content) on disk. +// All calls to iput() must be inside a transaction in +// case it has to free the inode. 
+void iput(struct inode *ip) { + acquiresleep(&ip->lock); + if (ip->valid && ip->nlink == 0) { + acquire(&icache.lock); + int r = ip->ref; + release(&icache.lock); + if (r == 1) { + // inode has no links and no other references: truncate and free. + itrunc(ip); + ip->type = 0; + iupdate(ip); + ip->valid = 0; + } + } + releasesleep(&ip->lock); + + acquire(&icache.lock); + ip->ref--; + release(&icache.lock); +} + +// Common idiom: unlock, then put. +void iunlockput(struct inode *ip) { + iunlock(ip); + iput(ip); +} + + +// Inode content +// +// The content (data) associated with each inode is stored +// in blocks on the disk. The first NDIRECT block numbers +// are listed in ip->addrs[]. The next NINDIRECT blocks are +// listed in block ip->addrs[NDIRECT]. + +// Return the disk block address of the nth block in inode ip. +// If there is no such block, bmap allocates one. +static uint bmap(struct inode *ip, uint bn) { + uint addr, *a; + struct buf *bp; + + if (bn < NDIRECT) { + if ((addr = ip->addrs[bn]) == 0) { + ip->addrs[bn] = addr = balloc(ip->dev); + } + return addr; + } + bn -= NDIRECT; + + if (bn < NINDIRECT) { + // Load indirect block, allocating if necessary. + if ((addr = ip->addrs[NDIRECT]) == 0) { + ip->addrs[NDIRECT] = addr = balloc(ip->dev); + } + bp = bread(ip->dev, addr); + a = (uint*)bp->data; + if ((addr = a[bn]) == 0) { + a[bn] = addr = balloc(ip->dev); + log_write(bp); + } + brelse(bp); + return addr; + } + + panic("bmap: out of range"); +} + +// Truncate inode (discard contents). +// Only called when the inode has no links +// to it (no directory entries referring to it) +// and has no in-memory reference to it (is +// not an open file or current directory). 
+static void itrunc(struct inode *ip) { + int i, j; + struct buf *bp; + uint *a; + + for (i = 0; i < NDIRECT; i++) { + if (ip->addrs[i]) { + bfree(ip->dev, ip->addrs[i]); + ip->addrs[i] = 0; + } + } + + if (ip->addrs[NDIRECT]) { + bp = bread(ip->dev, ip->addrs[NDIRECT]); + a = (uint*)bp->data; + for (j = 0; j < NINDIRECT; j++) { + if (a[j]) { + bfree(ip->dev, a[j]); + } + } + brelse(bp); + bfree(ip->dev, ip->addrs[NDIRECT]); + ip->addrs[NDIRECT] = 0; + } + + ip->size = 0; + iupdate(ip); +} + +// Copy stat information from inode. +// Caller must hold ip->lock. +void stati(struct inode *ip, struct stat *st) { + st->dev = ip->dev; + st->ino = ip->inum; + st->type = ip->type; + st->nlink = ip->nlink; + st->size = ip->size; +} + + +// Read data from inode. +// Caller must hold ip->lock. +int readi(struct inode *ip, char *dst, uint off, uint n) { + uint tot, m; + struct buf *bp; + + if (ip->type == T_DEV) { + if (ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read) { + return -1; + } + return devsw[ip->major].read(ip, dst, n); + } + + if (off > ip->size || off + n < off) { + return -1; + } + if (off + n > ip->size) { + n = ip->size - off; + } + + for (tot = 0; tot < n; tot += m, off += m, dst += m) { + bp = bread(ip->dev, bmap(ip, off / BSIZE)); + m = min(n - tot, BSIZE - off % BSIZE); + memmove(dst, bp->data + off % BSIZE, m); + brelse(bp); + } + return n; +} + +// Write data to inode. +// Caller must hold ip->lock. 
+int writei(struct inode *ip, char *src, uint off, uint n) { + uint tot, m; + struct buf *bp; + + if (ip->type == T_DEV) { + if (ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write) { + return -1; + } + return devsw[ip->major].write(ip, src, n); + } + + if (off > ip->size || off + n < off) { + return -1; + } + if (off + n > MAXFILE * BSIZE) { + return -1; + } + + for (tot = 0; tot < n; tot += m, off += m, src += m) { + bp = bread(ip->dev, bmap(ip, off / BSIZE)); + m = min(n - tot, BSIZE - off % BSIZE); + memmove(bp->data + off % BSIZE, src, m); + log_write(bp); + brelse(bp); + } + + if (n > 0 && off > ip->size) { + ip->size = off; + iupdate(ip); + } + return n; +} + + +// Directories + +int namecmp(const char *s, const char *t) { + return strncmp(s, t, DIRSIZ); +} + +// Look for a directory entry in a directory. +// If found, set *poff to byte offset of entry. +struct inode* dirlookup(struct inode *dp, char *name, uint *poff) { + uint off, inum; + struct dirent de; + + if (dp->type != T_DIR) { + panic("dirlookup not DIR"); + } + + for (off = 0; off < dp->size; off += sizeof(de)) { + if (readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) { + panic("dirlookup read"); + } + if (de.inum == 0) { + continue; + } + if (namecmp(name, de.name) == 0) { + // entry matches path element + if (poff) { + *poff = off; + } + inum = de.inum; + return iget(dp->dev, inum); + } + } + + return 0; +} + +// Write a new directory entry (name, inum) into the directory dp. +int dirlink(struct inode *dp, char *name, uint inum) { + int off; + struct dirent de; + struct inode *ip; + + // Check that name is not present. + if ((ip = dirlookup(dp, name, 0)) != 0) { + iput(ip); + return -1; + } + + // Look for an empty dirent. 
+ for (off = 0; off < dp->size; off += sizeof(de)) { + if (readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) { + panic("dirlink read"); + } + if (de.inum == 0) { + break; + } + } + + strncpy(de.name, name, DIRSIZ); + de.inum = inum; + if (writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) { + panic("dirlink"); + } + + return 0; +} + + +// Paths + +// Copy the next path element from path into name. +// Return a pointer to the element following the copied one. +// The returned path has no leading slashes, +// so the caller can check *path=='\0' to see if the name is the last one. +// If no name to remove, return 0. +// +// Examples: +// skipelem("a/bb/c", name) = "bb/c", setting name = "a" +// skipelem("///a//bb", name) = "bb", setting name = "a" +// skipelem("a", name) = "", setting name = "a" +// skipelem("", name) = skipelem("////", name) = 0 +// +static char* skipelem(char *path, char *name) { + char *s; + int len; + + while (*path == '/') { + path++; + } + if (*path == 0) { + return 0; + } + s = path; + while (*path != '/' && *path != 0) { + path++; + } + len = path - s; + if (len >= DIRSIZ) { + memmove(name, s, DIRSIZ); + } + else { + memmove(name, s, len); + name[len] = 0; + } + while (*path == '/') { + path++; + } + return path; +} + +// Look up and return the inode for a path name. +// If parent != 0, return the inode for the parent and copy the final +// path element into name, which must have room for DIRSIZ bytes. +// Must be called inside a transaction since it calls iput(). +static struct inode* namex(char *path, int nameiparent, char *name) { + struct inode *ip, *next; + + if (*path == '/') { + ip = iget(ROOTDEV, ROOTINO); + } + else { + ip = idup(myproc()->cwd); + } + + while ((path = skipelem(path, name)) != 0) { + ilock(ip); + if (ip->type != T_DIR) { + iunlockput(ip); + return 0; + } + if (nameiparent && *path == '\0') { + // Stop one level early. 
+ iunlock(ip); + return ip; + } + if ((next = dirlookup(ip, name, 0)) == 0) { + iunlockput(ip); + return 0; + } + iunlockput(ip); + ip = next; + } + if (nameiparent) { + iput(ip); + return 0; + } + return ip; +} + +struct inode* namei(char *path) { + char name[DIRSIZ]; + return namex(path, 0, name); +} + +struct inode*nameiparent(char *path, char *name) { + return namex(path, 1, name); +} diff --git a/fs.h b/fs.h new file mode 100644 index 0000000..3651c7b --- /dev/null +++ b/fs.h @@ -0,0 +1,57 @@ +// On-disk file system format. +// Both the kernel and user programs use this header file. + + +#define ROOTINO 1 // root i-number +#define BSIZE 512 // block size + +// Disk layout: +// [ boot block | super block | log | inode blocks | +// free bit map | data blocks] +// +// mkfs computes the super block and builds an initial file system. The +// super block describes the disk layout: +struct superblock { + uint size; // Size of file system image (blocks) + uint nblocks; // Number of data blocks + uint ninodes; // Number of inodes. + uint nlog; // Number of log blocks + uint logstart; // Block number of first log block + uint inodestart; // Block number of first inode block + uint bmapstart; // Block number of first free map block +}; + +#define NDIRECT 12 +#define NINDIRECT (BSIZE / sizeof(uint)) +#define MAXFILE (NDIRECT + NINDIRECT) + +// On-disk inode structure +struct dinode { + short type; // File type + short major; // Major device number (T_DEV only) + short minor; // Minor device number (T_DEV only) + short nlink; // Number of links to inode in file system + uint size; // Size of file (bytes) + uint addrs[NDIRECT + 1]; // Data block addresses +}; + +// Inodes per block. 
+#define IPB (BSIZE / sizeof(struct dinode)) + +// Block containing inode i +#define IBLOCK(i, sb) ((i) / IPB + sb.inodestart) + +// Bitmap bits per block +#define BPB (BSIZE * 8) + +// Block of free map containing bit for block b +#define BBLOCK(b, sb) (b / BPB + sb.bmapstart) + +// Directory is a file containing a sequence of dirent structures. +#define DIRSIZ 14 + +struct dirent { + ushort inum; + char name[DIRSIZ]; +}; + diff --git a/gdbutil b/gdbutil new file mode 100644 index 0000000..e0c362f --- /dev/null +++ b/gdbutil @@ -0,0 +1,291 @@ +# -*- gdb-script -*- + +# Utility functions to pretty-print x86 segment/interrupt descriptors. +# To load this file, run "source gdbutil" in gdb. +# printdesc and printdescs are the main entry points. + +# IA32 2007, Volume 3A, Table 3-2 +set $STS_T16A = 0x1 +set $STS_LDT = 0x2 +set $STS_T16B = 0x3 +set $STS_CG16 = 0x4 +set $STS_TG = 0x5 +set $STS_IG16 = 0x6 +set $STS_TG16 = 0x7 +set $STS_T32A = 0x9 +set $STS_T32B = 0xB +set $STS_CG32 = 0xC +set $STS_IG32 = 0xE +set $STS_TG32 = 0xF + +define outputsts + while 1 + if $arg0 == $STS_T16A + echo STS_T16A + loop_break + end + if $arg0 == $STS_LDT + echo STS_LDT\ + loop_break + end + if $arg0 == $STS_T16B + echo STS_T16B + loop_break + end + if $arg0 == $STS_CG16 + echo STS_CG16 + loop_break + end + if $arg0 == $STS_TG + echo STS_TG\ \ + loop_break + end + if $arg0 == $STS_IG16 + echo STS_IG16 + loop_break + end + if $arg0 == $STS_TG16 + echo STS_TG16 + loop_break + end + if $arg0 == $STS_T32A + echo STS_T32A + loop_break + end + if $arg0 == $STS_T32B + echo STS_T32B + loop_break + end + if $arg0 == $STS_CG32 + echo STS_CG32 + loop_break + end + if $arg0 == $STS_IG32 + echo STS_IG32 + loop_break + end + if $arg0 == $STS_TG32 + echo STS_TG32 + loop_break + end + echo Reserved + loop_break + end +end + +# IA32 2007, Volume 3A, Table 3-1 +set $STA_X = 0x8 +set $STA_E = 0x4 +set $STA_C = 0x4 +set $STA_W = 0x2 +set $STA_R = 0x2 +set $STA_A = 0x1 + +define outputsta + if $arg0 & $STA_X 
+ # Code segment + echo code + if $arg0 & $STA_C + echo |STA_C + end + if $arg0 & $STA_R + echo |STA_R + end + else + # Data segment + echo data + if $arg0 & $STA_E + echo |STA_E + end + if $arg0 & $STA_W + echo |STA_W + end + end + if $arg0 & $STA_A + echo |STA_A + else + printf " " + end +end + +# xv6-specific +set $SEG_KCODE = 1 +set $SEG_KDATA = 2 +set $SEG_KCPU = 3 +set $SEG_UCODE = 4 +set $SEG_UDATA = 5 +set $SEG_TSS = 6 + +define outputcs + if ($arg0 & 4) == 0 + if $arg0 >> 3 == $SEG_KCODE + printf "SEG_KCODE<<3" + end + if $arg0 >> 3 == $SEG_KDATA + printf "SEG_KDATA<<3" + end + if $arg0 >> 3 == $SEG_KCPU + printf "SEG_KCPU<<3" + end + if $arg0 >> 3 == $SEG_UCODE + printf "SEG_UCODE<<3" + end + if $arg0 >> 3 == $SEG_UDATA + printf "SEG_UDATA<<3" + end + if $arg0 >> 3 == $SEG_TSS + printf "SEG_TSS<<3" + end + if ($arg0 >> 3 < 1) + ($arg0 >> 3 > 6) + printf "GDT[%d]", $arg0 >> 3 + end + else + printf "LDT[%d]", $arg0 >> 3 + end + if ($arg0 & 3) > 0 + printf "|" + outputdpl ($arg0&3) + end +end + +define outputdpl + if $arg0 == 0 + printf "DPL_KERN" + else + if $arg0 == 3 + printf "DPL_USER" + else + printf "DPL%d", $arg0 + end + end +end + +define printdesc + if $argc != 1 + echo Usage: printdesc expr + else + _printdesc ((uint*)&($arg0))[0] ((uint*)&($arg0))[1] + printf "\n" + end +end + +document printdesc +Print an x86 segment or gate descriptor. +printdesc EXPR +EXPR must evaluate to a descriptor value. It can be of any C type. 
+end + +define _printdesc + _printdesc1 $arg0 $arg1 ($arg1>>15&1) ($arg1>>13&3) ($arg1>>12&1) ($arg1>>8&15) +end + +define _printdesc1 + # 2:P 3:DPL 4:S 5:Type + if $arg2 == 0 + printf "P = 0 (Not present)" + else + printf "type = " + if $arg4 == 0 + # System segment + outputsts $arg5 + printf " (0x%x) ", $arg5 + _printsysdesc $arg0 $arg1 $arg5 + else + # Code/data segment + outputsta $arg5 + printf " " + _printsegdesc $arg0 $arg1 + end + + printf " DPL = " + outputdpl $arg3 + printf " (%d)", $arg3 + end +end + +define _printsysdesc + # 2:Type + # GDB's || is buggy + if ($arg2 == $STS_TG) + (($arg2&7) == $STS_IG16) + (($arg2&7) == $STS_TG16) + # Gate descriptor + _printgate $arg2 ($arg0>>16) ($arg0&0xFFFF) ($arg1>>16) + else + # System segment descriptor + _printsegdesc $arg0 $arg1 + end +end + +define _printgate + # IA32 2007, Voume 3A, Figure 5-2 + # 0:Type 1:CS 2:Offset 15..0 3:Offset 31..16 + printf "CS = " + outputcs $arg1 + printf " (%d)", $arg1 + + if (($arg0&7) == $STS_IG16) + (($arg0&7) == $STS_TG16) + printf " Offset = " + output/a $arg3 << 16 | $arg2 + end +end + +define _printsegdesc + # IA32 20007, Volume 3A, Figure 3-8 and Figure 4-1 + _printsegdesc1 ($arg0>>16) ($arg1&0xFF) ($arg1>>24) ($arg0&0xFFFF) ($arg1>>16&15) ($arg1>>23&1) + if ($arg1>>12&1) == 1 + printf " AVL = %d", $arg1>>20&1 + if ($arg1>>11&1) == 0 + # Data segment + if ($arg1>>22&1) == 0 + printf " B = small (0) " + else + printf " B = big (1) " + end + else + # Code segment + printf " D = " + if ($arg1>>22&1) == 0 + printf "16-bit (0)" + else + printf "32-bit (1)" + end + end + end +end + +define _printsegdesc1 + # 0:Base 0..15 1:Base 16..23 2:Base 24..32 3:Limit 0..15 4:Limit 16..19 5:G + printf "base = 0x%08x", $arg0 | ($arg1<<16) | ($arg2<<24) + printf " limit = 0x" + if $arg5 == 0 + printf "%08x", $arg3 | ($arg4<<16) + else + printf "%08x", (($arg3 | ($arg4<<16)) << 12) | 0xFFF + end +end + +define printdescs + if $argc < 1 || $argc > 2 + echo Usage: printdescs expr [count] + else + 
if $argc == 1 + _printdescs ($arg0) (sizeof($arg0)/sizeof(($arg0)[0])) + else + _printdescs ($arg0) ($arg1) + end + end +end + +document printdescs +Print an array of x86 segment or gate descriptors. +printdescs EXPR [COUNT] +EXPR must evaluate to an array of descriptors. +end + +define _printdescs + set $i = 0 + while $i < $arg1 + printf "[%d] ", $i + printdesc $arg0[$i] + set $i = $i + 1 + end +end diff --git a/gensyscalls.pl b/gensyscalls.pl new file mode 100755 index 0000000..16b7975 --- /dev/null +++ b/gensyscalls.pl @@ -0,0 +1,102 @@ +#!/usr/bin/perl -w + +# Generate syscall.h, syscalltable.h or usys.S. These are the header and assembly +# files for system calls. +# +# Generating these files from one script avoids them getting out of sync. +# +# Specify an argument of -h to generate syscall.h +# Specify an argument of -c to generate syscalltable.h +# Specify an argument of -a to generate usys.S +# +# Note that you also need to update user.h with the declarations for these functions that +# user programs will use. This ensures that the C compiler generates the correct code to +# push the parameters on to the stack. + +my @syscalls = ( + "fork", + "exit", + "wait", + "pipe", + "read", + "kill", + "exec", + "fstat", + "chdir", + "dup", + "getpid", + "sbrk", + "sleep", + "uptime", + "open", + "write", + "mknod", + "unlink", + "link", + "mkdir", + "close", + "getch" + ); + +my $i; +if ($#ARGV == -1) +{ + print 'Error: No argument supplied to gensyscalls.pl'; + exit(1); +} +if (($ARGV[0] ne '-h') && ($ARGV[0] ne '-a') && ($ARGV[0] ne '-c')) +{ + print 'Error: Invalid argument to gensyscalls.pl'; + exit(1); +} +if ($ARGV[0] eq '-h'|| $ARGV[0] eq '-c') +{ + print "// Generated by gensyscalls.pl. Do not edit.\n"; + print "// To change syscall numbers or add new syscalls, edit gensyscalls.pl\n"; + print "\n"; +} +else +{ + print "# Generated by gensyscalls.pl. 
Do not edit.\n"; + print "# To change syscall numbers or add new syscalls, edit gensyscalls.pl\n"; + print "\n"; +} +for ($i = 0; $i < scalar(@syscalls); $i++) +{ + my $index = $i + 1; + if ($ARGV[0] eq '-h') + { + print "#define SYS_$syscalls[$i]\t\t$index\n"; + } + elsif ($ARGV[0] eq '-c') + { + print "extern int sys_$syscalls[$i](void);\n"; + } +} +if ($ARGV[0] eq '-a') +{ + print "#include \"syscall.h\"\n"; + print "#include \"traps.h\"\n"; + print "\n"; + print "#define SYSCALL(name) \\\n"; + print ".globl name; \\\n"; + print "name: \\\n"; + print "\tmovl\t\$SYS_ ## name, \%eax; \\\n"; + print "\tint\t\$T_SYSCALL; \\\n"; + print "\tret\n"; + print "\n"; + for ($i = 0; $i < scalar(@syscalls); $i++) + { + print "SYSCALL($syscalls[$i])\n"; + } +} +elsif ($ARGV[0] eq '-c') +{ + print "\n"; + print "static int(*syscalls[])(void) = {\n"; + for ($i = 0; $i < scalar(@syscalls); $i++) + { + print "[SYS_$syscalls[$i]]\tsys_$syscalls[$i],\n"; + } + print "};\n" +} diff --git a/grep.c b/grep.c new file mode 100644 index 0000000..a0aa833 --- /dev/null +++ b/grep.c @@ -0,0 +1,109 @@ +// Simple grep. Only supports ^ . * $ operators. 
+ +#include "types.h" +#include "stat.h" +#include "user.h" + +char buf[1024]; +int match(char*, char*); + +void grep(char *pattern, int fd) { + int n, m; + char *p, *q; + + m = 0; + while ((n = read(fd, buf + m, sizeof(buf) - m - 1)) > 0) { + m += n; + buf[m] = '\0'; + p = buf; + while ((q = strchr(p, '\n')) != 0) { + *q = 0; + if (match(pattern, p)) { + *q = '\n'; + write(1, p, q + 1 - p); + } + p = q + 1; + } + if (p == buf) { + m = 0; + } + if (m > 0) { + m -= p - buf; + memmove(buf, p, m); + } + } +} + +int main(int argc, char *argv[]) { + int fd, i; + char *pattern; + + if (argc <= 1) { + printf(2, "usage: grep pattern [file ...]\n"); + exit(); + } + pattern = argv[1]; + + if (argc <= 2) { + grep(pattern, 0); + exit(); + } + + for (i = 2; i < argc; i++) { + if ((fd = open(argv[i], 0)) < 0) { + printf(1, "grep: cannot open %s\n", argv[i]); + exit(); + } + grep(pattern, fd); + close(fd); + } + exit(); +} + +// Regexp matcher from Kernighan & Pike, +// The Practice of Programming, Chapter 9. + +int matchhere(char*, char*); +int matchstar(int, char*, char*); + +int match(char *re, char *text) { + if (re[0] == '^') { + return matchhere(re + 1, text); + } + do { // must look at empty string + if (matchhere(re, text)) { + return 1; + } + } + while (*text++ != '\0'); + return 0; +} + +// matchhere: search for re at beginning of text +int matchhere(char *re, char *text){ + if (re[0] == '\0') { + return 1; + } + if (re[1] == '*') { + return matchstar(re[0], re + 2, text); + } + if (re[0] == '$' && re[1] == '\0') { + return *text == '\0'; + } + if (*text != '\0' && (re[0] == '.' 
|| re[0] == *text)) { + return matchhere(re + 1, text + 1); + } + return 0; +} + +// matchstar: search for c*re at beginning of text +int matchstar(int c, char *re, char *text) { + do { // a * matches zero or more instances + if (matchhere(re, text)) { + return 1; + } + } + while (*text != '\0' && (*text++ == c || c == '.')); + return 0; +} + diff --git a/ide.c b/ide.c new file mode 100644 index 0000000..1278db5 --- /dev/null +++ b/ide.c @@ -0,0 +1,171 @@ +// Simple PIO-based (non-DMA) IDE driver code. + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" +#include "x86.h" +#include "traps.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" + +#define SECTOR_SIZE 512 +#define IDE_BSY 0x80 +#define IDE_DRDY 0x40 +#define IDE_DF 0x20 +#define IDE_ERR 0x01 + +#define IDE_CMD_READ 0x20 +#define IDE_CMD_WRITE 0x30 +#define IDE_CMD_RDMUL 0xc4 +#define IDE_CMD_WRMUL 0xc5 + +// idequeue points to the buf now being read/written to the disk. +// idequeue->qnext points to the next buf to be processed. +// You must hold idelock while manipulating queue. + +static struct spinlock idelock; +static struct buf *idequeue; + +static int havedisk1; +static void idestart(struct buf*); + +// Wait for IDE disk to become ready. +static int idewait(int checkerr) { + int r; + + while (((r = inb(0x1f7)) & (IDE_BSY | IDE_DRDY)) != IDE_DRDY) { + ; + } + if (checkerr && (r & (IDE_DF | IDE_ERR)) != 0) { + return -1; + } + return 0; +} + +void ideinit(void) { + int i; + + initlock(&idelock, "ide"); + ioapicenable(IRQ_IDE, ncpu - 1); + idewait(0); + + // Check if disk 1 is present + outb(0x1f6, 0xe0 | (1 << 4)); + for (i = 0; i < 1000; i++) { + if (inb(0x1f7) != 0) { + havedisk1 = 1; + break; + } + } + + // Switch back to disk 0. + outb(0x1f6, 0xe0 | (0 << 4)); +} + +// Start the request for b. Caller must hold idelock. 
+static void idestart(struct buf *b) {
+    if (b == 0) {
+        panic("idestart");
+    }
+    if (b->blockno >= FSSIZE) {
+        panic("incorrect blockno");
+    }
+    // Translate the file system block number into a disk sector number and
+    // pick single- or multi-sector commands from the block/sector ratio.
+    int sector_per_block = BSIZE / SECTOR_SIZE;
+    int sector = b->blockno * sector_per_block;
+    int read_cmd = (sector_per_block == 1) ? IDE_CMD_READ : IDE_CMD_RDMUL;
+    int write_cmd = (sector_per_block == 1) ? IDE_CMD_WRITE : IDE_CMD_WRMUL;
+
+    if (sector_per_block > 7) {
+        panic("idestart");
+    }
+
+    idewait(0);
+    outb(0x3f6, 0); // generate interrupt
+    outb(0x1f2, sector_per_block); // number of sectors
+    // Sector number, low byte first; its top four bits share port 0x1f6
+    // with the drive select bit taken from b->dev.
+    outb(0x1f3, sector & 0xff);
+    outb(0x1f4, (sector >> 8) & 0xff);
+    outb(0x1f5, (sector >> 16) & 0xff);
+    outb(0x1f6, 0xe0 | ((b->dev & 1) << 4) | ((sector >> 24) & 0x0f));
+    if (b->flags & B_DIRTY) {
+        // Write: issue the command and push the data out right away.
+        outb(0x1f7, write_cmd);
+        outsl(0x1f0, b->data, BSIZE / 4);
+    }
+    else {
+        // Read: only issue the command; ideintr() collects the data.
+        outb(0x1f7, read_cmd);
+    }
+}
+
+// Interrupt handler.
+// Completes the request at the head of idequeue, wakes the process
+// sleeping on that buf and starts the next queued request, if any.
+void ideintr(void) {
+    struct buf *b;
+
+    // First queued buffer is the active request.
+    acquire(&idelock);
+
+    if ((b = idequeue) == 0) {
+        release(&idelock);
+        return;
+    }
+    idequeue = b->qnext;
+
+    // Read data if needed.
+    // NOTE(review): when idewait(1) reports an error the data is not
+    // transferred, yet the buf is still marked B_VALID below.
+    if (!(b->flags & B_DIRTY) && idewait(1) >= 0) {
+        insl(0x1f0, b->data, BSIZE / 4);
+    }
+
+    // Wake process waiting for this buf.
+    b->flags |= B_VALID;
+    b->flags &= ~B_DIRTY;
+    wakeup(b);
+
+    // Start disk on next buf in queue.
+    if (idequeue != 0) {
+        idestart(idequeue);
+    }
+
+    release(&idelock);
+}
+
+
+// Sync buf with disk.
+// If B_DIRTY is set, write buf to disk, clear B_DIRTY, set B_VALID.
+// Else if B_VALID is not set, read buf from disk, set B_VALID.
+// Panics if the caller does not hold b->lock, if there is nothing to do,
+// or if b->dev is non-zero while no second disk was detected.
+// Sleeps until ideintr() has completed the request.
+void iderw(struct buf *b) {
+    struct buf **pp;
+
+    if (!holdingsleep(&b->lock)) {
+        panic("iderw: buf not locked");
+    }
+    if ((b->flags & (B_VALID | B_DIRTY)) == B_VALID) {
+        panic("iderw: nothing to do");
+    }
+    if (b->dev != 0 && !havedisk1) {
+        panic("iderw: ide disk 1 not present");
+    }
+
+    acquire(&idelock); //DOC:acquire-lock
+
+    // Append b to idequeue.
+    b->qnext = 0;
+    for (pp = &idequeue; *pp; pp = &(*pp)->qnext) { //DOC:insert-queue
+        ;
+    }
+    *pp = b;
+
+    // Start disk if necessary.
+    // (b at the head means the queue was empty, so nothing is in flight.)
+    if (idequeue == b) {
+        idestart(b);
+    }
+
+    // Wait for request to finish.
+    // sleep() is handed idelock; the loop re-checks the flags on each wakeup.
+    while ((b->flags & (B_VALID | B_DIRTY)) != B_VALID) {
+        sleep(b, &idelock);
+    }
+
+    release(&idelock);
+}
diff --git a/init.c b/init.c
new file mode 100644
index 0000000..f6f01a3
--- /dev/null
+++ b/init.c
@@ -0,0 +1,35 @@
+// init: The initial user-level program
+
+#include "types.h"
+#include "stat.h"
+#include "user.h"
+#include "fcntl.h"
+
+char *shell_argv[] = { "sh", 0 };
+
+int main(int argc, char* argv[]) {
+    int pid, wpid;
+
+    if (open("console", O_RDWR) < 0) {
+        mknod("console", 1, 1);
+        open("console", O_RDWR);
+    }
+    dup(0); // stdout
+    dup(0); // stderr
+
+    for (;;) {
+        printf(1, "init: starting sh\n");
+        pid = fork();
+        if (pid < 0) {
+            printf(1, "init: fork failed\n");
+            exit();
+        }
+        if (pid == 0) {
+            exec("sh", shell_argv);
+            printf(1, "init: exec sh failed\n");
+            exit();
+        }
+        while ((wpid = wait()) >= 0 && wpid != pid) {
+        }
+    }
+}
diff --git a/initcode.S b/initcode.S
new file mode 100644
index 0000000..71842d8
--- /dev/null
+++ b/initcode.S
@@ -0,0 +1,32 @@
+# Initial process execs /init.
+# This code runs in user space.
+
+#include "syscall.h"
+#include "traps.h"
+
+
+# exec(init, argv)
+.globl start
+start:
+    pushl $argv
+    pushl $init
+    pushl $0 // where caller pc would be
+    movl $SYS_exec, %eax
+    int $T_SYSCALL
+
+# for(;;) exit();
+exit:
+    movl $SYS_exit, %eax
+    int $T_SYSCALL
+    jmp exit
+
+# char init[] = "/init\0";
+init:
+    .string "/init\0"
+
+# char *argv[] = { init, 0 };
+    .p2align 2
+argv:
+    .long init
+    .long 0
+
diff --git a/ioapic.c b/ioapic.c
new file mode 100644
index 0000000..1756da4
--- /dev/null
+++ b/ioapic.c
@@ -0,0 +1,68 @@
+// The I/O APIC manages hardware interrupts for an SMP system.
+// http://www.intel.com/design/chipsets/datashts/29056601.pdf
+// See also picirq.c.
+ +#include "types.h" +#include "defs.h" +#include "traps.h" + +#define IOAPIC 0xFEC00000 // Default physical address of IO APIC + +#define REG_ID 0x00 // Register index: ID +#define REG_VER 0x01 // Register index: version +#define REG_TABLE 0x10 // Redirection table base + +// The redirection table starts at REG_TABLE and uses +// two registers to configure each interrupt. +// The first (low) register in a pair contains configuration bits. +// The second (high) register contains a bitmask telling which +// CPUs can serve that interrupt. +#define INT_DISABLED 0x00010000 // Interrupt disabled +#define INT_LEVEL 0x00008000 // Level-triggered (vs edge-) +#define INT_ACTIVELOW 0x00002000 // Active low (vs high) +#define INT_LOGICAL 0x00000800 // Destination is CPU id (vs APIC ID) + +volatile struct ioapic *ioapic; + +// IO APIC MMIO structure: write reg, then read or write data. +struct ioapic { + uint reg; + uint pad[3]; + uint data; +}; + +static uint ioapicread(int reg) { + ioapic->reg = reg; + return ioapic->data; +} + +static void ioapicwrite(int reg, uint data) { + ioapic->reg = reg; + ioapic->data = data; +} + +void ioapicinit(void) { + int i, id, maxintr; + + ioapic = (volatile struct ioapic*)IOAPIC; + maxintr = (ioapicread(REG_VER) >> 16) & 0xFF; + id = ioapicread(REG_ID) >> 24; + if (id != ioapicid) { + cprintf("ioapicinit: id isn't equal to ioapicid; not a MP\n"); + } + + // Mark all interrupts edge-triggered, active high, disabled, + // and not routed to any CPUs. + for (i = 0; i <= maxintr; i++) { + ioapicwrite(REG_TABLE + 2 * i, INT_DISABLED | (T_IRQ0 + i)); + ioapicwrite(REG_TABLE + 2 * i + 1, 0); + } +} + +void ioapicenable(int irq, int cpunum) { + // Mark interrupt edge-triggered, active high, + // enabled, and routed to the given cpunum, + // which happens to be that cpu's APIC ID. 
+ ioapicwrite(REG_TABLE + 2 * irq, T_IRQ0 + irq); + ioapicwrite(REG_TABLE + 2 * irq + 1, cpunum << 24); +} diff --git a/kalloc.c b/kalloc.c new file mode 100644 index 0000000..d1ef801 --- /dev/null +++ b/kalloc.c @@ -0,0 +1,93 @@ +// Physical memory allocator, intended to allocate +// memory for user processes, kernel stacks, page table pages, +// and pipe buffers. Allocates 4096-byte pages. + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "mmu.h" +#include "spinlock.h" + +void freerange(void *vstart, void *vend); +extern char end[]; // first address after kernel loaded from ELF file + // defined by the kernel linker script in kernel.ld + +struct run { + struct run *next; +}; + +struct { + struct spinlock lock; + int use_lock; + struct run *freelist; +} kmem; + +// Initialization happens in two phases. +// 1. main() calls kinit1() while still using entrypgdir to place just +// the pages mapped by entrypgdir on free list. +// 2. main() calls kinit2() with the rest of the physical pages +// after installing a full page table that maps them on all cores. +void kinit1(void *vstart, void *vend) { + initlock(&kmem.lock, "kmem"); + kmem.use_lock = 0; + freerange(vstart, vend); +} + +void kinit2(void *vstart, void *vend) { + freerange(vstart, vend); + kmem.use_lock = 1; +} + +void freerange(void *vstart, void *vend) { + char *p; + p = (char*)PGROUNDUP((uint)vstart); + for (; p + PGSIZE <= (char*)vend; p += PGSIZE) { + kfree(p); + } +} + +// Free the page of physical memory pointed at by v, +// which normally should have been returned by a +// call to kalloc(). (The exception is when +// initializing the allocator; see kinit above.) +void kfree(char *v) { + struct run *r; + + if ((uint)v % PGSIZE || v < end || V2P(v) >= PHYSTOP) { + panic("kfree"); + } + + // Fill with junk to catch dangling refs. 
+ memset(v, 1, PGSIZE); + + if (kmem.use_lock) { + acquire(&kmem.lock); + } + r = (struct run*)v; + r->next = kmem.freelist; + kmem.freelist = r; + if (kmem.use_lock) { + release(&kmem.lock); + } +} + +// Allocate one 4096-byte page of physical memory. +// Returns a pointer that the kernel can use. +// Returns 0 if the memory cannot be allocated. +char* kalloc(void) { + struct run *r; + + if (kmem.use_lock) { + acquire(&kmem.lock); + } + r = kmem.freelist; + if (r) { + kmem.freelist = r->next; + } + if (kmem.use_lock) { + release(&kmem.lock); + } + return (char*)r; +} + diff --git a/kbd.c b/kbd.c new file mode 100644 index 0000000..0426cb9 --- /dev/null +++ b/kbd.c @@ -0,0 +1,51 @@ +#include "types.h" +#include "x86.h" +#include "defs.h" +#include "kbd.h" + +int kbdgetc(void) { + static uint shift; + static uchar *charcode[4] = { + normalmap, shiftmap, ctlmap, ctlmap + }; + uint st, data, c; + + st = inb(KBSTATP); + if ((st & KBS_DIB) == 0) { + return -1; + } + data = inb(KBDATAP); + + if (data == 0xE0) { + shift |= E0ESC; + return 0; + } + else if (data & 0x80) { + // Key released + data = (shift & E0ESC ? 
data : data & 0x7F); + shift &= ~(shiftcode[data] | E0ESC); + return 0; + } + else if (shift & E0ESC) { + // Last character was an E0 escape; or with 0x80 + data |= 0x80; + shift &= ~E0ESC; + } + + shift |= shiftcode[data]; + shift ^= togglecode[data]; + c = charcode[shift & (CTL | SHIFT)][data]; + if (shift & CAPSLOCK) { + if ('a' <= c && c <= 'z') { + c += 'A' - 'a'; + } + else if ('A' <= c && c <= 'Z') { + c += 'a' - 'A'; + } + } + return c; +} + +void kbdintr(void) { + consoleintr(kbdgetc); +} diff --git a/kbd.h b/kbd.h new file mode 100644 index 0000000..1d11783 --- /dev/null +++ b/kbd.h @@ -0,0 +1,112 @@ +// PC keyboard interface constants + +#define KBSTATP 0x64 // kbd controller status port(I) +#define KBS_DIB 0x01 // kbd data in buffer +#define KBDATAP 0x60 // kbd data port(I) + +#define NO 0 + +#define SHIFT (1 << 0) +#define CTL (1 << 1) +#define ALT (1 << 2) + +#define CAPSLOCK (1 << 3) +#define NUMLOCK (1 << 4) +#define SCROLLLOCK (1 << 5) + +#define E0ESC (1 << 6) + +// Special keycodes +#define KEY_HOME 0xE0 +#define KEY_END 0xE1 +#define KEY_UP 0xE2 +#define KEY_DN 0xE3 +#define KEY_LF 0xE4 +#define KEY_RT 0xE5 +#define KEY_PGUP 0xE6 +#define KEY_PGDN 0xE7 +#define KEY_INS 0xE8 +#define KEY_DEL 0xE9 + +// C('A') == Control-A +#define C(x) (x - '@') + +static uchar shiftcode[256] = +{ + [0x1D] CTL, + [0x2A] SHIFT, + [0x36] SHIFT, + [0x38] ALT, + [0x9D] CTL, + [0xB8] ALT +}; + +static uchar togglecode[256] = +{ + [0x3A] CAPSLOCK, + [0x45] NUMLOCK, + [0x46] SCROLLLOCK +}; + +static uchar normalmap[256] = +{ + NO, 0x1B, '1', '2', '3', '4', '5', '6', // 0x00 + '7', '8', '9', '0', '-', '=', '\b', '\t', + 'q', 'w', 'e', 'r', 't', 'y', 'u', 'i', // 0x10 + 'o', 'p', '[', ']', '\n', NO, 'a', 's', + 'd', 'f', 'g', 'h', 'j', 'k', 'l', ';', // 0x20 + '\'', '`', NO, '\\', 'z', 'x', 'c', 'v', + 'b', 'n', 'm', ',', '.', '/', NO, '*', // 0x30 + NO, ' ', NO, NO, NO, NO, NO, NO, + NO, NO, NO, NO, NO, NO, NO, '7', // 0x40 + '8', '9', '-', '4', '5', '6', '+', '1', + 
'2', '3', '0', '.', NO, NO, NO, NO, // 0x50 + [0x9C] '\n', // KP_Enter + [0xB5] '/', // KP_Div + [0xC8] KEY_UP, [0xD0] KEY_DN, + [0xC9] KEY_PGUP, [0xD1] KEY_PGDN, + [0xCB] KEY_LF, [0xCD] KEY_RT, + [0x97] KEY_HOME, [0xCF] KEY_END, + [0xD2] KEY_INS, [0xD3] KEY_DEL +}; + +static uchar shiftmap[256] = +{ + NO, 033, '!', '@', '#', '$', '%', '^', // 0x00 + '&', '*', '(', ')', '_', '+', '\b', '\t', + 'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I', // 0x10 + 'O', 'P', '{', '}', '\n', NO, 'A', 'S', + 'D', 'F', 'G', 'H', 'J', 'K', 'L', ':', // 0x20 + '"', '~', NO, '|', 'Z', 'X', 'C', 'V', + 'B', 'N', 'M', '<', '>', '?', NO, '*', // 0x30 + NO, ' ', NO, NO, NO, NO, NO, NO, + NO, NO, NO, NO, NO, NO, NO, '7', // 0x40 + '8', '9', '-', '4', '5', '6', '+', '1', + '2', '3', '0', '.', NO, NO, NO, NO, // 0x50 + [0x9C] '\n', // KP_Enter + [0xB5] '/', // KP_Div + [0xC8] KEY_UP, [0xD0] KEY_DN, + [0xC9] KEY_PGUP, [0xD1] KEY_PGDN, + [0xCB] KEY_LF, [0xCD] KEY_RT, + [0x97] KEY_HOME, [0xCF] KEY_END, + [0xD2] KEY_INS, [0xD3] KEY_DEL +}; + +static uchar ctlmap[256] = +{ + NO, NO, NO, NO, NO, NO, NO, NO, + NO, NO, NO, NO, NO, NO, NO, NO, + C('Q'), C('W'), C('E'), C('R'), C('T'), C('Y'), C('U'), C('I'), + C('O'), C('P'), NO, NO, '\r', NO, C('A'), C('S'), + C('D'), C('F'), C('G'), C('H'), C('J'), C('K'), C('L'), NO, + NO, NO, NO, C('\\'), C('Z'), C('X'), C('C'), C('V'), + C('B'), C('N'), C('M'), NO, NO, C('/'), NO, NO, + [0x9C] '\r', // KP_Enter + [0xB5] C('/'), // KP_Div + [0xC8] KEY_UP, [0xD0] KEY_DN, + [0xC9] KEY_PGUP, [0xD1] KEY_PGDN, + [0xCB] KEY_LF, [0xCD] KEY_RT, + [0x97] KEY_HOME, [0xCF] KEY_END, + [0xD2] KEY_INS, [0xD3] KEY_DEL +}; + diff --git a/kernel.ld b/kernel.ld new file mode 100644 index 0000000..4e12e14 --- /dev/null +++ b/kernel.ld @@ -0,0 +1,64 @@ +/* Simple linker script for the JOS kernel. + See the GNU ld 'info' manual ("info ld") to learn the syntax. 
*/ + +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) + +SECTIONS +{ + /* Link the kernel at this address: "." means the current address */ + /* Must be equal to KERNLINK */ + . = 0x80100000; + + .text : AT(0x100000) { + *(.text .stub .text.* .gnu.linkonce.t.*) + } + + PROVIDE(etext = .); /* Define the 'etext' symbol to this value */ + + .rodata : { + *(.rodata .rodata.* .gnu.linkonce.r.*) + } + + /* Include debugging information in kernel memory */ + .stab : { + PROVIDE(__STAB_BEGIN__ = .); + *(.stab); + PROVIDE(__STAB_END__ = .); + } + + .stabstr : { + PROVIDE(__STABSTR_BEGIN__ = .); + *(.stabstr); + PROVIDE(__STABSTR_END__ = .); + } + + /* Adjust the address for the data segment to the next page */ + . = ALIGN(0x1000); + + /* Conventionally, Unix linkers provide pseudo-symbols + * etext, edata, and end, at the end of the text, data, and bss. + * For the kernel mapping, we need the address at the beginning + * of the data section, but that's not one of the conventional + * symbols, because the convention started before there was a + * read-only rodata section between text and data. */ + PROVIDE(data = .); + + /* The data segment */ + .data : { + *(.data) + } + + PROVIDE(edata = .); + + .bss : { + *(.bss) + } + + PROVIDE(end = .); + + /DISCARD/ : { + *(.eh_frame .note.GNU-stack) + } +} diff --git a/kill.c b/kill.c new file mode 100644 index 0000000..bcca9a9 --- /dev/null +++ b/kill.c @@ -0,0 +1,16 @@ +#include "types.h" +#include "stat.h" +#include "user.h" + +int main(int argc, char **argv) { + int i; + + if (argc < 2) { + printf(2, "usage: kill pid...\n"); + exit(); + } + for (i = 1; i < argc; i++) { + kill(atoi(argv[i])); + } + exit(); +} diff --git a/lapic.c b/lapic.c new file mode 100644 index 0000000..946db80 --- /dev/null +++ b/lapic.c @@ -0,0 +1,218 @@ +// The local APIC manages internal (non-I/O) interrupts. +// See Chapter 8 & Appendix C of Intel processor manual volume 3. 
+ +#include "param.h" +#include "types.h" +#include "defs.h" +#include "date.h" +#include "memlayout.h" +#include "traps.h" +#include "mmu.h" +#include "x86.h" + +// Local APIC registers, divided by 4 for use as uint[] indices. +#define ID (0x0020 / 4) // ID +#define VER (0x0030 / 4) // Version +#define TPR (0x0080 / 4) // Task Priority +#define EOI (0x00B0 / 4) // EOI +#define SVR (0x00F0 / 4) // Spurious Interrupt Vector + #define ENABLE 0x00000100 // Unit Enable +#define ESR (0x0280 / 4) // Error Status +#define ICRLO (0x0300 / 4) // Interrupt Command + #define INIT 0x00000500 // INIT/RESET + #define STARTUP 0x00000600 // Startup IPI + #define DELIVS 0x00001000 // Delivery status + #define ASSERT 0x00004000 // Assert interrupt (vs deassert) + #define DEASSERT 0x00000000 + #define LEVEL 0x00008000 // Level triggered + #define BCAST 0x00080000 // Send to all APICs, including self. + #define BUSY 0x00001000 + #define FIXED 0x00000000 +#define ICRHI (0x0310 / 4) // Interrupt Command [63:32] +#define TIMER (0x0320 / 4) // Local Vector Table 0 (TIMER) + #define X1 0x0000000B // divide counts by 1 + #define PERIODIC 0x00020000 // Periodic +#define PCINT (0x0340 / 4) // Performance Counter LVT +#define LINT0 (0x0350 / 4) // Local Vector Table 1 (LINT0) +#define LINT1 (0x0360 / 4) // Local Vector Table 2 (LINT1) +#define ERROR (0x0370 / 4) // Local Vector Table 3 (ERROR) + #define MASKED 0x00010000 // Interrupt masked +#define TICR (0x0380 / 4) // Timer Initial Count +#define TCCR (0x0390 / 4) // Timer Current Count +#define TDCR (0x03E0 / 4) // Timer Divide Configuration + +volatile uint *lapic; // Initialized in mp.c + + +static void lapicw(int index, int value) { + lapic[index] = value; + lapic[ID]; // wait for write to finish, by reading +} + +void lapicinit(void) { + if (!lapic) { + return; + } + + // Enable local APIC; set spurious interrupt vector. 
+ lapicw(SVR, ENABLE | (T_IRQ0 + IRQ_SPURIOUS)); + + // The timer repeatedly counts down at bus frequency + // from lapic[TICR] and then issues an interrupt. + // If xv6 cared more about precise timekeeping, + // TICR would be calibrated using an external time source. + lapicw(TDCR, X1); + lapicw(TIMER, PERIODIC | (T_IRQ0 + IRQ_TIMER)); + lapicw(TICR, 10000000); + + // Disable logical interrupt lines. + lapicw(LINT0, MASKED); + lapicw(LINT1, MASKED); + + // Disable performance counter overflow interrupts + // on machines that provide that interrupt entry. + if (((lapic[VER] >> 16) & 0xFF) >= 4) { + lapicw(PCINT, MASKED); + } + + // Map error interrupt to IRQ_ERROR. + lapicw(ERROR, T_IRQ0 + IRQ_ERROR); + + // Clear error status register (requires back-to-back writes). + lapicw(ESR, 0); + lapicw(ESR, 0); + + // Ack any outstanding interrupts. + lapicw(EOI, 0); + + // Send an Init Level De-Assert to synchronise arbitration ID's. + lapicw(ICRHI, 0); + lapicw(ICRLO, BCAST | INIT | LEVEL); + while (lapic[ICRLO] & DELIVS) { + ; + } + + // Enable interrupts on the APIC (but not on the processor). + lapicw(TPR, 0); +} + +int lapicid(void) { + if (!lapic) { + return 0; + } + return lapic[ID] >> 24; +} + +// Acknowledge interrupt. +void lapiceoi(void) { + if (lapic) { + lapicw(EOI, 0); + } +} + +// Spin for a given number of microseconds. +// On real hardware would want to tune this dynamically. +void microdelay(int us) { +} + +#define CMOS_PORT 0x70 +#define CMOS_RETURN 0x71 + +// Start additional processor running entry code at addr. +// See Appendix B of MultiProcessor Specification. +void lapicstartap(uchar apicid, uint addr) { + int i; + ushort *wrv; + + // "The BSP must initialize CMOS shutdown code to 0AH + // and the warm reset vector (DWORD based at 40:67) to point at + // the AP startup code prior to the [universal startup algorithm]." 
+ outb(CMOS_PORT, 0xF); // offset 0xF is shutdown code + outb(CMOS_PORT + 1, 0x0A); + wrv = (ushort*)P2V((0x40 << 4 | 0x67)); // Warm reset vector + wrv[0] = 0; + wrv[1] = addr >> 4; + + // "Universal startup algorithm." + // Send INIT (level-triggered) interrupt to reset other CPU. + lapicw(ICRHI, apicid << 24); + lapicw(ICRLO, INIT | LEVEL | ASSERT); + microdelay(200); + lapicw(ICRLO, INIT | LEVEL); + microdelay(100); // should be 10ms, but too slow in Bochs! + + // Send startup IPI (twice!) to enter code. + // Regular hardware is supposed to only accept a STARTUP + // when it is in the halted state due to an INIT. So the second + // should be ignored, but it is part of the official Intel algorithm. + // Bochs complains about the second one. Too bad for Bochs. + for (i = 0; i < 2; i++) { + lapicw(ICRHI, apicid << 24); + lapicw(ICRLO, STARTUP | (addr >> 12)); + microdelay(200); + } +} + +#define CMOS_STATA 0x0a +#define CMOS_STATB 0x0b +#define CMOS_UIP (1 << 7) // RTC update in progress + +#define SECS 0x00 +#define MINS 0x02 +#define HOURS 0x04 +#define DAY 0x07 +#define MONTH 0x08 +#define YEAR 0x09 + +static uint cmos_read(uint reg) { + outb(CMOS_PORT, reg); + microdelay(200); + + return inb(CMOS_RETURN); +} + +static void fill_rtcdate(struct rtcdate *r) { + r->second = cmos_read(SECS); + r->minute = cmos_read(MINS); + r->hour = cmos_read(HOURS); + r->day = cmos_read(DAY); + r->month = cmos_read(MONTH); + r->year = cmos_read(YEAR); +} + +// qemu seems to use 24-hour GWT and the values are BCD encoded +void cmostime(struct rtcdate *r) { + struct rtcdate t1, t2; + int sb, bcd; + + sb = cmos_read(CMOS_STATB); + + bcd = (sb & (1 << 2)) == 0; + + // make sure CMOS doesn't modify time while we read it + for (;;) { + fill_rtcdate(&t1); + if (cmos_read(CMOS_STATA) & CMOS_UIP) { + continue; + } + fill_rtcdate(&t2); + if (memcmp(&t1, &t2, sizeof(t1)) == 0) { + break; + } + } + + // convert + if (bcd) { +#define CONV(x) (t1.x = ((t1.x >> 4) * 10) + (t1.x & 0xf)) + 
CONV(second); + CONV(minute); + CONV(hour ); + CONV(day ); + CONV(month ); + CONV(year ); +#undef CONV + } + + *r = t1; + r->year += 2000; +} diff --git a/ln.c b/ln.c new file mode 100644 index 0000000..8883514 --- /dev/null +++ b/ln.c @@ -0,0 +1,14 @@ +#include "types.h" +#include "stat.h" +#include "user.h" + +int main(int argc, char *argv[]) { + if (argc != 3) { + printf(2, "Usage: ln old new\n"); + exit(); + } + if (link(argv[1], argv[2]) < 0) { + printf(2, "link %s %s: failed\n", argv[1], argv[2]); + } + exit(); +} diff --git a/log.c b/log.c new file mode 100644 index 0000000..a64c0f6 --- /dev/null +++ b/log.c @@ -0,0 +1,234 @@ +#include "types.h" +#include "defs.h" +#include "param.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" + +// Simple logging that allows concurrent FS system calls. +// +// A log transaction contains the updates of multiple FS system +// calls. The logging system only commits when there are +// no FS system calls active. Thus there is never +// any reasoning required about whether a commit might +// write an uncommitted system call's updates to disk. +// +// A system call should call begin_op()/end_op() to mark +// its start and end. Usually begin_op() just increments +// the count of in-progress FS system calls and returns. +// But if it thinks the log is close to running out, it +// sleeps until the last outstanding end_op() commits. +// +// The log is a physical re-do log containing disk blocks. +// The on-disk log format: +// header block, containing block #s for block A, B, C, ... +// block A +// block B +// block C +// ... +// Log appends are synchronous. + +// Contents of the header block, used for both the on-disk header block +// and to keep track in memory of logged block# before commit. +struct logheader { + int n; + int block[LOGSIZE]; +}; + +struct log { + struct spinlock lock; + int start; + int size; + int outstanding; // how many FS sys calls are executing. 
+ int committing; // in commit(), please wait. + int dev; + struct logheader lh; +}; +struct log log; + +static void recover_from_log(void); +static void commit(); + +void +initlog(int dev) +{ + if (sizeof(struct logheader) >= BSIZE) + panic("initlog: too big logheader"); + + struct superblock sb; + initlock(&log.lock, "log"); + readsb(dev, &sb); + log.start = sb.logstart; + log.size = sb.nlog; + log.dev = dev; + recover_from_log(); +} + +// Copy committed blocks from log to their home location +static void +install_trans(void) +{ + int tail; + + for (tail = 0; tail < log.lh.n; tail++) { + struct buf *lbuf = bread(log.dev, log.start+tail+1); // read log block + struct buf *dbuf = bread(log.dev, log.lh.block[tail]); // read dst + memmove(dbuf->data, lbuf->data, BSIZE); // copy block to dst + bwrite(dbuf); // write dst to disk + brelse(lbuf); + brelse(dbuf); + } +} + +// Read the log header from disk into the in-memory log header +static void +read_head(void) +{ + struct buf *buf = bread(log.dev, log.start); + struct logheader *lh = (struct logheader *) (buf->data); + int i; + log.lh.n = lh->n; + for (i = 0; i < log.lh.n; i++) { + log.lh.block[i] = lh->block[i]; + } + brelse(buf); +} + +// Write in-memory log header to disk. +// This is the true point at which the +// current transaction commits. +static void +write_head(void) +{ + struct buf *buf = bread(log.dev, log.start); + struct logheader *hb = (struct logheader *) (buf->data); + int i; + hb->n = log.lh.n; + for (i = 0; i < log.lh.n; i++) { + hb->block[i] = log.lh.block[i]; + } + bwrite(buf); + brelse(buf); +} + +static void +recover_from_log(void) +{ + read_head(); + install_trans(); // if committed, copy from log to disk + log.lh.n = 0; + write_head(); // clear the log +} + +// called at the start of each FS system call. 
+// Returns once this operation has been counted in log.outstanding.
+void
+begin_op(void)
+{
+  acquire(&log.lock);
+  for(;;){
+    // Wait while a commit is in progress, or while admitting one more
+    // operation (each reserves MAXOPBLOCKS blocks) might exhaust log space.
+    int must_wait = log.committing ||
+                    log.lh.n + (log.outstanding+1)*MAXOPBLOCKS > LOGSIZE;
+    if(!must_wait)
+      break;
+    sleep(&log, &log.lock);
+  }
+  log.outstanding += 1;
+  release(&log.lock);
+}
+
+// called at the end of each FS system call.
+// commits if this was the last outstanding operation.
+void
+end_op(void)
+{
+  int last;
+
+  acquire(&log.lock);
+  log.outstanding -= 1;
+  if(log.committing)
+    panic("log.committing");
+  last = (log.outstanding == 0);
+  if(last){
+    log.committing = 1;
+  } else {
+    // begin_op() may be waiting for log space,
+    // and decrementing log.outstanding has decreased
+    // the amount of reserved space.
+    wakeup(&log);
+  }
+  release(&log.lock);
+
+  if(!last)
+    return;
+
+  // call commit w/o holding locks, since not allowed
+  // to sleep with locks.
+  commit();
+  acquire(&log.lock);
+  log.committing = 0;
+  wakeup(&log);
+  release(&log.lock);
+}
+
+// Copy modified blocks from cache to log.
+// Block i of the transaction is written to log block log.start+i+1
+// (log.start itself holds the header).
+static void
+write_log(void)
+{
+  int i;
+
+  for(i = 0; i < log.lh.n; i++){
+    struct buf *dst = bread(log.dev, log.start + i + 1);  // log block
+    struct buf *src = bread(log.dev, log.lh.block[i]);    // cache block
+    memmove(dst->data, src->data, BSIZE);
+    bwrite(dst);  // write the log
+    brelse(src);
+    brelse(dst);
+  }
+}
+
+// Commit the current transaction, if it contains any blocks.
+static void
+commit()
+{
+  if(log.lh.n <= 0)
+    return;
+
+  write_log();      // Write modified blocks from cache to log
+  write_head();     // Write header to disk -- the real commit
+  install_trans();  // Now install writes to home locations
+  log.lh.n = 0;
+  write_head();     // Erase the transaction from the log
+}
+
+// Caller has modified b->data and is done with the buffer.
+// Record the block number and pin in the cache with B_DIRTY.
+// commit()/write_log() will do the disk write.
+//
+// log_write() replaces bwrite(); a typical use is:
+//   bp = bread(...)
+// modify bp->data[] +// log_write(bp) +// brelse(bp) +void +log_write(struct buf *b) +{ + int i; + + if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1) + panic("too big a transaction"); + if (log.outstanding < 1) + panic("log_write outside of trans"); + + acquire(&log.lock); + for (i = 0; i < log.lh.n; i++) { + if (log.lh.block[i] == b->blockno) // log absorption + break; + } + log.lh.block[i] = b->blockno; + if (i == log.lh.n) + log.lh.n++; + b->flags |= B_DIRTY; // prevent eviction + release(&log.lock); +} + diff --git a/ls.c b/ls.c new file mode 100644 index 0000000..2862913 --- /dev/null +++ b/ls.c @@ -0,0 +1,85 @@ +#include "types.h" +#include "stat.h" +#include "user.h" +#include "fs.h" + +char* +fmtname(char *path) +{ + static char buf[DIRSIZ+1]; + char *p; + + // Find first character after last slash. + for(p=path+strlen(path); p >= path && *p != '/'; p--) + ; + p++; + + // Return blank-padded name. + if(strlen(p) >= DIRSIZ) + return p; + memmove(buf, p, strlen(p)); + memset(buf+strlen(p), ' ', DIRSIZ-strlen(p)); + return buf; +} + +void +ls(char *path) +{ + char buf[512], *p; + int fd; + struct dirent de; + struct stat st; + + if((fd = open(path, 0)) < 0){ + printf(2, "ls: cannot open %s\n", path); + return; + } + + if(fstat(fd, &st) < 0){ + printf(2, "ls: cannot stat %s\n", path); + close(fd); + return; + } + + switch(st.type){ + case T_FILE: + printf(1, "%s %d %d %d\n", fmtname(path), st.type, st.ino, st.size); + break; + + case T_DIR: + if(strlen(path) + 1 + DIRSIZ + 1 > sizeof buf){ + printf(1, "ls: path too long\n"); + break; + } + strcpy(buf, path); + p = buf+strlen(buf); + *p++ = '/'; + while(read(fd, &de, sizeof(de)) == sizeof(de)){ + if(de.inum == 0) + continue; + memmove(p, de.name, DIRSIZ); + p[DIRSIZ] = 0; + if(stat(buf, &st) < 0){ + printf(1, "ls: cannot stat %s\n", buf); + continue; + } + printf(1, "%s %d %d %d\n", fmtname(buf), st.type, st.ino, st.size); + } + break; + } + close(fd); +} + +int +main(int argc, char *argv[]) +{ + int i; +
+ if(argc < 2){ + ls("."); + exit(); + } + for(i=1; istarted), 1); // tell startothers() we're up + scheduler(); // start running processes +} + +pde_t entrypgdir[]; // For entry.S + +// Start the non-boot (AP) processors. +static void startothers(void) { + extern uchar _binary_entryother_start[], _binary_entryother_size[]; + uchar *code; + struct cpu *c; + char *stack; + + // Write entry code to unused memory at 0x7000. + // The linker has placed the image of entryother.S in + // _binary_entryother_start. + code = P2V(0x7000); + memmove(code, _binary_entryother_start, (uint)_binary_entryother_size); + + for (c = cpus; c < cpus + ncpu; c++) { + if (c == mycpu()) { // We've started already. + continue; + } + + // Tell entryother.S what stack to use, where to enter, and what + // pgdir to use. We cannot use kpgdir yet, because the AP processor + // is running in low memory, so we use entrypgdir for the APs too. + stack = kalloc(); + *(void**)(code - 4) = stack + KSTACKSIZE; + *(void(**)(void))(code - 8) = mpenter; + *(int**)(code - 12) = (void *) V2P(entrypgdir); + + lapicstartap(c->apicid, V2P(code)); + + // wait for cpu to finish mpmain() + while (c->started == 0) { + ; + } + } +} + +// The boot page table used in entry.S and entryother.S. +// Page directories (and page tables) must start on page boundaries, +// hence the __aligned__ attribute. +// PTE_PS in a page directory entry enables 4Mbyte pages. + +__attribute__((__aligned__(PGSIZE))) +pde_t entrypgdir[NPDENTRIES] = { + // Map VA's [0, 4MB) to PA's [0, 4MB) + [0] = (0) | PTE_P | PTE_W | PTE_PS, + // Map VA's [KERNBASE, KERNBASE+4MB) to PA's [0, 4MB) + [KERNBASE >> PDXSHIFT] = (0) | PTE_P | PTE_W | PTE_PS, +}; + + + + + + + + diff --git a/memide.c b/memide.c new file mode 100644 index 0000000..d0a8cf8 --- /dev/null +++ b/memide.c @@ -0,0 +1,60 @@ +// Fake IDE disk; stores blocks in memory. +// Useful for running kernel without scratch disk. 
+ +#include "types.h" +#include "defs.h" +#include "param.h" +#include "mmu.h" +#include "proc.h" +#include "x86.h" +#include "traps.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" + +extern uchar _binary_fs_img_start[], _binary_fs_img_size[]; + +static int disksize; +static uchar *memdisk; + +void ideinit(void) { + memdisk = _binary_fs_img_start; + disksize = (uint)_binary_fs_img_size / BSIZE; +} + +// Interrupt handler. +void ideintr(void) { + // no-op +} + +// Sync buf with disk. +// If B_DIRTY is set, write buf to disk, clear B_DIRTY, set B_VALID. +// Else if B_VALID is not set, read buf from disk, set B_VALID. +void iderw(struct buf *b) { + uchar *p; + + if (!holdingsleep(&b->lock)) { + panic("iderw: buf not locked"); + } + if ((b->flags & (B_VALID | B_DIRTY)) == B_VALID) { + panic("iderw: nothing to do"); + } + if (b->dev != 1) { + panic("iderw: request not for disk 1"); + } + if (b->blockno >= disksize) { + panic("iderw: block out of range"); + } + + p = memdisk + b->blockno * BSIZE; + + if (b->flags & B_DIRTY) { + b->flags &= ~B_DIRTY; + memmove(p, b->data, BSIZE); + } + else { + memmove(b->data, p, BSIZE); + } + b->flags |= B_VALID; +} diff --git a/memlayout.h b/memlayout.h new file mode 100644 index 0000000..8942340 --- /dev/null +++ b/memlayout.h @@ -0,0 +1,15 @@ +// Memory layout + +#define EXTMEM 0x100000 // Start of extended memory +#define PHYSTOP 0xE000000 // Top physical memory +#define DEVSPACE 0xFE000000 // Other devices are at high addresses + +// Key addresses for address space layout (see kmap in vm.c for layout) +#define KERNBASE 0x80000000 // First kernel virtual address +#define KERNLINK (KERNBASE + EXTMEM) // Address where kernel is linked + +#define V2P(a) (((uint) (a)) - KERNBASE) +#define P2V(a) ((void *)(((char *) (a)) + KERNBASE)) + +#define V2P_WO(x) ((x) - KERNBASE) // same as V2P, but without casts +#define P2V_WO(x) ((x) + KERNBASE) // same as P2V, but without casts diff --git a/mkdir.c 
b/mkdir.c new file mode 100644 index 0000000..c83dc84 --- /dev/null +++ b/mkdir.c @@ -0,0 +1,21 @@ +#include "types.h" +#include "stat.h" +#include "user.h" + +int main(int argc, char *argv[]) { + int i; + + if (argc < 2) { + printf(2, "Usage: mkdir files...\n"); + exit(); + } + + for (i = 1; i < argc; i++) { + if (mkdir(argv[i]) < 0) { + printf(2, "mkdir: %s failed to create\n", argv[i]); + break; + } + } + + exit(); +} diff --git a/mkfs.c b/mkfs.c new file mode 100644 index 0000000..a970b77 --- /dev/null +++ b/mkfs.c @@ -0,0 +1,283 @@ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <assert.h> + +#define stat xv6_stat // avoid clash with host struct stat +#include "types.h" +#include "fs.h" +#include "stat.h" +#include "param.h" + +#ifndef static_assert +#define static_assert(a, b) do { switch (0) case 0: \ + case (a): \ + ; } while (0) +#endif + +#define NINODES 200 + +// Disk layout: +// [ boot block | sb block | log | inode blocks | free bit map | data blocks ] + +int nbitmap = FSSIZE / (BSIZE * 8) + 1; +int ninodeblocks = NINODES / IPB + 1; +int nlog = LOGSIZE; +int nmeta; // Number of meta blocks (boot, sb, nlog, inode, bitmap) +int nblocks; // Number of data blocks + +int fsfd; +struct superblock sb; +char zeroes[BSIZE]; +uint freeinode = 1; +uint freeblock; + + +void balloc(int); +void wsect(uint, void*); +void winode(uint, struct dinode*); +void rinode(uint inum, struct dinode *ip); +void rsect(uint sec, void *buf); +uint ialloc(ushort type); +void iappend(uint inum, void *p, int n); + +// convert to intel byte order +ushort xshort(ushort x) { + ushort y; + uchar *a = (uchar*)&y; + a[0] = x; + a[1] = x >> 8; + return y; +} + +uint xint(uint x) { + uint y; + uchar *a = (uchar*)&y; + a[0] = x; + a[1] = x >> 8; + a[2] = x >> 16; + a[3] = x >> 24; + return y; +} + +int main(int argc, char *argv[]) { + int i, cc, fd; + uint rootino, inum, off; + struct dirent de; + char buf[BSIZE]; + struct dinode din; + + + static_assert(sizeof(int) == 4, "Integers must be 4
bytes!"); + + if (argc < 2) { + fprintf(stderr, "Usage: mkfs fs.img files...\n"); + exit(1); + } + + assert((BSIZE % sizeof(struct dinode)) == 0); + assert((BSIZE % sizeof(struct dirent)) == 0); + + fsfd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, 0666); + if (fsfd < 0) { + perror(argv[1]); + exit(1); + } + + // 1 fs block = 1 disk sector + nmeta = 2 + nlog + ninodeblocks + nbitmap; + nblocks = FSSIZE - nmeta; + + sb.size = xint(FSSIZE); + sb.nblocks = xint(nblocks); + sb.ninodes = xint(NINODES); + sb.nlog = xint(nlog); + sb.logstart = xint(2); + sb.inodestart = xint(2 + nlog); + sb.bmapstart = xint(2 + nlog + ninodeblocks); + + printf("nmeta %d (boot, super, log blocks %u inode blocks %u, bitmap blocks %u) blocks %d total %d\n", + nmeta, nlog, ninodeblocks, nbitmap, nblocks, FSSIZE); + + freeblock = nmeta; // the first free block that we can allocate + + for (i = 0; i < FSSIZE; i++) { + wsect(i, zeroes); + } + + memset(buf, 0, sizeof(buf)); + memmove(buf, &sb, sizeof(sb)); + wsect(1, buf); + + rootino = ialloc(T_DIR); + assert(rootino == ROOTINO); + + bzero(&de, sizeof(de)); + de.inum = xshort(rootino); + strcpy(de.name, "."); + iappend(rootino, &de, sizeof(de)); + + bzero(&de, sizeof(de)); + de.inum = xshort(rootino); + strcpy(de.name, ".."); + iappend(rootino, &de, sizeof(de)); + + for (i = 2; i < argc; i++) { + assert(index(argv[i], '/') == 0); + + if ((fd = open(argv[i], 0)) < 0) { + perror(argv[i]); + exit(1); + } + + // Skip leading _ in name when writing to file system. + // The binaries are named _rm, _cat, etc. to keep the + // build operating system from trying to execute them + // in place of system binaries like rm and cat. 
+ if (argv[i][0] == '_') { + ++argv[i]; + } + + inum = ialloc(T_FILE); + + bzero(&de, sizeof(de)); + de.inum = xshort(inum); + strncpy(de.name, argv[i], DIRSIZ); + iappend(rootino, &de, sizeof(de)); + + while ((cc = read(fd, buf, sizeof(buf))) > 0) { + iappend(inum, buf, cc); + } + + close(fd); + } + + // fix size of root inode dir + rinode(rootino, &din); + off = xint(din.size); + off = ((off / BSIZE) + 1) * BSIZE; + din.size = xint(off); + winode(rootino, &din); + + balloc(freeblock); + + exit(0); +} + +void wsect(uint sec, void *buf) { + if (lseek(fsfd, sec * BSIZE, 0) != sec * BSIZE) { + perror("lseek"); + exit(1); + } + if (write(fsfd, buf, BSIZE) != BSIZE) { + perror("write"); + exit(1); + } +} + +void winode(uint inum, struct dinode *ip) { + char buf[BSIZE]; + uint bn; + struct dinode *dip; + + bn = IBLOCK(inum, sb); + rsect(bn, buf); + dip = ((struct dinode*)buf) + (inum % IPB); + *dip = *ip; + wsect(bn, buf); +} + +void rinode(uint inum, struct dinode *ip) { + char buf[BSIZE]; + uint bn; + struct dinode *dip; + + bn = IBLOCK(inum, sb); + rsect(bn, buf); + dip = ((struct dinode*)buf) + (inum % IPB); + *ip = *dip; +} + +void rsect(uint sec, void *buf) { + if (lseek(fsfd, sec * BSIZE, 0) != sec * BSIZE) { + perror("lseek"); + exit(1); + } + if (read(fsfd, buf, BSIZE) != BSIZE) { + perror("read"); + exit(1); + } +} + +uint ialloc(ushort type) { + uint inum = freeinode++; + struct dinode din; + + bzero(&din, sizeof(din)); + din.type = xshort(type); + din.nlink = xshort(1); + din.size = xint(0); + winode(inum, &din); + return inum; +} + +void balloc(int used) { + uchar buf[BSIZE]; + int i; + + printf("balloc: first %d blocks have been allocated\n", used); + assert(used < BSIZE * 8); + bzero(buf, BSIZE); + for (i = 0; i < used; i++) { + buf[i / 8] = buf[i / 8] | (0x1 << (i % 8)); + } + printf("balloc: write bitmap block at sector %d\n", sb.bmapstart); + wsect(sb.bmapstart, buf); +} + +#define min(a, b) ((a) < (b) ? 
(a) : (b)) + +void iappend(uint inum, void *xp, int n) { + char *p = (char*)xp; + uint fbn, off, n1; + struct dinode din; + char buf[BSIZE]; + uint indirect[NINDIRECT]; + uint x; + + rinode(inum, &din); + off = xint(din.size); + // printf("append inum %d at off %d sz %d\n", inum, off, n); + while (n > 0) { + fbn = off / BSIZE; + assert(fbn < MAXFILE); + if (fbn < NDIRECT) { + if (xint(din.addrs[fbn]) == 0) { + din.addrs[fbn] = xint(freeblock++); + } + x = xint(din.addrs[fbn]); + } + else { + if (xint(din.addrs[NDIRECT]) == 0) { + din.addrs[NDIRECT] = xint(freeblock++); + } + rsect(xint(din.addrs[NDIRECT]), (char*)indirect); + if (indirect[fbn - NDIRECT] == 0) { + indirect[fbn - NDIRECT] = xint(freeblock++); + wsect(xint(din.addrs[NDIRECT]), (char*)indirect); + } + x = xint(indirect[fbn - NDIRECT]); + } + n1 = min(n, (fbn + 1) * BSIZE - off); + rsect(x, buf); + bcopy(p, buf + off - (fbn * BSIZE), n1); + wsect(x, buf); + n -= n1; + off += n1; + p += n1; + } + din.size = xint(off); + winode(inum, &din); +} diff --git a/mmu.h b/mmu.h new file mode 100644 index 0000000..b126119 --- /dev/null +++ b/mmu.h @@ -0,0 +1,181 @@ +// This file contains definitions for the +// x86 memory management unit (MMU). + +// Eflags register +#define FL_IF 0x00000200 // Interrupt Enable + +// Control Register flags +#define CR0_PE 0x00000001 // Protection Enable +#define CR0_WP 0x00010000 // Write Protect +#define CR0_PG 0x80000000 // Paging + +#define CR4_PSE 0x00000010 // Page size extension + +// various segment selectors. +#define SEG_KCODE 1 // kernel code +#define SEG_KDATA 2 // kernel data+stack +#define SEG_UCODE 3 // user code +#define SEG_UDATA 4 // user data+stack +#define SEG_TSS 5 // this process's task state + +// cpu->gdt[NSEGS] holds the above segments. 
+#define NSEGS 6 + +#ifndef __ASSEMBLER__ +// Segment Descriptor +struct segdesc { + uint lim_15_0 : 16; // Low bits of segment limit + uint base_15_0 : 16; // Low bits of segment base address + uint base_23_16 : 8; // Middle bits of segment base address + uint type : 4; // Segment type (see STS_ constants) + uint s : 1; // 0 = system, 1 = application + uint dpl : 2; // Descriptor Privilege Level + uint p : 1; // Present + uint lim_19_16 : 4; // High bits of segment limit + uint avl : 1; // Unused (available for software use) + uint rsv1 : 1; // Reserved + uint db : 1; // 0 = 16-bit segment, 1 = 32-bit segment + uint g : 1; // Granularity: limit scaled by 4K when set + uint base_31_24 : 8; // High bits of segment base address +}; + +// Normal segment +#define SEG(type, base, lim, dpl) (struct segdesc) \ + { ((lim) >> 12) & 0xffff, (uint)(base) & 0xffff, \ + ((uint)(base) >> 16) & 0xff, type, 1, dpl, 1, \ + (uint)(lim) >> 28, 0, 0, 1, 1, (uint)(base) >> 24 } +#define SEG16(type, base, lim, dpl) (struct segdesc) \ + { (lim) & 0xffff, (uint)(base) & 0xffff, \ + ((uint)(base) >> 16) & 0xff, type, 1, dpl, 1, \ + (uint)(lim) >> 16, 0, 0, 1, 0, (uint)(base) >> 24 } +#endif + +#define DPL_USER 0x3 // User DPL + +// Application segment type bits +#define STA_X 0x8 // Executable segment +#define STA_W 0x2 // Writeable (non-executable segments) +#define STA_R 0x2 // Readable (executable segments) + +// System segment type bits +#define STS_T32A 0x9 // Available 32-bit TSS +#define STS_IG32 0xE // 32-bit Interrupt Gate +#define STS_TG32 0xF // 32-bit Trap Gate + +// A virtual address 'la' has a three-part structure as follows: +// +// +--------10------+-------10-------+---------12----------+ +// | Page Directory | Page Table | Offset within Page | +// | Index | Index | | +// +----------------+----------------+---------------------+ +// \--- PDX(va) --/ \--- PTX(va) --/ + +// page directory index +#define PDX(va) (((uint)(va) >> PDXSHIFT) & 0x3FF) + +// page table index 
+#define PTX(va) (((uint)(va) >> PTXSHIFT) & 0x3FF) + +// construct virtual address from indexes and offset +#define PGADDR(d, t, o) ((uint)((d) << PDXSHIFT | (t) << PTXSHIFT | (o))) + +// Page directory and page table constants. +#define NPDENTRIES 1024 // # directory entries per page directory +#define NPTENTRIES 1024 // # PTEs per page table +#define PGSIZE 4096 // bytes mapped by a page + +#define PTXSHIFT 12 // offset of PTX in a linear address +#define PDXSHIFT 22 // offset of PDX in a linear address + +#define PGROUNDUP(sz) (((sz) + PGSIZE - 1) & ~(PGSIZE - 1)) +#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE - 1)) + +// Page table/directory entry flags. +#define PTE_P 0x001 // Present +#define PTE_W 0x002 // Writeable +#define PTE_U 0x004 // User +#define PTE_PS 0x080 // Page Size + +// Address in page table or page directory entry +#define PTE_ADDR(pte) ((uint)(pte) & ~0xFFF) +#define PTE_FLAGS(pte) ((uint)(pte) & 0xFFF) + +#ifndef __ASSEMBLER__ +typedef uint pte_t; + +// Task state segment format +struct taskstate { + uint link; // Old ts selector + uint esp0; // Stack pointers and segment selectors + ushort ss0; // after an increase in privilege level + ushort padding1; + uint *esp1; + ushort ss1; + ushort padding2; + uint *esp2; + ushort ss2; + ushort padding3; + void *cr3; // Page directory base + uint *eip; // Saved state from last task switch + uint eflags; + uint eax; // More saved state (registers) + uint ecx; + uint edx; + uint ebx; + uint *esp; + uint *ebp; + uint esi; + uint edi; + ushort es; // Even more saved state (segment selectors) + ushort padding4; + ushort cs; + ushort padding5; + ushort ss; + ushort padding6; + ushort ds; + ushort padding7; + ushort fs; + ushort padding8; + ushort gs; + ushort padding9; + ushort ldt; + ushort padding10; + ushort t; // Trap on task switch + ushort iomb; // I/O map base address +}; + +// Gate descriptors for interrupts and traps +struct gatedesc { + uint off_15_0 : 16; // low 16 bits of offset in segment + uint 
cs : 16; // code segment selector + uint args : 5; // # args, 0 for interrupt/trap gates + uint rsv1 : 3; // reserved(should be zero I guess) + uint type : 4; // type(STS_{IG32,TG32}) + uint s : 1; // must be 0 (system) + uint dpl : 2; // descriptor(meaning new) privilege level + uint p : 1; // Present + uint off_31_16 : 16; // high bits of offset in segment +}; + +// Set up a normal interrupt/trap gate descriptor. +// - istrap: 1 for a trap (= exception) gate, 0 for an interrupt gate. +// interrupt gate clears FL_IF, trap gate leaves FL_IF alone +// - sel: Code segment selector for interrupt/trap handler +// - off: Offset in code segment for interrupt/trap handler +// - dpl: Descriptor Privilege Level - +// the privilege level required for software to invoke +// this interrupt/trap gate explicitly using an int instruction. +#define SETGATE(gate, istrap, sel, off, d) \ + { \ + (gate).off_15_0 = (uint)(off) & 0xffff; \ + (gate).cs = (sel); \ + (gate).args = 0; \ + (gate).rsv1 = 0; \ + (gate).type = (istrap) ? STS_TG32 : STS_IG32; \ + (gate).s = 0; \ + (gate).dpl = (d); \ + (gate).p = 1; \ + (gate).off_31_16 = (uint)(off) >> 16; \ + } + +#endif diff --git a/mp.c b/mp.c new file mode 100644 index 0000000..bb1454d --- /dev/null +++ b/mp.c @@ -0,0 +1,141 @@ +// Multiprocessor support +// Search memory for MP description structures. +// http://developer.intel.com/design/pentium/datashts/24201606.pdf + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "mp.h" +#include "x86.h" +#include "mmu.h" +#include "proc.h" + +struct cpu cpus[NCPU]; +int ncpu; +uchar ioapicid; + +static uchar sum(uchar *addr, int len) { + int i, sum; + + sum = 0; + for (i = 0; i < len; i++) { + sum += addr[i]; + } + return sum; +} + +// Look for an MP structure in the len bytes at addr. 
+// Candidates are examined every sizeof(struct mp) bytes; a match needs the
+// "_MP_" signature and a byte sum of zero over the whole structure.
+// Returns 0 when nothing in the range matches.
+static struct mp*mpsearch1(uint a, int len) {
+    uchar *cur = P2V(a);
+    uchar *end = cur + len;
+
+    while (cur < end) {
+        if (memcmp(cur, "_MP_", 4) == 0 && sum(cur, sizeof(struct mp)) == 0) {
+            return (struct mp*)cur;
+        }
+        cur += sizeof(struct mp);
+    }
+    return 0;
+}
+
+// Search for the MP Floating Pointer Structure, which according to the
+// spec is in one of the following three locations:
+// 1) in the first KB of the EBDA;
+// 2) in the last KB of system base memory;
+// 3) in the BIOS ROM between 0xF0000 and 0xFFFFF.
+// Location 2 is only tried when the BIOS data area reports no EBDA.
+static struct mp*mpsearch(void) {
+    uchar *bda = (uchar *) P2V(0x400);  // BIOS data area
+    struct mp *found;
+    uint base;
+
+    // 16-bit value at BDA offset 0x0E, shifted by 4 to form an address.
+    base = ((bda[0x0F] << 8) | bda[0x0E]) << 4;
+    if (base != 0) {
+        found = mpsearch1(base, 1024);
+    }
+    else {
+        // 16-bit value at BDA offset 0x13, scaled by 1024 (size in KB).
+        base = ((bda[0x14] << 8) | bda[0x13]) * 1024;
+        found = mpsearch1(base - 1024, 1024);
+    }
+    if (found) {
+        return found;
+    }
+    return mpsearch1(0xF0000, 0x10000);
+}
+
+// Search for an MP configuration table. For now,
+// don't accept the default configurations (physaddr == 0).
+// Check for correct signature, calculate the checksum and,
+// if correct, check the version.
+// To do: check extended table checksum.
+static struct mpconf*mpconfig(struct mp **pmp) {
+    struct mpconf *conf;
+    struct mp *mp;
+
+    if ((mp = mpsearch()) == 0 || mp->physaddr == 0) {
+        return 0;
+    }
+    conf = (struct mpconf*) P2V((uint) mp->physaddr);
+    if (memcmp(conf, "PCMP", 4) != 0) {
+        return 0;
+    }
+    if (conf->version != 1 && conf->version != 4) {
+        return 0;
+    }
+    if (sum((uchar*)conf, conf->length) != 0) {
+        return 0;
+    }
+    *pmp = mp;
+    return conf;
+}
+
+// Parse the MP configuration table: record the local APIC address, each
+// processor's APIC id (in cpus[], at most NCPU of them) and the I/O APIC id,
+// then program the IMCR if the floating pointer structure reports one.
+// Panics if no configuration table is found or if the table contains an
+// entry type this code does not know.
+void mpinit(void) {
+    uchar *p, *e;
+    int ismp;
+    struct mp *mp;
+    struct mpconf *conf;
+    struct mpproc *proc;
+    struct mpioapic *ioapic;
+
+    if ((conf = mpconfig(&mp)) == 0) {
+        panic("Expect to run on an SMP");
+    }
+    ismp = 1;
+    lapic = (uint*)conf->lapicaddr;
+    // Every known entry type advances p and continues. An unknown type has
+    // an unknown size, so p cannot be advanced: clearing ismp must also end
+    // the loop, otherwise it would spin forever on the same entry and never
+    // reach the panic below.
+    for (p = (uchar*)(conf + 1), e = (uchar*)conf + conf->length; ismp && p < e;) {
+        switch (*p) {
+            case MPPROC:
+                proc = (struct mpproc*)p;
+                if (ncpu < NCPU) {
+                    cpus[ncpu].apicid = proc->apicid; // apicid may differ from ncpu
+                    ncpu++;
+                }
+                p += sizeof(struct mpproc);
+                continue;
+            case MPIOAPIC:
+                ioapic = (struct mpioapic*)p;
+                ioapicid = ioapic->apicno;
+                p += sizeof(struct mpioapic);
+                continue;
+            case MPBUS:
+            case MPIOINTR:
+            case MPLINTR:
+                p += 8; // these entries are skipped as 8 bytes each
+                continue;
+            default:
+                ismp = 0;
+                break;
+        }
+    }
+    if (!ismp) {
+        panic("Didn't find a suitable machine");
+    }
+
+    if (mp->imcrp) {
+        // Bochs doesn't support IMCR, so this doesn't run on Bochs.
+        // But it would on real hardware.
+        outb(0x22, 0x70); // Select IMCR
+        outb(0x23, inb(0x23) | 1); // Mask external interrupts.
+    }
+}
diff --git a/mp.h b/mp.h
new file mode 100644
index 0000000..c316dde
--- /dev/null
+++ b/mp.h
@@ -0,0 +1,56 @@
+// See MultiProcessor Specification Version 1.[14]
+
+struct mp {             // floating pointer
+    uchar signature[4]; // "_MP_"
+    void *physaddr;     // phys addr of MP config table
+    uchar length;       // 1
+    uchar specrev;      // [14]
+    uchar checksum;     // all bytes must add up to 0
+    uchar type;         // MP system config type
+    uchar imcrp;
+    uchar reserved[3];
+};
+
+struct mpconf {         // configuration table header
+    uchar signature[4]; // "PCMP"
+    ushort length;      // total table length
+    uchar version;      // [14]
+    uchar checksum;     // all bytes must add up to 0
+    uchar product[20];  // product id
+    uint *oemtable;     // OEM table pointer
+    ushort oemlength;   // OEM table length
+    ushort entry;       // entry count
+    uint *lapicaddr;    // address of local APIC
+    ushort xlength;     // extended table length
+    uchar xchecksum;    // extended table checksum
+    uchar reserved;
+};
+
+struct mpproc {         // processor table entry
+    uchar type;         // entry type (0)
+    uchar apicid;       // local APIC id
+    uchar version;      // local APIC version
+    uchar flags;        // CPU flags
+    #define MPBOOT 0x02 // This proc is the bootstrap processor.
+    uchar signature[4]; // CPU signature
+    uint feature;       // feature flags from CPUID instruction
+    uchar reserved[8];
+};
+
+struct mpioapic {       // I/O APIC table entry
+    uchar type;         // entry type (2)
+    uchar apicno;       // I/O APIC id
+    uchar version;      // I/O APIC version
+    uchar flags;        // I/O APIC flags
+    uint *addr;         // I/O APIC address
+};
+
+// Table entry types
+#define MPPROC    0x00  // One per processor
+#define MPBUS     0x01  // One per bus
+#define MPIOAPIC  0x02  // One per I/O APIC
+#define MPIOINTR  0x03  // One per bus interrupt source
+#define MPLINTR   0x04  // One per system interrupt source
+
+
+
diff --git a/param.h b/param.h
new file mode 100644
index 0000000..a7e90ef
--- /dev/null
+++ b/param.h
@@ -0,0 +1,14 @@
+#define NPROC        64  // maximum number of processes
+#define KSTACKSIZE 4096  // size of per-process kernel stack
+#define NCPU          8  // maximum number of CPUs
+#define NOFILE       16  // open files per process
+#define NFILE       100  // open files per system
+#define NINODE       50  // maximum number of active i-nodes
+#define NDEV         10  // maximum major device number
+#define ROOTDEV       1  // device number of file system root disk
+#define MAXARG       32  // max exec arguments
+#define MAXOPBLOCKS  10  // max # of blocks any FS op writes
+#define LOGSIZE      (MAXOPBLOCKS*3)  // max data blocks in on-disk log
+#define NBUF         (MAXOPBLOCKS*3)  // size of disk block cache
+#define FSSIZE       1000  // size of file system in blocks
+
diff --git a/picirq.c b/picirq.c
new file mode 100644
index 0000000..2abdc69
--- /dev/null
+++ b/picirq.c
@@ -0,0 +1,17 @@
+#include "types.h"
+#include "x86.h"
+#include "traps.h"
+
+// I/O Addresses of the two programmable interrupt controllers
+#define IO_PIC1 0x20 // Master (IRQs 0-7)
+#define IO_PIC2 0xA0 // Slave (IRQs 8-15)
+
+// Don't use the 8259A interrupt controllers. Xv6 assumes SMP hardware.
+void picinit(void) { + // mask all interrupts + outb(IO_PIC1 + 1, 0xFF); + outb(IO_PIC2 + 1, 0xFF); +} + + + diff --git a/pipe.c b/pipe.c new file mode 100644 index 0000000..579d7f5 --- /dev/null +++ b/pipe.c @@ -0,0 +1,122 @@ +#include "types.h" +#include "defs.h" +#include "param.h" +#include "mmu.h" +#include "proc.h" +#include "fs.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "file.h" + +#define PIPESIZE 512 + +struct pipe { + struct spinlock lock; + char data[PIPESIZE]; + uint nread; // number of bytes read + uint nwrite; // number of bytes written + int readopen; // read fd is still open + int writeopen; // write fd is still open +}; + +void cleanuppipealloc(struct pipe *p, struct file **f0, struct file **f1) { + if (p) { + kfree((char*)p); + } + if (*f0) { + fileclose(*f0); + } + if (*f1) { + fileclose(*f1); + } +} + +int pipealloc(struct file **f0, struct file **f1) { + struct pipe *p; + + p = 0; + *f0 = *f1 = 0; + if ((*f0 = filealloc()) == 0 || (*f1 = filealloc()) == 0) { + cleanuppipealloc(p, f0, f1); + return -1; + } + if ((p = (struct pipe*)kalloc()) == 0) { + cleanuppipealloc(p, f0, f1); + return -1; + } + p->readopen = 1; + p->writeopen = 1; + p->nwrite = 0; + p->nread = 0; + initlock(&p->lock, "pipe"); + (*f0)->type = FD_PIPE; + (*f0)->readable = 1; + (*f0)->writable = 0; + (*f0)->pipe = p; + (*f1)->type = FD_PIPE; + (*f1)->readable = 0; + (*f1)->writable = 1; + (*f1)->pipe = p; + return 0; +} + +void pipeclose(struct pipe *p, int writable) { + acquire(&p->lock); + if (writable) { + p->writeopen = 0; + wakeup(&p->nread); + } + else { + p->readopen = 0; + wakeup(&p->nwrite); + } + if (p->readopen == 0 && p->writeopen == 0) { + release(&p->lock); + kfree((char*)p); + } + else { + release(&p->lock); + } +} + +int pipewrite(struct pipe *p, char *addr, int n) { + int i; + + acquire(&p->lock); + for (i = 0; i < n; i++) { + while (p->nwrite == p->nread + PIPESIZE) { //DOC: pipewrite-full + if (p->readopen == 0 || myproc()->killed) { + 
release(&p->lock); + return -1; + } + wakeup(&p->nread); + sleep(&p->nwrite, &p->lock); //DOC: pipewrite-sleep + } + p->data[p->nwrite++ % PIPESIZE] = addr[i]; + } + wakeup(&p->nread); //DOC: pipewrite-wakeup1 + release(&p->lock); + return n; +} + +int piperead(struct pipe *p, char *addr, int n) { + int i; + + acquire(&p->lock); + while (p->nread == p->nwrite && p->writeopen) { //DOC: pipe-empty + if (myproc()->killed) { + release(&p->lock); + return -1; + } + sleep(&p->nread, &p->lock); //DOC: piperead-sleep + } + for (i = 0; i < n; i++) { //DOC: piperead-copy + if (p->nread == p->nwrite) { + break; + } + addr[i] = p->data[p->nread++ % PIPESIZE]; + } + wakeup(&p->nwrite); //DOC: piperead-wakeup + release(&p->lock); + return i; +} diff --git a/printf.c b/printf.c new file mode 100644 index 0000000..6f3b091 --- /dev/null +++ b/printf.c @@ -0,0 +1,91 @@ +#include "types.h" +#include "stat.h" +#include "user.h" + +static void putc(int fd, char c) { + write(fd, &c, 1); +} + +static void printint(int fd, int xx, int base, int sgn) { + static char digits[] = "0123456789ABCDEF"; + char buf[16]; + int i, neg; + uint x; + + neg = 0; + if (sgn && xx < 0) { + neg = 1; + x = -xx; + } + else { + x = xx; + } + + i = 0; + do { + buf[i++] = digits[x % base]; + } + while ((x /= base) != 0); + if (neg) { + buf[i++] = '-'; + } + + while (--i >= 0) { + putc(fd, buf[i]); + } +} + +// Print to the given fd. Only understands %d, %x, %p, %s, %c, and %%. +void printf(int fd, const char *fmt, ...) 
{ + char *s; + int c, i, state; + uint *ap; + + state = 0; + ap = (uint*)(void*)&fmt + 1; + for (i = 0; fmt[i]; i++) { + c = fmt[i] & 0xff; + if (state == 0) { + if (c == '%') { + state = '%'; + } + else { + putc(fd, c); + } + } + else if (state == '%') { + if (c == 'd') { + printint(fd, *ap, 10, 1); + ap++; + } + else if (c == 'x' || c == 'p') { + printint(fd, *ap, 16, 0); + ap++; + } + else if (c == 's') { + s = (char*)*ap; + ap++; + if (s == 0) { + s = "(null)"; + } + while (*s != 0) { + putc(fd, *s); + s++; + } + } + else if (c == 'c') { + putc(fd, *ap); + ap++; + } + else if (c == '%') { + putc(fd, c); + } + else { + // Unknown % sequence. Print it to draw attention. + putc(fd, '%'); + putc(fd, c); + } + state = 0; + } + } +} diff --git a/proc.c b/proc.c new file mode 100644 index 0000000..dc3ea65 --- /dev/null +++ b/proc.c @@ -0,0 +1,527 @@ +#include "types.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "mmu.h" +#include "x86.h" +#include "proc.h" +#include "spinlock.h" + +struct { + struct spinlock lock; + struct proc proc[NPROC]; +} ptable; + +static struct proc *initproc; + +int nextpid = 1; +extern void forkret(void); +extern void trapret(void); + +static void wakeup1(void *chan); + +void pinit(void) { + initlock(&ptable.lock, "ptable"); +} + +// Must be called with interrupts disabled +int cpuid() { + return mycpu() - cpus; +} + +// Must be called with interrupts disabled to avoid the caller being +// rescheduled between reading lapicid and running through the loop. +struct cpu*mycpu(void) { + int apicid, i; + + if (readeflags() & FL_IF) { + panic("mycpu called with interrupts enabled\n"); + } + + apicid = lapicid(); + // APIC IDs are not guaranteed to be contiguous. Maybe we should have + // a reverse map, or reserve a register to store &cpus[i]. 
+ for (i = 0; i < ncpu; ++i) { + if (cpus[i].apicid == apicid) { + return &cpus[i]; + } + } + panic("unknown apicid\n"); +} + +// Disable interrupts so that we are not rescheduled +// while reading proc from the cpu structure +struct proc*myproc(void) { + struct cpu *c; + struct proc *p; + pushcli(); + c = mycpu(); + p = c->proc; + popcli(); + return p; +} + +// Look in the process table for an UNUSED proc. +// If found, change state to EMBRYO and initialize +// state required to run in the kernel. +// Otherwise return 0. +static struct proc* allocproc(void) { + struct proc *p; + char *sp; + int found = 0; + + acquire(&ptable.lock); + + p = ptable.proc; + while (p < &ptable.proc[NPROC] && !found) { + if (p->state == UNUSED) { + found = 1; + } + else { + p++; + } + + } + if (!found) { + release(&ptable.lock); + return 0; + } + + p->state = EMBRYO; + p->pid = nextpid++; + + release(&ptable.lock); + + // Allocate kernel stack. + if ((p->kstack = kalloc()) == 0) { + p->state = UNUSED; + return 0; + } + sp = p->kstack + KSTACKSIZE; + + // Leave room for trap frame. + sp -= sizeof *p->tf; + p->tf = (struct trapframe*)sp; + + // Set up new context to start executing at forkret, + // which returns to trapret. + sp -= 4; + *(uint*)sp = (uint)trapret; + + sp -= sizeof *p->context; + p->context = (struct context*)sp; + memset(p->context, 0, sizeof *p->context); + p->context->eip = (uint)forkret; + + return p; +} + +// Set up first user process. 
+void userinit(void) { + struct proc *p; + extern char _binary_initcode_start[], _binary_initcode_size[]; + + p = allocproc(); + + initproc = p; + if ((p->pgdir = setupkvm()) == 0) { + panic("userinit: out of memory?"); + } + inituvm(p->pgdir, _binary_initcode_start, (int)_binary_initcode_size); + p->sz = PGSIZE; + memset(p->tf, 0, sizeof(*p->tf)); + p->tf->cs = (SEG_UCODE << 3) | DPL_USER; + p->tf->ds = (SEG_UDATA << 3) | DPL_USER; + p->tf->es = p->tf->ds; + p->tf->ss = p->tf->ds; + p->tf->eflags = FL_IF; + p->tf->esp = PGSIZE; + p->tf->eip = 0; // beginning of initcode.S + + safestrcpy(p->name, "initcode", sizeof(p->name)); + p->cwd = namei("/"); + + // this assignment to p->state lets other cores + // run this process. the acquire forces the above + // writes to be visible, and the lock is also needed + // because the assignment might not be atomic. + acquire(&ptable.lock); + + p->state = RUNNABLE; + + release(&ptable.lock); +} + +// Grow current process's memory by n bytes. +// Return 0 on success, -1 on failure. +int growproc(int n) { + uint sz; + struct proc *curproc = myproc(); + + sz = curproc->sz; + if (n > 0) { + if ((sz = allocuvm(curproc->pgdir, sz, sz + n)) == 0) { + return -1; + } + } + else if (n < 0) { + if ((sz = deallocuvm(curproc->pgdir, sz, sz + n)) == 0) { + return -1; + } + } + curproc->sz = sz; + switchuvm(curproc); + return 0; +} + +// Create a new process copying the current process as the parent. +// Sets up stack to return as if from system call. +// Marks the new process RUNNABLE itself; returns its pid, or -1 on failure. +int fork(void) { + int i, pid; + struct proc *np; + struct proc *curproc = myproc(); + + // Allocate process. + if ((np = allocproc()) == 0) { + return -1; + } + + // Copy process state from curproc. 
+ if ((np->pgdir = copyuvm(curproc->pgdir, curproc->sz)) == 0) { + kfree(np->kstack); + np->kstack = 0; + np->state = UNUSED; + return -1; + } + np->sz = curproc->sz; + np->parent = curproc; + *np->tf = *curproc->tf; + + // Clear %eax so that fork returns 0 in the child. + np->tf->eax = 0; + + for (i = 0; i < NOFILE; i++) { + if (curproc->ofile[i]) { + np->ofile[i] = filedup(curproc->ofile[i]); + } + } + np->cwd = idup(curproc->cwd); + + safestrcpy(np->name, curproc->name, sizeof(curproc->name)); + + pid = np->pid; + + acquire(&ptable.lock); + + np->state = RUNNABLE; + + release(&ptable.lock); + + return pid; +} + +// Exit the current process. Does not return. +// An exited process remains in the zombie state +// until its parent calls wait() to find out it exited. +void exit(void) { + struct proc *curproc = myproc(); + struct proc *p; + int fd; + + if (curproc == initproc) { + panic("init exiting"); + } + + // Close all open files. + for (fd = 0; fd < NOFILE; fd++) { + if (curproc->ofile[fd]) { + fileclose(curproc->ofile[fd]); + curproc->ofile[fd] = 0; + } + } + + begin_op(); + iput(curproc->cwd); + end_op(); + curproc->cwd = 0; + + acquire(&ptable.lock); + + // Parent might be sleeping in wait(). + wakeup1(curproc->parent); + + // Pass abandoned children to init. + for (p = ptable.proc; p < &ptable.proc[NPROC]; p++) { + if (p->parent == curproc) { + p->parent = initproc; + if (p->state == ZOMBIE) { + wakeup1(initproc); + } + } + } + + // Jump into the scheduler, never to return. + curproc->state = ZOMBIE; + sched(); + panic("zombie exit"); +} + +// Wait for a child process to exit and return its pid. +// Return -1 if this process has no children. +int wait(void) { + struct proc *p; + int havekids, pid; + struct proc *curproc = myproc(); + + acquire(&ptable.lock); + for (;;) { + // Scan through table looking for exited children. 
+ havekids = 0; + for (p = ptable.proc; p < &ptable.proc[NPROC]; p++) { + if (p->parent != curproc) { + continue; + } + havekids = 1; + if (p->state == ZOMBIE) { + // Found one. + pid = p->pid; + kfree(p->kstack); + p->kstack = 0; + freevm(p->pgdir); + p->pid = 0; + p->parent = 0; + p->name[0] = 0; + p->killed = 0; + p->state = UNUSED; + release(&ptable.lock); + return pid; + } + } + + // No point waiting if we don't have any children, or if we have been killed. + if (!havekids || curproc->killed) { + release(&ptable.lock); + return -1; + } + + // Wait for children to exit. (See wakeup1 call in exit.) + sleep(curproc, &ptable.lock); //DOC: wait-sleep + } +} + +// Per-CPU process scheduler. +// Each CPU calls scheduler() after setting itself up. +// Scheduler never returns. It loops, doing: +// - choose a process to run +// - swtch to start running that process +// - eventually that process transfers control +// via swtch back to the scheduler. +void scheduler(void) { + struct proc *p; + struct cpu *c = mycpu(); + c->proc = 0; + + for (;;) { + // Enable interrupts on this processor. + sti(); + + // Loop over process table looking for process to run. + acquire(&ptable.lock); + for (p = ptable.proc; p < &ptable.proc[NPROC]; p++) { + if (p->state != RUNNABLE) { + continue; + } + + // Switch to chosen process. It is the process's job + // to release ptable.lock and then reacquire it + // before jumping back to us. + c->proc = p; + switchuvm(p); + p->state = RUNNING; + + swtch(&(c->scheduler), p->context); + switchkvm(); + + // Process is done running for now. + // It should have changed its p->state before coming back. + c->proc = 0; + } + release(&ptable.lock); + + } +} + +// Enter scheduler. Must hold only ptable.lock +// and have changed proc->state. Saves and restores +// intena because intena is a property of this +// kernel thread, not this CPU. It should +// be proc->intena and proc->ncli, but that would +// break in the few places where a lock is held but +// there's no process. 
+void sched(void) { + int intena; + struct proc *p = myproc(); + + if (!holding(&ptable.lock)) { + panic("sched ptable.lock"); + } + if (mycpu()->ncli != 1) { + panic("sched locks"); + } + if (p->state == RUNNING) { + panic("sched running"); + } + if (readeflags() & FL_IF) { + panic("sched interruptible"); + } + intena = mycpu()->intena; + swtch(&p->context, mycpu()->scheduler); + mycpu()->intena = intena; +} + +// Give up the CPU for one scheduling round. +void yield(void) { + acquire(&ptable.lock); //DOC: yieldlock + myproc()->state = RUNNABLE; + sched(); + release(&ptable.lock); +} + +// A fork child's very first scheduling by scheduler() +// will swtch here. "Return" to user space. +void forkret(void) { + static int first = 1; + // Still holding ptable.lock from scheduler. + release(&ptable.lock); + + if (first) { + // Some initialization functions must be run in the context + // of a regular process (e.g., they call sleep), and thus cannot + // be run from main(). + first = 0; + iinit(ROOTDEV); + initlog(ROOTDEV); + } + + // Return to "caller", actually trapret (see allocproc). +} + +// Atomically release lock and sleep on chan. +// Reacquires lock when awakened. +void sleep(void *chan, struct spinlock *lk) { + struct proc *p = myproc(); + + if (p == 0) { + panic("sleep"); + } + + if (lk == 0) { + panic("sleep without lk"); + } + + // Must acquire ptable.lock in order to + // change p->state and then call sched. + // Once we hold ptable.lock, we can be + // guaranteed that we won't miss any wakeup + // (wakeup runs with ptable.lock locked), + // so it's okay to release lk. + if (lk != &ptable.lock) { //DOC: sleeplock0 + acquire(&ptable.lock); //DOC: sleeplock1 + release(lk); + } + // Go to sleep. + p->chan = chan; + p->state = SLEEPING; + + sched(); + + // Tidy up. + p->chan = 0; + + // Reacquire original lock. + if (lk != &ptable.lock) { //DOC: sleeplock2 + release(&ptable.lock); + acquire(lk); + } +} + + +// Wake up all processes sleeping on chan. 
+// The ptable lock must be held. +static void wakeup1(void *chan) { + struct proc *p; + + for (p = ptable.proc; p < &ptable.proc[NPROC]; p++) { + if (p->state == SLEEPING && p->chan == chan) { + p->state = RUNNABLE; + } + } +} + +// Wake up all processes sleeping on chan. +void wakeup(void *chan) { + acquire(&ptable.lock); + wakeup1(chan); + release(&ptable.lock); +} + +// Kill the process with the given pid. +// Process won't exit until it returns +// to user space (see trap in trap.c). +int kill(int pid) { + struct proc *p; + + acquire(&ptable.lock); + for (p = ptable.proc; p < &ptable.proc[NPROC]; p++) { + if (p->pid == pid) { + p->killed = 1; + // Wake process from sleep if necessary. + if (p->state == SLEEPING) { + p->state = RUNNABLE; + } + release(&ptable.lock); + return 0; + } + } + release(&ptable.lock); + return -1; +} + +// Print a process listing to console. For debugging. +// Runs when user types ^P on console. +// No lock to avoid wedging a stuck machine further. +void procdump(void) { + static char *states[] = { + [UNUSED] "unused", + [EMBRYO] "embryo", + [SLEEPING] "sleep ", + [RUNNABLE] "runble", + [RUNNING] "run ", + [ZOMBIE] "zombie" + }; + int i; + struct proc *p; + char *state; + uint pc[10]; + + for (p = ptable.proc; p < &ptable.proc[NPROC]; p++) { + if (p->state == UNUSED) { + continue; + } + if (p->state >= 0 && p->state < NELEM(states) && states[p->state]) { + state = states[p->state]; + } + else { + state = "???"; + } + cprintf("%d %s %s", p->pid, state, p->name); + if (p->state == SLEEPING) { + getcallerpcs((uint*)p->context->ebp + 2, pc); + for (i = 0; i < 10 && pc[i] != 0; i++) { + cprintf(" %p", pc[i]); + } + } + cprintf("\n"); + } +} diff --git a/proc.h b/proc.h new file mode 100644 index 0000000..bed7825 --- /dev/null +++ b/proc.h @@ -0,0 +1,57 @@ +// Per-CPU state +struct cpu { + uchar apicid; // Local APIC ID + struct context *scheduler; // swtch() here to enter scheduler + struct taskstate ts; // Used by x86 to find stack for 
interrupt + struct segdesc gdt[NSEGS]; // x86 global descriptor table + volatile uint started; // Has the CPU started? + int ncli; // Depth of pushcli nesting. + int intena; // Were interrupts enabled before pushcli? + struct proc *proc; // The process running on this cpu or null +}; + +extern struct cpu cpus[NCPU]; +extern int ncpu; + +// Saved registers for kernel context switches. +// Don't need to save all the segment registers (%cs, etc), +// because they are constant across kernel contexts. +// Don't need to save %eax, %ecx, %edx, because the +// x86 convention is that the caller has saved them. +// Contexts are stored at the bottom of the stack they +// describe; the stack pointer is the address of the context. +// The layout of the context matches the layout of the stack in swtch.S +// at the "Switch stacks" comment. Switch doesn't save eip explicitly, +// but it is on the stack and allocproc() manipulates it. +struct context { + uint edi; + uint esi; + uint ebx; + uint ebp; + uint eip; +}; + +enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE }; + +// Per-process state +struct proc { + uint sz; // Size of process memory (bytes) + pde_t* pgdir; // Page table + char *kstack; // Bottom of kernel stack for this process + enum procstate state; // Process state + int pid; // Process ID + struct proc *parent; // Parent process + struct trapframe *tf; // Trap frame for current syscall + struct context *context; // swtch() here to run process + void *chan; // If non-zero, sleeping on chan + int killed; // If non-zero, have been killed + struct file *ofile[NOFILE]; // Open files + struct inode *cwd; // Current directory + char name[16]; // Process name (debugging) +}; + +// Process memory is laid out contiguously, low addresses first: +// text +// original data and bss +// fixed-size stack +// expandable heap diff --git a/rm.c b/rm.c new file mode 100644 index 0000000..022afba --- /dev/null +++ b/rm.c @@ -0,0 +1,21 @@ +#include "types.h" +#include 
"stat.h" +#include "user.h" + +int main(int argc, char *argv[]) { + int i; + + if (argc < 2) { + printf(2, "Usage: rm files...\n"); + exit(); + } + + for (i = 1; i < argc; i++) { + if (unlink(argv[i]) < 0) { + printf(2, "rm: %s failed to delete\n", argv[i]); + break; + } + } + + exit(); +} diff --git a/sh.c b/sh.c new file mode 100644 index 0000000..1cf99a6 --- /dev/null +++ b/sh.c @@ -0,0 +1,482 @@ +// Shell. + +#include "types.h" +#include "user.h" +#include "fcntl.h" + +// Parsed command representation +#define EXEC 1 +#define REDIR 2 +#define PIPE 3 +#define LIST 4 +#define BACK 5 + +#define MAXARGS 10 + +struct cmd { + int type; +}; + +struct execcmd { + int type; + char *argv[MAXARGS]; + char *eargv[MAXARGS]; +}; + +struct redircmd { + int type; + struct cmd *cmd; + char *file; + char *efile; + int mode; + int fd; +}; + +struct pipecmd { + int type; + struct cmd *left; + struct cmd *right; +}; + +struct listcmd { + int type; + struct cmd *left; + struct cmd *right; +}; + +struct backcmd { + int type; + struct cmd *cmd; +}; + +int fork1(void); // Fork but panics on failure. +void panic(char*); +struct cmd *parsecmd(char*); + +// Execute cmd. Never returns. 
+void runcmd(struct cmd *cmd) { + int p[2]; + struct backcmd *bcmd; + struct execcmd *ecmd; + struct listcmd *lcmd; + struct pipecmd *pcmd; + struct redircmd *rcmd; + + if (cmd == 0) { + exit(); + } + + switch (cmd->type) { + default: + panic("runcmd"); + + case EXEC: + ecmd = (struct execcmd*)cmd; + if (ecmd->argv[0] == 0) { + exit(); + } + exec(ecmd->argv[0], ecmd->argv); + printf(2, "exec %s failed\n", ecmd->argv[0]); + break; + + case REDIR: + rcmd = (struct redircmd*)cmd; + close(rcmd->fd); + if (open(rcmd->file, rcmd->mode) < 0) { + printf(2, "open %s failed\n", rcmd->file); + exit(); + } + runcmd(rcmd->cmd); + break; + + case LIST: + lcmd = (struct listcmd*)cmd; + if (fork1() == 0) { + runcmd(lcmd->left); + } + wait(); + runcmd(lcmd->right); + break; + + case PIPE: + pcmd = (struct pipecmd*)cmd; + if (pipe(p) < 0) { + panic("pipe"); + } + if (fork1() == 0) { + close(1); + dup(p[1]); + close(p[0]); + close(p[1]); + runcmd(pcmd->left); + } + if (fork1() == 0) { + close(0); + dup(p[0]); + close(p[0]); + close(p[1]); + runcmd(pcmd->right); + } + close(p[0]); + close(p[1]); + wait(); + wait(); + break; + + case BACK: + bcmd = (struct backcmd*)cmd; + if (fork1() == 0) { + runcmd(bcmd->cmd); + } + break; + } + exit(); +} + +int getcmd(char *buf, int nbuf) { + printf(2, "$ "); + memset(buf, 0, nbuf); + gets(buf, nbuf); + if (buf[0] == 0) { // EOF + return -1; + } + return 0; +} + +int main(int argc, char* argv[]) { + static char buf[100]; + int fd; + + // Ensure that three file descriptors are open. + while ((fd = open("console", O_RDWR)) >= 0) { + if (fd >= 3) { + close(fd); + break; + } + } + + // Read and run input commands. + while (getcmd(buf, sizeof(buf)) >= 0) { + if (buf[0] == 'e' && buf[1] == 'x' && buf[2] == 'i' && buf[3] == 't') { + exit(); + } + if (buf[0] == 'c' && buf[1] == 'd' && buf[2] == ' ') { + // Chdir must be called by the parent, not the child. 
+ buf[strlen(buf) - 1] = 0; // chop \n + if (chdir(buf + 3) < 0) { + printf(2, "cannot cd %s\n", buf + 3); + } + continue; + } + if (fork1() == 0) { + runcmd(parsecmd(buf)); + } + wait(); + } + exit(); +} + +void panic(char *s) { + printf(2, "%s\n", s); + exit(); +} + +int fork1(void) { + int pid; + + pid = fork(); + if (pid == -1) { + panic("fork"); + } + return pid; +} + + +// Constructors + +struct cmd* execcmd(void) { + struct execcmd *cmd; + + cmd = malloc(sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + cmd->type = EXEC; + return (struct cmd*)cmd; +} + +struct cmd* redircmd(struct cmd *subcmd, char *file, char *efile, int mode, int fd) { + struct redircmd *cmd; + + cmd = malloc(sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + cmd->type = REDIR; + cmd->cmd = subcmd; + cmd->file = file; + cmd->efile = efile; + cmd->mode = mode; + cmd->fd = fd; + return (struct cmd*)cmd; +} + +struct cmd* pipecmd(struct cmd *left, struct cmd *right) { + struct pipecmd *cmd; + + cmd = malloc(sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + cmd->type = PIPE; + cmd->left = left; + cmd->right = right; + return (struct cmd*)cmd; +} + +struct cmd* listcmd(struct cmd *left, struct cmd *right) { + struct listcmd *cmd; + + cmd = malloc(sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + cmd->type = LIST; + cmd->left = left; + cmd->right = right; + return (struct cmd*)cmd; +} + +struct cmd* backcmd(struct cmd *subcmd) { + struct backcmd *cmd; + + cmd = malloc(sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + cmd->type = BACK; + cmd->cmd = subcmd; + return (struct cmd*)cmd; +} + +// Parsing + +char whitespace[] = " \t\r\n\v"; +char symbols[] = "<|>&;()"; + +int gettoken(char **ps, char *es, char **q, char **eq) { + char *s; + int ret; + + s = *ps; + while (s < es && strchr(whitespace, *s)) { + s++; + } + if (q) { + *q = s; + } + ret = *s; + switch (*s) { + case 0: + break; + case '|': + case '(': + case ')': + case ';': + case '&': + case '<': + s++; + break; + case '>': + s++; + if (*s == 
'>') { + ret = '+'; + s++; + } + break; + default: + ret = 'a'; + while (s < es && !strchr(whitespace, *s) && !strchr(symbols, *s)) { + s++; + } + break; + } + if (eq) { + *eq = s; + } + + while (s < es && strchr(whitespace, *s)) { + s++; + } + *ps = s; + return ret; +} + +int peek(char **ps, char *es, char *toks) { + char *s; + + s = *ps; + while (s < es && strchr(whitespace, *s)) { + s++; + } + *ps = s; + return *s && strchr(toks, *s); +} + +struct cmd *parseline(char**, char*); +struct cmd *parsepipe(char**, char*); +struct cmd *parseexec(char**, char*); +struct cmd *nulterminate(struct cmd*); + +struct cmd* parsecmd(char *s) { + char *es; + struct cmd *cmd; + + es = s + strlen(s); + cmd = parseline(&s, es); + peek(&s, es, ""); + if (s != es) { + printf(2, "leftovers: %s\n", s); + panic("syntax"); + } + nulterminate(cmd); + return cmd; +} + +struct cmd* parseline(char **ps, char *es) { + struct cmd *cmd; + + cmd = parsepipe(ps, es); + while (peek(ps, es, "&")) { + gettoken(ps, es, 0, 0); + cmd = backcmd(cmd); + } + if (peek(ps, es, ";")) { + gettoken(ps, es, 0, 0); + cmd = listcmd(cmd, parseline(ps, es)); + } + return cmd; +} + +struct cmd* parsepipe(char **ps, char *es) { + struct cmd *cmd; + + cmd = parseexec(ps, es); + if (peek(ps, es, "|")) { + gettoken(ps, es, 0, 0); + cmd = pipecmd(cmd, parsepipe(ps, es)); + } + return cmd; +} + +struct cmd* parseredirs(struct cmd *cmd, char **ps, char *es) { + int tok; + char *q, *eq; + + while (peek(ps, es, "<>")) { + tok = gettoken(ps, es, 0, 0); + if (gettoken(ps, es, &q, &eq) != 'a') { + panic("missing file for redirection"); + } + switch (tok) { + case '<': + cmd = redircmd(cmd, q, eq, O_RDONLY, 0); + break; + case '>': + cmd = redircmd(cmd, q, eq, O_WRONLY | O_CREATE, 1); + break; + case '+': // >> + cmd = redircmd(cmd, q, eq, O_WRONLY | O_CREATE, 1); + break; + } + } + return cmd; +} + +struct cmd* parseblock(char **ps, char *es) { + struct cmd *cmd; + + if (!peek(ps, es, "(")) { + panic("parseblock"); + } + 
gettoken(ps, es, 0, 0); + cmd = parseline(ps, es); + if (!peek(ps, es, ")")) { + panic("syntax - missing )"); + } + gettoken(ps, es, 0, 0); + cmd = parseredirs(cmd, ps, es); + return cmd; +} + +struct cmd* parseexec(char **ps, char *es) { + char *q, *eq; + int tok, argc; + struct execcmd *cmd; + struct cmd *ret; + + if (peek(ps, es, "(")) { + return parseblock(ps, es); + } + + ret = execcmd(); + cmd = (struct execcmd*)ret; + + argc = 0; + ret = parseredirs(ret, ps, es); + while (!peek(ps, es, "|)&;")) { + if ((tok = gettoken(ps, es, &q, &eq)) == 0) { + break; + } + if (tok != 'a') { + panic("syntax"); + } + cmd->argv[argc] = q; + cmd->eargv[argc] = eq; + argc++; + if (argc >= MAXARGS) { + panic("too many args"); + } + ret = parseredirs(ret, ps, es); + } + cmd->argv[argc] = 0; + cmd->eargv[argc] = 0; + return ret; +} + +// NUL-terminate all the counted strings. +struct cmd* nulterminate(struct cmd *cmd) { + int i; + struct backcmd *bcmd; + struct execcmd *ecmd; + struct listcmd *lcmd; + struct pipecmd *pcmd; + struct redircmd *rcmd; + + if (cmd == 0) { + return 0; + } + + switch (cmd->type) { + case EXEC: + ecmd = (struct execcmd*)cmd; + for (i = 0; ecmd->argv[i]; i++) { + *ecmd->eargv[i] = 0; + } + break; + + case REDIR: + rcmd = (struct redircmd*)cmd; + nulterminate(rcmd->cmd); + *rcmd->efile = 0; + break; + + case PIPE: + pcmd = (struct pipecmd*)cmd; + nulterminate(pcmd->left); + nulterminate(pcmd->right); + break; + + case LIST: + lcmd = (struct listcmd*)cmd; + nulterminate(lcmd->left); + nulterminate(lcmd->right); + break; + + case BACK: + bcmd = (struct backcmd*)cmd; + nulterminate(bcmd->cmd); + break; + } + return cmd; +} diff --git a/sign.pl b/sign.pl new file mode 100755 index 0000000..d793035 --- /dev/null +++ b/sign.pl @@ -0,0 +1,19 @@ +#!/usr/bin/perl + +open(SIG, $ARGV[0]) || die "open $ARGV[0]: $!"; + +$n = sysread(SIG, $buf, 1000); + +if($n > 510){ + print STDERR "boot block too large: $n bytes (max 510)\n"; + exit 1; +} + +print STDERR "boot block is 
$n bytes (max 510)\n"; + +$buf .= "\0" x (510-$n); +$buf .= "\x55\xAA"; + +open(SIG, ">$ARGV[0]") || die "open >$ARGV[0]: $!"; +print SIG $buf; +close SIG; diff --git a/sleeplock.c b/sleeplock.c new file mode 100644 index 0000000..f45e910 --- /dev/null +++ b/sleeplock.c @@ -0,0 +1,48 @@ +// Sleeping locks + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "x86.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" +#include "spinlock.h" +#include "sleeplock.h" + +void initsleeplock(struct sleeplock *lk, char *name) { + initlock(&lk->lk, "sleep lock"); + lk->name = name; + lk->locked = 0; + lk->pid = 0; +} + +void acquiresleep(struct sleeplock *lk) { + acquire(&lk->lk); + while (lk->locked) { + sleep(lk, &lk->lk); + } + lk->locked = 1; + lk->pid = myproc()->pid; + release(&lk->lk); +} + +void releasesleep(struct sleeplock *lk) { + acquire(&lk->lk); + lk->locked = 0; + lk->pid = 0; + wakeup(lk); + release(&lk->lk); +} + +int holdingsleep(struct sleeplock *lk) { + int r; + + acquire(&lk->lk); + r = lk->locked && (lk->pid == myproc()->pid); + release(&lk->lk); + return r; +} + + + diff --git a/sleeplock.h b/sleeplock.h new file mode 100644 index 0000000..832e871 --- /dev/null +++ b/sleeplock.h @@ -0,0 +1,10 @@ +// Long-term locks for processes +struct sleeplock { + uint locked; // Is the lock held? + struct spinlock lk; // spinlock protecting this sleep lock + + // For debugging: + char *name; // Name of lock. + int pid; // Process holding lock +}; + diff --git a/spinlock.c b/spinlock.c new file mode 100644 index 0000000..f0b345f --- /dev/null +++ b/spinlock.c @@ -0,0 +1,121 @@ +// Mutual exclusion spin locks. + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "x86.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" +#include "spinlock.h" + +void initlock(struct spinlock *lk, char *name) { + lk->name = name; + lk->locked = 0; + lk->cpu = 0; +} + +// Acquire the lock. 
+// Loops (spins) until the lock is acquired. +// Holding a lock for a long time may cause +// other CPUs to waste time spinning to acquire it. +void acquire(struct spinlock *lk) { + pushcli(); // disable interrupts to avoid deadlock. + if (holding(lk)) { + panic("acquire"); + } + + // The xchg is atomic. + while (xchg(&lk->locked, 1) != 0) { + ; + } + + // Tell the C compiler and the processor to not move loads or stores + // past this point, to ensure that the critical section's memory + // references happen after the lock is acquired. + __sync_synchronize(); + + // Record info about lock acquisition for debugging. + lk->cpu = mycpu(); + getcallerpcs(&lk, lk->pcs); +} + +// Release the lock. +void release(struct spinlock *lk) { + if (!holding(lk)) { + panic("release"); + } + + lk->pcs[0] = 0; + lk->cpu = 0; + + // Tell the C compiler and the processor to not move loads or stores + // past this point, to ensure that all the stores in the critical + // section are visible to other cores before the lock is released. + // Both the C compiler and the hardware may re-order loads and + // stores; __sync_synchronize() tells them both not to. + __sync_synchronize(); + + // Release the lock, equivalent to lk->locked = 0. + // This code can't use a C assignment, since it might + // not be atomic. A real OS would use C atomics here. + asm volatile ("movl $0, %0" : "+m" (lk->locked) :); + + popcli(); +} + +// Record the current call stack in pcs[] by following the %ebp chain. +void getcallerpcs(void *v, uint pcs[]) { + uint *ebp; + int i; + + ebp = (uint*)v - 2; + for (i = 0; i < 10; i++) { + if (ebp == 0 || ebp < (uint*)KERNBASE || ebp == (uint*)0xffffffff) { + break; + } + pcs[i] = ebp[1]; // saved %eip + ebp = (uint*)ebp[0]; // saved %ebp + } + for (; i < 10; i++) { + pcs[i] = 0; + } +} + +// Check whether this cpu is holding the lock. 
+int holding(struct spinlock *lock) { + int r; + pushcli(); + r = lock->locked && lock->cpu == mycpu(); + popcli(); + return r; +} + + +// Pushcli/popcli are like cli/sti except that they are matched: +// it takes two popcli to undo two pushcli. Also, if interrupts +// are off, then pushcli, popcli leaves them off. + +void pushcli(void) { + int eflags; + + eflags = readeflags(); + cli(); + if (mycpu()->ncli == 0) { + mycpu()->intena = eflags & FL_IF; + } + mycpu()->ncli += 1; +} + +void popcli(void) { + if (readeflags() & FL_IF) { + panic("popcli - interruptible"); + } + if (--mycpu()->ncli < 0) { + panic("popcli"); + } + if (mycpu()->ncli == 0 && mycpu()->intena) { + sti(); + } +} + diff --git a/spinlock.h b/spinlock.h new file mode 100644 index 0000000..d719bac --- /dev/null +++ b/spinlock.h @@ -0,0 +1,11 @@ +// Mutual exclusion lock. +struct spinlock { + uint locked; // Is the lock held? + + // For debugging: + char *name; // Name of lock. + struct cpu *cpu; // The cpu holding the lock. + uint pcs[10]; // The call stack (an array of program counters) + // that locked the lock. +}; + diff --git a/stat.h b/stat.h new file mode 100644 index 0000000..e54ba86 --- /dev/null +++ b/stat.h @@ -0,0 +1,11 @@ +#define T_DIR 1 // Directory +#define T_FILE 2 // File +#define T_DEV 3 // Device + +struct stat { + short type; // Type of file + int dev; // File system's disk device + uint ino; // Inode number + short nlink; // Number of links to file + uint size; // Size of file in bytes +}; diff --git a/stressfs.c b/stressfs.c new file mode 100644 index 0000000..87f10c1 --- /dev/null +++ b/stressfs.c @@ -0,0 +1,51 @@ +// Demonstrate that moving the "acquire" in iderw after the loop that +// appends to the idequeue results in a race. + +// For this to work, you should also add a spin within iderw's +// idequeue traversal loop. 
Adding the following demonstrated a panic +// after about 5 runs of stressfs in QEMU on a 2.1GHz CPU: +// for (i = 0; i < 40000; i++) +// asm volatile(""); + +#include "types.h" +#include "stat.h" +#include "user.h" +#include "fs.h" +#include "fcntl.h" + +int main(int argc, char *argv[]) { + int fd, i; + char path[] = "stressfs0"; + char data[512]; + + printf(1, "stressfs starting\n"); + memset(data, 'a', sizeof(data)); + + for (i = 0; i < 4; i++) { + if (fork() > 0) { + break; + } + } + + printf(1, "write %d\n", i); + + path[8] += i; + fd = open(path, O_CREATE | O_RDWR); + for (i = 0; i < 20; i++) { +// printf(fd, "%d\n", i); + write(fd, data, sizeof(data)); + } + close(fd); + + printf(1, "read\n"); + + fd = open(path, O_RDONLY); + for (i = 0; i < 20; i++) { + read(fd, data, sizeof(data)); + } + close(fd); + + wait(); + + exit(); +} diff --git a/string.c b/string.c new file mode 100644 index 0000000..ffdf897 --- /dev/null +++ b/string.c @@ -0,0 +1,103 @@ +#include "types.h" +#include "x86.h" + +void* memset(void *dst, int c, uint n) { + if ((int)dst % 4 == 0 && n % 4 == 0) { + c &= 0xFF; + stosl(dst, (c << 24) | (c << 16) | (c << 8) | c, n / 4); + } + else { + stosb(dst, c, n); + } + return dst; +} + +int memcmp(const void *v1, const void *v2, uint n) { + const uchar *s1, *s2; + + s1 = v1; + s2 = v2; + while (n-- > 0) { + if (*s1 != *s2) { + return *s1 - *s2; + } + s1++, s2++; + } + + return 0; +} + +void* memmove(void *dst, const void *src, uint n) { + const char *s; + char *d; + + s = src; + d = dst; + if (s < d && s + n > d) { + s += n; + d += n; + while (n-- > 0) { + *--d = *--s; + } + } + else { + while (n-- > 0) { + *d++ = *s++; + } + } + + return dst; +} + +// memcpy exists to placate GCC. Use memmove. 
// Like strncpy but guaranteed to NUL-terminate.
// Copies at most n-1 characters from t into s and always writes a
// terminating zero; returns s. When n <= 0 nothing is written.
char* safestrcpy(char *s, const char *t, int n) {
  char *dst = s;
  int room;

  if (n <= 0) {
    return s;
  }
  for (room = n - 1; room > 0; room--) {
    char ch = *t++;
    *dst++ = ch;
    if (ch == 0) {
      break;
    }
  }
  // dst is one past the last byte stored (even when that byte was the
  // source's NUL), so this store always lands inside the first n bytes.
  *dst = 0;
  return s;
}
The saved user %esp points +// to a saved program counter, and then the first argument. + +// Fetch the int at addr from the current process. +int fetchint(uint addr, int *ip) { + struct proc *curproc = myproc(); + + if (addr >= curproc->sz || addr + 4 > curproc->sz) { + return -1; + } + *ip = *(int*)(addr); + return 0; +} + +// Fetch the nul-terminated string at addr from the current process. +// Doesn't actually copy the string - just sets *pp to point at it. +// Returns length of string, not including nul. +int fetchstr(uint addr, char **pp) { + char *s, *ep; + struct proc *curproc = myproc(); + + if (addr >= curproc->sz) { + return -1; + } + *pp = (char*)addr; + ep = (char*)curproc->sz; + for (s = *pp; s < ep; s++) { + if (*s == 0) { + return s - *pp; + } + } + return -1; +} + +// Fetch the nth 32-bit system call argument. +int argint(int n, int *ip) { + return fetchint((myproc()->tf->esp) + 4 + 4 * n, ip); +} + +// Fetch the nth word-sized system call argument as a pointer +// to a block of memory of size bytes. Check that the pointer +// lies within the process address space. +int argptr(int n, char **pp, int size) { + int i; + struct proc *curproc = myproc(); + + if (argint(n, &i) < 0) { + return -1; + } + if (size < 0 || (uint)i >= curproc->sz || (uint)i + size > curproc->sz) { + return -1; + } + *pp = (char*)i; + return 0; +} + +// Fetch the nth word-sized system call argument as a string pointer. +// Check that the pointer is valid and the string is nul-terminated. +// (There is no shared writable memory, so the string can't change +// between this check and being used by the kernel.) 
+int argstr(int n, char **pp) { + int addr; + if (argint(n, &addr) < 0) { + return -1; + } + return fetchstr(addr, pp); +} + +void syscall(void) { + int num; + struct proc *curproc = myproc(); + + num = curproc->tf->eax; + if (num > 0 && num < NELEM(syscalls) && syscalls[num]) { + curproc->tf->eax = syscalls[num](); + } + else { + cprintf("%d %s: unknown sys call %d\n", + curproc->pid, curproc->name, num); + curproc->tf->eax = -1; + } +} diff --git a/sysfile.c b/sysfile.c new file mode 100644 index 0000000..faf89dc --- /dev/null +++ b/sysfile.c @@ -0,0 +1,450 @@ +// +// File-system system calls. +// Mostly argument checking, since we don't trust +// user code, and calls into file.c and fs.c. +// + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "stat.h" +#include "mmu.h" +#include "proc.h" +#include "fs.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "file.h" +#include "fcntl.h" + +// Fetch the nth word-sized system call argument as a file descriptor +// and return both the descriptor and the corresponding struct file. +static int argfd(int n, int *pfd, struct file **pf) { + int fd; + struct file *f; + + if (argint(n, &fd) < 0) { + return -1; + } + if (fd < 0 || fd >= NOFILE || (f = myproc()->ofile[fd]) == 0) { + return -1; + } + if (pfd) { + *pfd = fd; + } + if (pf) { + *pf = f; + } + return 0; +} + +// Allocate a file descriptor for the given file. +// Takes over file reference from caller on success. 
+static int fdalloc(struct file *f) { + int fd; + struct proc *curproc = myproc(); + + for (fd = 0; fd < NOFILE; fd++) { + if (curproc->ofile[fd] == 0) { + curproc->ofile[fd] = f; + return fd; + } + } + return -1; +} + +int sys_dup(void) { + struct file *f; + int fd; + + if (argfd(0, 0, &f) < 0) { + return -1; + } + if ((fd = fdalloc(f)) < 0) { + return -1; + } + filedup(f); + return fd; +} + +int sys_read(void) { + struct file *f; + int n; + char *p; + + if (argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argptr(1, &p, n) < 0) { + return -1; + } + return fileread(f, p, n); +} + +int sys_write(void) { + struct file *f; + int n; + char *p; + + if (argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argptr(1, &p, n) < 0) { + return -1; + } + return filewrite(f, p, n); +} + +int sys_close(void) { + int fd; + struct file *f; + + if (argfd(0, &fd, &f) < 0) { + return -1; + } + myproc()->ofile[fd] = 0; + fileclose(f); + return 0; +} + +int sys_fstat(void) { + struct file *f; + struct stat *st; + + if (argfd(0, 0, &f) < 0 || argptr(1, (void*)&st, sizeof(*st)) < 0) { + return -1; + } + return filestat(f, st); +} + +void cleanupsyslink(struct inode * ip) { + ilock(ip); + ip->nlink--; + iupdate(ip); + iunlockput(ip); + end_op(); +} + +// Create the path new as a link to the same inode as old. +int sys_link(void) { + char name[DIRSIZ], *new, *old; + struct inode *dp, *ip; + + if (argstr(0, &old) < 0 || argstr(1, &new) < 0) { + return -1; + } + + begin_op(); + if ((ip = namei(old)) == 0) { + end_op(); + return -1; + } + + ilock(ip); + if (ip->type == T_DIR) { + iunlockput(ip); + end_op(); + return -1; + } + + ip->nlink++; + iupdate(ip); + iunlock(ip); + + if ((dp = nameiparent(new, name)) == 0) { + cleanupsyslink(ip); + return -1; + } + ilock(dp); + if (dp->dev != ip->dev || dirlink(dp, name, ip->inum) < 0) { + iunlockput(dp); + cleanupsyslink(ip); + return -1; + } + iunlockput(dp); + iput(ip); + + end_op(); + + return 0; +} + +// Is the directory dp empty except for "." and ".." ? 
+static int isdirempty(struct inode *dp) { + int off; + struct dirent de; + + for (off = 2 * sizeof(de); off < dp->size; off += sizeof(de)) { + if (readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) { + panic("isdirempty: readi"); + } + if (de.inum != 0) { + return 0; + } + } + return 1; +} + + +int sys_unlink(void) { + struct inode *ip, *dp; + struct dirent de; + char name[DIRSIZ], *path; + uint off; + + if (argstr(0, &path) < 0) { + return -1; + } + + begin_op(); + if ((dp = nameiparent(path, name)) == 0) { + end_op(); + return -1; + } + + ilock(dp); + + // Cannot unlink "." or "..". + if (namecmp(name, ".") == 0 || namecmp(name, "..") == 0) { + iunlockput(dp); + end_op(); + return -1; + } + + if ((ip = dirlookup(dp, name, &off)) == 0) { + iunlockput(dp); + end_op(); + return -1; + } + ilock(ip); + + if (ip->nlink < 1) { + panic("unlink: nlink < 1"); + } + if (ip->type == T_DIR && !isdirempty(ip)) { + iunlockput(ip); + iunlockput(dp); + end_op(); + return -1; + } + + memset(&de, 0, sizeof(de)); + if (writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) { + panic("unlink: writei"); + } + if (ip->type == T_DIR) { + dp->nlink--; + iupdate(dp); + } + iunlockput(dp); + + ip->nlink--; + iupdate(ip); + iunlockput(ip); + + end_op(); + + return 0; +} + +static struct inode* create(char *path, short type, short major, short minor) { + struct inode *ip, *dp; + char name[DIRSIZ]; + + if ((dp = nameiparent(path, name)) == 0) { + return 0; + } + ilock(dp); + + if ((ip = dirlookup(dp, name, 0)) != 0) { + iunlockput(dp); + ilock(ip); + if (type == T_FILE && ip->type == T_FILE) { + return ip; + } + iunlockput(ip); + return 0; + } + + if ((ip = ialloc(dp->dev, type)) == 0) { + panic("create: ialloc"); + } + + ilock(ip); + ip->major = major; + ip->minor = minor; + ip->nlink = 1; + iupdate(ip); + + if (type == T_DIR) { // Create . and .. entries. + dp->nlink++; // for ".." + iupdate(dp); + // No ip->nlink++ for ".": avoid cyclic ref count. 
+ if (dirlink(ip, ".", ip->inum) < 0 || dirlink(ip, "..", dp->inum) < 0) { + panic("create dots"); + } + } + + if (dirlink(dp, name, ip->inum) < 0) { + panic("create: dirlink"); + } + + iunlockput(dp); + + return ip; +} + +int sys_open(void) { + char *path; + int fd, omode; + struct file *f; + struct inode *ip; + + if (argstr(0, &path) < 0 || argint(1, &omode) < 0) { + return -1; + } + + begin_op(); + + if (omode & O_CREATE) { + ip = create(path, T_FILE, 0, 0); + if (ip == 0) { + end_op(); + return -1; + } + } + else { + if ((ip = namei(path)) == 0) { + end_op(); + return -1; + } + ilock(ip); + if (ip->type == T_DIR && omode != O_RDONLY) { + iunlockput(ip); + end_op(); + return -1; + } + } + + if ((f = filealloc()) == 0 || (fd = fdalloc(f)) < 0) { + if (f) { + fileclose(f); + } + iunlockput(ip); + end_op(); + return -1; + } + iunlock(ip); + end_op(); + + f->type = FD_INODE; + f->ip = ip; + f->off = 0; + f->readable = !(omode & O_WRONLY); + f->writable = (omode & O_WRONLY) || (omode & O_RDWR); + return fd; +} + +int sys_mkdir(void) { + char *path; + struct inode *ip; + + begin_op(); + if (argstr(0, &path) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0) { + end_op(); + return -1; + } + iunlockput(ip); + end_op(); + return 0; +} + +int sys_mknod(void) { + struct inode *ip; + char *path; + int major, minor; + + begin_op(); + if ((argstr(0, &path)) < 0 || + argint(1, &major) < 0 || + argint(2, &minor) < 0 || + (ip = create(path, T_DEV, major, minor)) == 0) { + end_op(); + return -1; + } + iunlockput(ip); + end_op(); + return 0; +} + +int sys_chdir(void) { + char *path; + struct inode *ip; + struct proc *curproc = myproc(); + + begin_op(); + if (argstr(0, &path) < 0 || (ip = namei(path)) == 0) { + end_op(); + return -1; + } + ilock(ip); + if (ip->type != T_DIR) { + iunlockput(ip); + end_op(); + return -1; + } + iunlock(ip); + iput(curproc->cwd); + end_op(); + curproc->cwd = ip; + return 0; +} + +int sys_exec(void) { + char *path, *argv[MAXARG]; + int i; + uint uargv, uarg; 
+ + if (argstr(0, &path) < 0 || argint(1, (int*)&uargv) < 0) { + return -1; + } + memset(argv, 0, sizeof(argv)); + for (i = 0;; i++) { + if (i >= NELEM(argv)) { + return -1; + } + if (fetchint(uargv + 4 * i, (int*)&uarg) < 0) { + return -1; + } + if (uarg == 0) { + argv[i] = 0; + break; + } + if (fetchstr(uarg, &argv[i]) < 0) { + return -1; + } + } + return exec(path, argv); +} + +int sys_pipe(void) { + int *fd; + struct file *rf, *wf; + int fd0, fd1; + + if (argptr(0, (void*)&fd, 2 * sizeof(fd[0])) < 0) { + return -1; + } + if (pipealloc(&rf, &wf) < 0) { + return -1; + } + fd0 = -1; + if ((fd0 = fdalloc(rf)) < 0 || (fd1 = fdalloc(wf)) < 0) { + if (fd0 >= 0) { + myproc()->ofile[fd0] = 0; + } + fileclose(rf); + fileclose(wf); + return -1; + } + fd[0] = fd0; + fd[1] = fd1; + return 0; +} + +int sys_getch(void) { + return consoleget(); +} \ No newline at end of file diff --git a/sysproc.c b/sysproc.c new file mode 100644 index 0000000..30af968 --- /dev/null +++ b/sysproc.c @@ -0,0 +1,79 @@ +#include "types.h" +#include "x86.h" +#include "defs.h" +#include "date.h" +#include "param.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" + +int sys_fork(void) { + return fork(); +} + +int sys_exit(void) { + exit(); + return 0; // not reached +} + +int sys_wait(void) { + return wait(); +} + +int sys_kill(void) { + int pid; + + if (argint(0, &pid) < 0) { + return -1; + } + return kill(pid); +} + +int sys_getpid(void) { + return myproc()->pid; +} + +int sys_sbrk(void) { + int addr; + int n; + + if (argint(0, &n) < 0) { + return -1; + } + addr = myproc()->sz; + if (growproc(n) < 0) { + return -1; + } + return addr; +} + +int sys_sleep(void) { + int n; + uint ticks0; + + if (argint(0, &n) < 0) { + return -1; + } + acquire(&tickslock); + ticks0 = ticks; + while (ticks - ticks0 < n) { + if (myproc()->killed) { + release(&tickslock); + return -1; + } + sleep(&ticks, &tickslock); + } + release(&tickslock); + return 0; +} + +// return how many clock tick interrupts have 
occurred +// since start. +int sys_uptime(void) { + uint xticks; + + acquire(&tickslock); + xticks = ticks; + release(&tickslock); + return xticks; +} diff --git a/trap.c b/trap.c new file mode 100644 index 0000000..fe81ba6 --- /dev/null +++ b/trap.c @@ -0,0 +1,111 @@ +#include "types.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" +#include "x86.h" +#include "traps.h" +#include "spinlock.h" + +// Interrupt descriptor table (shared by all CPUs). +struct gatedesc idt[256]; +extern uint vectors[]; // in vectors.S: array of 256 entry pointers +struct spinlock tickslock; +uint ticks; + +void tvinit(void) { + int i; + + for (i = 0; i < 256; i++) { + SETGATE(idt[i], 0, SEG_KCODE << 3, vectors[i], 0); + } + SETGATE(idt[T_SYSCALL], 1, SEG_KCODE << 3, vectors[T_SYSCALL], DPL_USER); + + initlock(&tickslock, "time"); +} + +void idtinit(void) { + lidt(idt, sizeof(idt)); +} + +void trap(struct trapframe *tf) { + if (tf->trapno == T_SYSCALL) { + if (myproc()->killed) { + exit(); + } + myproc()->tf = tf; + syscall(); + if (myproc()->killed) { + exit(); + } + return; + } + + switch (tf->trapno) { + case T_IRQ0 + IRQ_TIMER: + if (cpuid() == 0) { + acquire(&tickslock); + ticks++; + wakeup(&ticks); + release(&tickslock); + } + lapiceoi(); + break; + case T_IRQ0 + IRQ_IDE: + ideintr(); + lapiceoi(); + break; + case T_IRQ0 + IRQ_IDE + 1: + // Bochs generates spurious IDE1 interrupts. + break; + case T_IRQ0 + IRQ_KBD: + kbdintr(); + lapiceoi(); + break; + case T_IRQ0 + IRQ_COM1: + uartintr(); + lapiceoi(); + break; + case T_IRQ0 + 7: + case T_IRQ0 + IRQ_SPURIOUS: + cprintf("cpu%d: spurious interrupt at %x:%x\n", + cpuid(), tf->cs, tf->eip); + lapiceoi(); + break; + + + default: + if (myproc() == 0 || (tf->cs & 3) == 0) { + // In kernel, it must be our mistake. + cprintf("unexpected trap %d from cpu %d eip %x (cr2=0x%x)\n", + tf->trapno, cpuid(), tf->eip, rcr2()); + panic("trap"); + } + // In user space, assume process misbehaved. 
  # vectors.S sends all traps here.
  # alltraps saves the segment and general-purpose registers on the
  # stack, switches %ds/%es to the kernel data segment, and calls the C
  # function trap() with a pointer to the saved state.
.globl alltraps
alltraps:
  # Build trap frame.
  pushl %ds
  pushl %es
  pushl %fs
  pushl %gs
  pushal

  # Set up data segments.
  movw $(SEG_KDATA<<3), %ax
  movw %ax, %ds
  movw %ax, %es

  # Call trap(tf), where tf=%esp
  pushl %esp
  call trap
  addl $4, %esp           # discard the tf argument pushed above

  # Return falls through to trapret...
  # trapret undoes alltraps: registers are popped in the reverse order of
  # the pushes above, then the two words below the iret frame are skipped.
.globl trapret
trapret:
  popal
  popl %gs
  popl %fs
  popl %es
  popl %ds
  addl $0x8, %esp  # trapno and errcode
  iret
// Processor-defined:
#define T_DIVIDE         0      // divide error
#define T_DEBUG          1      // debug exception
#define T_NMI            2      // non-maskable interrupt
#define T_BRKPT          3      // breakpoint
#define T_OFLOW          4      // overflow
#define T_BOUND          5      // bounds check
#define T_ILLOP          6      // illegal opcode
#define T_DEVICE         7      // device not available
#define T_DBLFLT         8      // double fault
// #define T_COPROC      9      // reserved (not used since 486)
#define T_TSS           10      // invalid task switch segment
#define T_SEGNP         11      // segment not present
#define T_STACK         12      // stack exception
#define T_GPFLT         13      // general protection fault
#define T_PGFLT         14      // page fault
// #define T_RES        15      // reserved
#define T_FPERR         16      // floating point error
#define T_ALIGN         17      // alignment check
#define T_MCHK          18      // machine check
#define T_SIMDERR       19      // SIMD floating point error

// These are arbitrarily chosen, but with care not to overlap
// processor defined exceptions or interrupt vectors.
#define T_SYSCALL       64      // system call
#define T_DEFAULT      500      // catchall

#define T_IRQ0          32      // IRQ 0 corresponds to int T_IRQ0

// Hardware IRQ numbers. trap() dispatches on T_IRQ0 + IRQ_xxx, so each
// value below is an offset from T_IRQ0, not a trap number by itself.
#define IRQ_TIMER        0
#define IRQ_KBD          1
#define IRQ_COM1         4
#define IRQ_IDE         14
#define IRQ_ERROR       19
#define IRQ_SPURIOUS    31
// Copy the nul-terminated string t, including its terminator, into s.
// Returns s. The caller must provide enough room in s.
char* strcpy(char *s, const char *t) {
  int i = 0;

  for (;;) {
    char ch = t[i];   // read before the store, as the original did
    s[i] = ch;
    if (ch == 0) {
      break;
    }
    i++;
  }
  return s;
}
// Copy n bytes from vsrc to vdst and return vdst.
// The regions may overlap: when the destination starts inside the source
// range the copy runs backwards so that source bytes are read before they
// are overwritten (the previous forward-only loop corrupted such copies).
// A non-positive n copies nothing.
void* memmove(void *vdst, const void *vsrc, int n) {
  char *dst = vdst;
  const char *src = vsrc;

  if (n <= 0) {
    return vdst;
  }
  if (src < dst && src + n > dst) {
    // Overlap with dst above src: copy from the last byte down.
    dst += n;
    src += n;
    while (n-- > 0) {
      *--dst = *--src;
    }
  }
  else {
    while (n-- > 0) {
      *dst++ = *src++;
    }
  }
  return vdst;
}
((prevp = freep) == 0) { + base.s.ptr = freep = prevp = &base; + base.s.size = 0; + } + for (p = prevp->s.ptr;; prevp = p, p = p->s.ptr) { + if (p->s.size >= nunits) { + if (p->s.size == nunits) { + prevp->s.ptr = p->s.ptr; + } + else { + p->s.size -= nunits; + p += p->s.size; + p->s.size = nunits; + } + freep = prevp; + return (void*)(p + 1); + } + if (p == freep) { + if ((p = morecore(nunits)) == 0) { + return 0; + } + } + } +} diff --git a/user.h b/user.h new file mode 100644 index 0000000..37dac1f --- /dev/null +++ b/user.h @@ -0,0 +1,43 @@ +struct stat; +struct rtcdate; + +// system calls +int fork(void); +int exit(void) __attribute__((noreturn)); +int wait(void); +int pipe(int*); +int write(int, const void*, int); +int read(int, void*, int); +int close(int); +int kill(int); +int exec(char*, char**); +int open(const char*, int); +int mknod(const char*, short, short); +int unlink(const char*); +int fstat(int fd, struct stat*); +int link(const char*, const char*); +int mkdir(const char*); +int chdir(const char*); +int dup(int); +int getpid(void); +char* sbrk(int); +int sleep(int); +int uptime(void); +int getch(void); + +// ulib.c +int stat(const char*, struct stat*); +char* strcpy(char*, const char*); +void *memmove(void*, const void*, int); +char* strchr(const char*, char c); +int strcmp(const char*, const char*); +void printf(int, const char*, ...); +char* gets(char*, int max); +uint strlen(const char*); +void* memset(void*, int, uint); +void* malloc(uint); +void free(void*); +int atoi(const char*); + +// entry-point +extern int main(int, char*[]); diff --git a/usertests.c b/usertests.c new file mode 100644 index 0000000..a8a8d92 --- /dev/null +++ b/usertests.c @@ -0,0 +1,1779 @@ +#include "param.h" +#include "types.h" +#include "stat.h" +#include "user.h" +#include "fs.h" +#include "fcntl.h" +#include "syscall.h" +#include "traps.h" +#include "memlayout.h" + +char buf[8192]; +char name[3]; +char *echoargv[] = { "echo", "ALL", "TESTS", "PASSED", 0 }; +int 
stdout = 1; + +// does chdir() call iput(p->cwd) in a transaction? +void iputtest(void) { + printf(stdout, "iput test\n"); + + if (mkdir("iputdir") < 0) { + printf(stdout, "mkdir failed\n"); + exit(); + } + if (chdir("iputdir") < 0) { + printf(stdout, "chdir iputdir failed\n"); + exit(); + } + if (unlink("../iputdir") < 0) { + printf(stdout, "unlink ../iputdir failed\n"); + exit(); + } + if (chdir("/") < 0) { + printf(stdout, "chdir / failed\n"); + exit(); + } + printf(stdout, "iput test ok\n"); +} + +// does exit() call iput(p->cwd) in a transaction? +void exitiputtest(void) { + int pid; + + printf(stdout, "exitiput test\n"); + + pid = fork(); + if (pid < 0) { + printf(stdout, "fork failed\n"); + exit(); + } + if (pid == 0) { + if (mkdir("iputdir") < 0) { + printf(stdout, "mkdir failed\n"); + exit(); + } + if (chdir("iputdir") < 0) { + printf(stdout, "child chdir failed\n"); + exit(); + } + if (unlink("../iputdir") < 0) { + printf(stdout, "unlink ../iputdir failed\n"); + exit(); + } + exit(); + } + wait(); + printf(stdout, "exitiput test ok\n"); +} + +// does the error path in open() for attempt to write a +// directory call iput() in a transaction? 
+// needs a hacked kernel that pauses just after the namei() +// call in sys_open(): +// if((ip = namei(path)) == 0) +// return -1; +// { +// int i; +// for(i = 0; i < 10000; i++) +// yield(); +// } + +void openiputtest(void) { + int pid; + + printf(stdout, "openiput test\n"); + if (mkdir("oidir") < 0) { + printf(stdout, "mkdir oidir failed\n"); + exit(); + } + pid = fork(); + if (pid < 0) { + printf(stdout, "fork failed\n"); + exit(); + } + if (pid == 0) { + int fd = open("oidir", O_RDWR); + if (fd >= 0) { + printf(stdout, "open directory for write succeeded\n"); + exit(); + } + exit(); + } + sleep(1); + if (unlink("oidir") != 0) { + printf(stdout, "unlink failed\n"); + exit(); + } + wait(); + printf(stdout, "openiput test ok\n"); +} + +// simple file system tests + +void opentest(void) { + int fd; + + printf(stdout, "open test\n"); + fd = open("echo", 0); + if (fd < 0) { + printf(stdout, "open echo failed!\n"); + exit(); + } + close(fd); + fd = open("doesnotexist", 0); + if (fd >= 0) { + printf(stdout, "open doesnotexist succeeded!\n"); + exit(); + } + printf(stdout, "open test ok\n"); +} + +void writetest(void) { + int fd; + int i; + + printf(stdout, "small file test\n"); + fd = open("small", O_CREATE | O_RDWR); + if (fd >= 0) { + printf(stdout, "creat small succeeded; ok\n"); + } + else { + printf(stdout, "error: creat small failed!\n"); + exit(); + } + for (i = 0; i < 100; i++) { + if (write(fd, "aaaaaaaaaa", 10) != 10) { + printf(stdout, "error: write aa %d new file failed\n", i); + exit(); + } + if (write(fd, "bbbbbbbbbb", 10) != 10) { + printf(stdout, "error: write bb %d new file failed\n", i); + exit(); + } + } + printf(stdout, "writes ok\n"); + close(fd); + fd = open("small", O_RDONLY); + if (fd >= 0) { + printf(stdout, "open small succeeded ok\n"); + } + else { + printf(stdout, "error: open small failed!\n"); + exit(); + } + i = read(fd, buf, 2000); + if (i == 2000) { + printf(stdout, "read succeeded ok\n"); + } + else { + printf(stdout, "read 
failed\n"); + exit(); + } + close(fd); + + if (unlink("small") < 0) { + printf(stdout, "unlink small failed\n"); + exit(); + } + printf(stdout, "small file test ok\n"); +} + +void writetest1(void) { + int i, fd, n; + + printf(stdout, "big files test\n"); + + fd = open("big", O_CREATE | O_RDWR); + if (fd < 0) { + printf(stdout, "error: creat big failed!\n"); + exit(); + } + + for (i = 0; i < MAXFILE; i++) { + ((int*)buf)[0] = i; + if (write(fd, buf, 512) != 512) { + printf(stdout, "error: write big file failed\n", i); + exit(); + } + } + + close(fd); + + fd = open("big", O_RDONLY); + if (fd < 0) { + printf(stdout, "error: open big failed!\n"); + exit(); + } + + n = 0; + for (;;) { + i = read(fd, buf, 512); + if (i == 0) { + if (n == MAXFILE - 1) { + printf(stdout, "read only %d blocks from big", n); + exit(); + } + break; + } + else if (i != 512) { + printf(stdout, "read failed %d\n", i); + exit(); + } + if (((int*)buf)[0] != n) { + printf(stdout, "read content of block %d is %d\n", + n, ((int*)buf)[0]); + exit(); + } + n++; + } + close(fd); + if (unlink("big") < 0) { + printf(stdout, "unlink big failed\n"); + exit(); + } + printf(stdout, "big files ok\n"); +} + +void createtest(void) { + int i, fd; + + printf(stdout, "many creates, followed by unlink test\n"); + + name[0] = 'a'; + name[2] = '\0'; + for (i = 0; i < 52; i++) { + name[1] = '0' + i; + fd = open(name, O_CREATE | O_RDWR); + close(fd); + } + name[0] = 'a'; + name[2] = '\0'; + for (i = 0; i < 52; i++) { + name[1] = '0' + i; + unlink(name); + } + printf(stdout, "many creates, followed by unlink; ok\n"); +} + +void dirtest(void){ + printf(stdout, "mkdir test\n"); + + if (mkdir("dir0") < 0) { + printf(stdout, "mkdir failed\n"); + exit(); + } + + if (chdir("dir0") < 0) { + printf(stdout, "chdir dir0 failed\n"); + exit(); + } + + if (chdir("..") < 0) { + printf(stdout, "chdir .. 
failed\n"); + exit(); + } + + if (unlink("dir0") < 0) { + printf(stdout, "unlink dir0 failed\n"); + exit(); + } + printf(stdout, "mkdir test ok\n"); +} + +void exectest(void) { + printf(stdout, "exec test\n"); + if (exec("echo", echoargv) < 0) { + printf(stdout, "exec echo failed\n"); + exit(); + } +} + +// simple fork and pipe read/write + +void pipe1(void) { + int fds[2], pid; + int seq, i, n, cc, total; + + if (pipe(fds) != 0) { + printf(1, "pipe() failed\n"); + exit(); + } + pid = fork(); + seq = 0; + if (pid == 0) { + close(fds[0]); + for (n = 0; n < 5; n++) { + for (i = 0; i < 1033; i++) { + buf[i] = seq++; + } + if (write(fds[1], buf, 1033) != 1033) { + printf(1, "pipe1 oops 1\n"); + exit(); + } + } + exit(); + } + else if (pid > 0) { + close(fds[1]); + total = 0; + cc = 1; + while ((n = read(fds[0], buf, cc)) > 0) { + for (i = 0; i < n; i++) { + if ((buf[i] & 0xff) != (seq++ & 0xff)) { + printf(1, "pipe1 oops 2\n"); + return; + } + } + total += n; + cc = cc * 2; + if (cc > sizeof(buf)) { + cc = sizeof(buf); + } + } + if (total != 5 * 1033) { + printf(1, "pipe1 oops 3 total %d\n", total); + exit(); + } + close(fds[0]); + wait(); + } + else { + printf(1, "fork() failed\n"); + exit(); + } + printf(1, "pipe1 ok\n"); +} + +// meant to be run w/ at most two CPUs +void preempt(void) { + int pid1, pid2, pid3; + int pfds[2]; + + printf(1, "preempt: "); + pid1 = fork(); + if (pid1 == 0) { + for (;;) { + ; + } + } + + pid2 = fork(); + if (pid2 == 0) { + for (;;) { + ; + } + } + + pipe(pfds); + pid3 = fork(); + if (pid3 == 0) { + close(pfds[0]); + if (write(pfds[1], "x", 1) != 1) { + printf(1, "preempt write error"); + } + close(pfds[1]); + for (;;) { + ; + } + } + + close(pfds[1]); + if (read(pfds[0], buf, sizeof(buf)) != 1) { + printf(1, "preempt read error"); + return; + } + close(pfds[0]); + printf(1, "kill... "); + kill(pid1); + kill(pid2); + kill(pid3); + printf(1, "wait... 
"); + wait(); + wait(); + wait(); + printf(1, "preempt ok\n"); +} + +// try to find any races between exit and wait +void exitwait(void) { + int i, pid; + + for (i = 0; i < 100; i++) { + pid = fork(); + if (pid < 0) { + printf(1, "fork failed\n"); + return; + } + if (pid) { + if (wait() != pid) { + printf(1, "wait wrong pid\n"); + return; + } + } + else { + exit(); + } + } + printf(1, "exitwait ok\n"); +} + +void mem(void) { + void *m1, *m2; + int pid, ppid; + + printf(1, "mem test\n"); + ppid = getpid(); + if ((pid = fork()) == 0) { + m1 = 0; + while ((m2 = malloc(10001)) != 0) { + *(char**)m2 = m1; + m1 = m2; + } + while (m1) { + m2 = *(char**)m1; + free(m1); + m1 = m2; + } + m1 = malloc(1024 * 20); + if (m1 == 0) { + printf(1, "couldn't allocate mem?!!\n"); + kill(ppid); + exit(); + } + free(m1); + printf(1, "mem ok\n"); + exit(); + } + else { + wait(); + } +} + +// More file system tests + +// two processes write to the same file descriptor +// is the offset shared? does inode locking work? +void sharedfd(void) { + int fd, pid, i, n, nc, np; + char buf[10]; + + printf(1, "sharedfd test\n"); + + unlink("sharedfd"); + fd = open("sharedfd", O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "fstests: cannot open sharedfd for writing"); + return; + } + pid = fork(); + memset(buf, pid == 0 ? 
'c' : 'p', sizeof(buf)); + for (i = 0; i < 1000; i++) { + if (write(fd, buf, sizeof(buf)) != sizeof(buf)) { + printf(1, "fstests: write sharedfd failed\n"); + break; + } + } + if (pid == 0) { + exit(); + } + else { + wait(); + } + close(fd); + fd = open("sharedfd", 0); + if (fd < 0) { + printf(1, "fstests: cannot open sharedfd for reading\n"); + return; + } + nc = np = 0; + while ((n = read(fd, buf, sizeof(buf))) > 0) { + for (i = 0; i < sizeof(buf); i++) { + if (buf[i] == 'c') { + nc++; + } + if (buf[i] == 'p') { + np++; + } + } + } + close(fd); + unlink("sharedfd"); + if (nc == 10000 && np == 10000) { + printf(1, "sharedfd ok\n"); + } + else { + printf(1, "sharedfd oops %d %d\n", nc, np); + exit(); + } +} + +// four processes write different files at the same +// time, to test block allocation. +void fourfiles(void) { + int fd, pid, i, j, n, total, pi; + char *names[] = { "f0", "f1", "f2", "f3" }; + char *fname; + + printf(1, "fourfiles test\n"); + + for (pi = 0; pi < 4; pi++) { + fname = names[pi]; + unlink(fname); + + pid = fork(); + if (pid < 0) { + printf(1, "fork failed\n"); + exit(); + } + + if (pid == 0) { + fd = open(fname, O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "create failed\n"); + exit(); + } + + memset(buf, '0' + pi, 512); + for (i = 0; i < 12; i++) { + if ((n = write(fd, buf, 500)) != 500) { + printf(1, "write failed %d\n", n); + exit(); + } + } + exit(); + } + } + + for (pi = 0; pi < 4; pi++) { + wait(); + } + + for (i = 0; i < 2; i++) { + fname = names[i]; + fd = open(fname, 0); + total = 0; + while ((n = read(fd, buf, sizeof(buf))) > 0) { + for (j = 0; j < n; j++) { + if (buf[j] != '0' + i) { + printf(1, "wrong char\n"); + exit(); + } + } + total += n; + } + close(fd); + if (total != 12 * 500) { + printf(1, "wrong length %d\n", total); + exit(); + } + unlink(fname); + } + + printf(1, "fourfiles ok\n"); +} + +// four processes create and delete different files in same directory +void createdelete(void) { + enum { N = 20 }; + int pid, i, 
fd, pi; + char name[32]; + + printf(1, "createdelete test\n"); + + for (pi = 0; pi < 4; pi++) { + pid = fork(); + if (pid < 0) { + printf(1, "fork failed\n"); + exit(); + } + + if (pid == 0) { + name[0] = 'p' + pi; + name[2] = '\0'; + for (i = 0; i < N; i++) { + name[1] = '0' + i; + fd = open(name, O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "create failed\n"); + exit(); + } + close(fd); + if (i > 0 && (i % 2) == 0) { + name[1] = '0' + (i / 2); + if (unlink(name) < 0) { + printf(1, "unlink failed\n"); + exit(); + } + } + } + exit(); + } + } + + for (pi = 0; pi < 4; pi++) { + wait(); + } + + name[0] = name[1] = name[2] = 0; + for (i = 0; i < N; i++) { + for (pi = 0; pi < 4; pi++) { + name[0] = 'p' + pi; + name[1] = '0' + i; + fd = open(name, 0); + if ((i == 0 || i >= N / 2) && fd < 0) { + printf(1, "oops createdelete %s didn't exist\n", name); + exit(); + } + else if ((i >= 1 && i < N / 2) && fd >= 0) { + printf(1, "oops createdelete %s did exist\n", name); + exit(); + } + if (fd >= 0) { + close(fd); + } + } + } + + for (i = 0; i < N; i++) { + for (pi = 0; pi < 4; pi++) { + name[0] = 'p' + i; + name[1] = '0' + i; + unlink(name); + } + } + + printf(1, "createdelete ok\n"); +} + +// can I unlink a file and still read it? 
+void unlinkread(void) { + int fd, fd1; + + printf(1, "unlinkread test\n"); + fd = open("unlinkread", O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "create unlinkread failed\n"); + exit(); + } + write(fd, "hello", 5); + close(fd); + + fd = open("unlinkread", O_RDWR); + if (fd < 0) { + printf(1, "open unlinkread failed\n"); + exit(); + } + if (unlink("unlinkread") != 0) { + printf(1, "unlink unlinkread failed\n"); + exit(); + } + + fd1 = open("unlinkread", O_CREATE | O_RDWR); + write(fd1, "yyy", 3); + close(fd1); + + if (read(fd, buf, sizeof(buf)) != 5) { + printf(1, "unlinkread read failed"); + exit(); + } + if (buf[0] != 'h') { + printf(1, "unlinkread wrong data\n"); + exit(); + } + if (write(fd, buf, 10) != 10) { + printf(1, "unlinkread write failed\n"); + exit(); + } + close(fd); + unlink("unlinkread"); + printf(1, "unlinkread ok\n"); +} + +void linktest(void) { + int fd; + + printf(1, "linktest\n"); + + unlink("lf1"); + unlink("lf2"); + + fd = open("lf1", O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "create lf1 failed\n"); + exit(); + } + if (write(fd, "hello", 5) != 5) { + printf(1, "write lf1 failed\n"); + exit(); + } + close(fd); + + if (link("lf1", "lf2") < 0) { + printf(1, "link lf1 lf2 failed\n"); + exit(); + } + unlink("lf1"); + + if (open("lf1", 0) >= 0) { + printf(1, "unlinked lf1 but it is still there!\n"); + exit(); + } + + fd = open("lf2", 0); + if (fd < 0) { + printf(1, "open lf2 failed\n"); + exit(); + } + if (read(fd, buf, sizeof(buf)) != 5) { + printf(1, "read lf2 failed\n"); + exit(); + } + close(fd); + + if (link("lf2", "lf2") >= 0) { + printf(1, "link lf2 lf2 succeeded! oops\n"); + exit(); + } + + unlink("lf2"); + if (link("lf2", "lf1") >= 0) { + printf(1, "link non-existant succeeded! oops\n"); + exit(); + } + + if (link(".", "lf1") >= 0) { + printf(1, "link . lf1 succeeded! 
oops\n"); + exit(); + } + + printf(1, "linktest ok\n"); +} + +// test concurrent create/link/unlink of the same file +void concreate(void) { + char file[3]; + int i, pid, n, fd; + char fa[40]; + struct { + ushort inum; + char name[14]; + } de; + + printf(1, "concreate test\n"); + file[0] = 'C'; + file[2] = '\0'; + for (i = 0; i < 40; i++) { + file[1] = '0' + i; + unlink(file); + pid = fork(); + if (pid && (i % 3) == 1) { + link("C0", file); + } + else if (pid == 0 && (i % 5) == 1) { + link("C0", file); + } + else { + fd = open(file, O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "concreate create %s failed\n", file); + exit(); + } + close(fd); + } + if (pid == 0) { + exit(); + } + else { + wait(); + } + } + + memset(fa, 0, sizeof(fa)); + fd = open(".", 0); + n = 0; + while (read(fd, &de, sizeof(de)) > 0) { + if (de.inum == 0) { + continue; + } + if (de.name[0] == 'C' && de.name[2] == '\0') { + i = de.name[1] - '0'; + if (i < 0 || i >= sizeof(fa)) { + printf(1, "concreate weird file %s\n", de.name); + exit(); + } + if (fa[i]) { + printf(1, "concreate duplicate file %s\n", de.name); + exit(); + } + fa[i] = 1; + n++; + } + } + close(fd); + + if (n != 40) { + printf(1, "concreate not enough files in directory listing\n"); + exit(); + } + + for (i = 0; i < 40; i++) { + file[1] = '0' + i; + pid = fork(); + if (pid < 0) { + printf(1, "fork failed\n"); + exit(); + } + if (((i % 3) == 0 && pid == 0) || + ((i % 3) == 1 && pid != 0)) { + close(open(file, 0)); + close(open(file, 0)); + close(open(file, 0)); + close(open(file, 0)); + } + else { + unlink(file); + unlink(file); + unlink(file); + unlink(file); + } + if (pid == 0) { + exit(); + } + else { + wait(); + } + } + + printf(1, "concreate ok\n"); +} + +// another concurrent link/unlink/create test, +// to look for deadlocks. +void linkunlink() { + int pid, i; + + printf(1, "linkunlink test\n"); + + unlink("x"); + pid = fork(); + if (pid < 0) { + printf(1, "fork failed\n"); + exit(); + } + + unsigned int x = (pid ? 
1 : 97); + for (i = 0; i < 100; i++) { + x = x * 1103515245 + 12345; + if ((x % 3) == 0) { + close(open("x", O_RDWR | O_CREATE)); + } + else if ((x % 3) == 1) { + link("cat", "x"); + } + else { + unlink("x"); + } + } + + if (pid) { + wait(); + } + else { + exit(); + } + + printf(1, "linkunlink ok\n"); +} + +// directory that uses indirect blocks +void bigdir(void) { + int i, fd; + char name[10]; + + printf(1, "bigdir test\n"); + unlink("bd"); + + fd = open("bd", O_CREATE); + if (fd < 0) { + printf(1, "bigdir create failed\n"); + exit(); + } + close(fd); + + for (i = 0; i < 500; i++) { + name[0] = 'x'; + name[1] = '0' + (i / 64); + name[2] = '0' + (i % 64); + name[3] = '\0'; + if (link("bd", name) != 0) { + printf(1, "bigdir link failed\n"); + exit(); + } + } + + unlink("bd"); + for (i = 0; i < 500; i++) { + name[0] = 'x'; + name[1] = '0' + (i / 64); + name[2] = '0' + (i % 64); + name[3] = '\0'; + if (unlink(name) != 0) { + printf(1, "bigdir unlink failed"); + exit(); + } + } + + printf(1, "bigdir ok\n"); +} + +void subdir(void) { + int fd, cc; + + printf(1, "subdir test\n"); + + unlink("ff"); + if (mkdir("dd") != 0) { + printf(1, "subdir mkdir dd failed\n"); + exit(); + } + + fd = open("dd/ff", O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "create dd/ff failed\n"); + exit(); + } + write(fd, "ff", 2); + close(fd); + + if (unlink("dd") >= 0) { + printf(1, "unlink dd (non-empty dir) succeeded!\n"); + exit(); + } + + if (mkdir("/dd/dd") != 0) { + printf(1, "subdir mkdir dd/dd failed\n"); + exit(); + } + + fd = open("dd/dd/ff", O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "create dd/dd/ff failed\n"); + exit(); + } + write(fd, "FF", 2); + close(fd); + + fd = open("dd/dd/../ff", 0); + if (fd < 0) { + printf(1, "open dd/dd/../ff failed\n"); + exit(); + } + cc = read(fd, buf, sizeof(buf)); + if (cc != 2 || buf[0] != 'f') { + printf(1, "dd/dd/../ff wrong content\n"); + exit(); + } + close(fd); + + if (link("dd/dd/ff", "dd/dd/ffff") != 0) { + printf(1, "link dd/dd/ff 
dd/dd/ffff failed\n"); + exit(); + } + + if (unlink("dd/dd/ff") != 0) { + printf(1, "unlink dd/dd/ff failed\n"); + exit(); + } + if (open("dd/dd/ff", O_RDONLY) >= 0) { + printf(1, "open (unlinked) dd/dd/ff succeeded\n"); + exit(); + } + + if (chdir("dd") != 0) { + printf(1, "chdir dd failed\n"); + exit(); + } + if (chdir("dd/../../dd") != 0) { + printf(1, "chdir dd/../../dd failed\n"); + exit(); + } + if (chdir("dd/../../../dd") != 0) { + printf(1, "chdir dd/../../dd failed\n"); + exit(); + } + if (chdir("./..") != 0) { + printf(1, "chdir ./.. failed\n"); + exit(); + } + + fd = open("dd/dd/ffff", 0); + if (fd < 0) { + printf(1, "open dd/dd/ffff failed\n"); + exit(); + } + if (read(fd, buf, sizeof(buf)) != 2) { + printf(1, "read dd/dd/ffff wrong len\n"); + exit(); + } + close(fd); + + if (open("dd/dd/ff", O_RDONLY) >= 0) { + printf(1, "open (unlinked) dd/dd/ff succeeded!\n"); + exit(); + } + + if (open("dd/ff/ff", O_CREATE | O_RDWR) >= 0) { + printf(1, "create dd/ff/ff succeeded!\n"); + exit(); + } + if (open("dd/xx/ff", O_CREATE | O_RDWR) >= 0) { + printf(1, "create dd/xx/ff succeeded!\n"); + exit(); + } + if (open("dd", O_CREATE) >= 0) { + printf(1, "create dd succeeded!\n"); + exit(); + } + if (open("dd", O_RDWR) >= 0) { + printf(1, "open dd rdwr succeeded!\n"); + exit(); + } + if (open("dd", O_WRONLY) >= 0) { + printf(1, "open dd wronly succeeded!\n"); + exit(); + } + if (link("dd/ff/ff", "dd/dd/xx") == 0) { + printf(1, "link dd/ff/ff dd/dd/xx succeeded!\n"); + exit(); + } + if (link("dd/xx/ff", "dd/dd/xx") == 0) { + printf(1, "link dd/xx/ff dd/dd/xx succeeded!\n"); + exit(); + } + if (link("dd/ff", "dd/dd/ffff") == 0) { + printf(1, "link dd/ff dd/dd/ffff succeeded!\n"); + exit(); + } + if (mkdir("dd/ff/ff") == 0) { + printf(1, "mkdir dd/ff/ff succeeded!\n"); + exit(); + } + if (mkdir("dd/xx/ff") == 0) { + printf(1, "mkdir dd/xx/ff succeeded!\n"); + exit(); + } + if (mkdir("dd/dd/ffff") == 0) { + printf(1, "mkdir dd/dd/ffff succeeded!\n"); + exit(); + } + if 
(unlink("dd/xx/ff") == 0) { + printf(1, "unlink dd/xx/ff succeeded!\n"); + exit(); + } + if (unlink("dd/ff/ff") == 0) { + printf(1, "unlink dd/ff/ff succeeded!\n"); + exit(); + } + if (chdir("dd/ff") == 0) { + printf(1, "chdir dd/ff succeeded!\n"); + exit(); + } + if (chdir("dd/xx") == 0) { + printf(1, "chdir dd/xx succeeded!\n"); + exit(); + } + + if (unlink("dd/dd/ffff") != 0) { + printf(1, "unlink dd/dd/ff failed\n"); + exit(); + } + if (unlink("dd/ff") != 0) { + printf(1, "unlink dd/ff failed\n"); + exit(); + } + if (unlink("dd") == 0) { + printf(1, "unlink non-empty dd succeeded!\n"); + exit(); + } + if (unlink("dd/dd") < 0) { + printf(1, "unlink dd/dd failed\n"); + exit(); + } + if (unlink("dd") < 0) { + printf(1, "unlink dd failed\n"); + exit(); + } + + printf(1, "subdir ok\n"); +} + +// test writes that are larger than the log. +void bigwrite(void) { + int fd, sz; + + printf(1, "bigwrite test\n"); + + unlink("bigwrite"); + for (sz = 499; sz < 12 * 512; sz += 471) { + fd = open("bigwrite", O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "cannot create bigwrite\n"); + exit(); + } + int i; + for (i = 0; i < 2; i++) { + int cc = write(fd, buf, sz); + if (cc != sz) { + printf(1, "write(%d) ret %d\n", sz, cc); + exit(); + } + } + close(fd); + unlink("bigwrite"); + } + + printf(1, "bigwrite ok\n"); +} + +void bigfile(void) { + int fd, i, total, cc; + + printf(1, "bigfile test\n"); + + unlink("bigfile"); + fd = open("bigfile", O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "cannot create bigfile"); + exit(); + } + for (i = 0; i < 20; i++) { + memset(buf, i, 600); + if (write(fd, buf, 600) != 600) { + printf(1, "write bigfile failed\n"); + exit(); + } + } + close(fd); + + fd = open("bigfile", 0); + if (fd < 0) { + printf(1, "cannot open bigfile\n"); + exit(); + } + total = 0; + for (i = 0;; i++) { + cc = read(fd, buf, 300); + if (cc < 0) { + printf(1, "read bigfile failed\n"); + exit(); + } + if (cc == 0) { + break; + } + if (cc != 300) { + printf(1, "short read 
bigfile\n"); + exit(); + } + if (buf[0] != i / 2 || buf[299] != i / 2) { + printf(1, "read bigfile wrong data\n"); + exit(); + } + total += cc; + } + close(fd); + if (total != 20 * 600) { + printf(1, "read bigfile wrong total\n"); + exit(); + } + unlink("bigfile"); + + printf(1, "bigfile test ok\n"); +} + +void fourteen(void) { + int fd; + + // DIRSIZ is 14. + printf(1, "fourteen test\n"); + + if (mkdir("12345678901234") != 0) { + printf(1, "mkdir 12345678901234 failed\n"); + exit(); + } + if (mkdir("12345678901234/123456789012345") != 0) { + printf(1, "mkdir 12345678901234/123456789012345 failed\n"); + exit(); + } + fd = open("123456789012345/123456789012345/123456789012345", O_CREATE); + if (fd < 0) { + printf(1, "create 123456789012345/123456789012345/123456789012345 failed\n"); + exit(); + } + close(fd); + fd = open("12345678901234/12345678901234/12345678901234", 0); + if (fd < 0) { + printf(1, "open 12345678901234/12345678901234/12345678901234 failed\n"); + exit(); + } + close(fd); + + if (mkdir("12345678901234/12345678901234") == 0) { + printf(1, "mkdir 12345678901234/12345678901234 succeeded!\n"); + exit(); + } + if (mkdir("123456789012345/12345678901234") == 0) { + printf(1, "mkdir 12345678901234/123456789012345 succeeded!\n"); + exit(); + } + + printf(1, "fourteen ok\n"); +} + +void rmdot(void) { + printf(1, "rmdot test\n"); + if (mkdir("dots") != 0) { + printf(1, "mkdir dots failed\n"); + exit(); + } + if (chdir("dots") != 0) { + printf(1, "chdir dots failed\n"); + exit(); + } + if (unlink(".") == 0) { + printf(1, "rm . worked!\n"); + exit(); + } + if (unlink("..") == 0) { + printf(1, "rm .. worked!\n"); + exit(); + } + if (chdir("/") != 0) { + printf(1, "chdir / failed\n"); + exit(); + } + if (unlink("dots/.") == 0) { + printf(1, "unlink dots/. worked!\n"); + exit(); + } + if (unlink("dots/..") == 0) { + printf(1, "unlink dots/.. 
worked!\n"); + exit(); + } + if (unlink("dots") != 0) { + printf(1, "unlink dots failed!\n"); + exit(); + } + printf(1, "rmdot ok\n"); +} + +void dirfile(void) { + int fd; + + printf(1, "dir vs file\n"); + + fd = open("dirfile", O_CREATE); + if (fd < 0) { + printf(1, "create dirfile failed\n"); + exit(); + } + close(fd); + if (chdir("dirfile") == 0) { + printf(1, "chdir dirfile succeeded!\n"); + exit(); + } + fd = open("dirfile/xx", 0); + if (fd >= 0) { + printf(1, "create dirfile/xx succeeded!\n"); + exit(); + } + fd = open("dirfile/xx", O_CREATE); + if (fd >= 0) { + printf(1, "create dirfile/xx succeeded!\n"); + exit(); + } + if (mkdir("dirfile/xx") == 0) { + printf(1, "mkdir dirfile/xx succeeded!\n"); + exit(); + } + if (unlink("dirfile/xx") == 0) { + printf(1, "unlink dirfile/xx succeeded!\n"); + exit(); + } + if (link("README", "dirfile/xx") == 0) { + printf(1, "link to dirfile/xx succeeded!\n"); + exit(); + } + if (unlink("dirfile") != 0) { + printf(1, "unlink dirfile failed!\n"); + exit(); + } + + fd = open(".", O_RDWR); + if (fd >= 0) { + printf(1, "open . for writing succeeded!\n"); + exit(); + } + fd = open(".", 0); + if (write(fd, "x", 1) > 0) { + printf(1, "write . succeeded!\n"); + exit(); + } + close(fd); + + printf(1, "dir vs file OK\n"); +} + +// test that iput() is called at the end of _namei() +void iref(void) { + int i, fd; + + printf(1, "empty file name\n"); + + // the 50 is NINODE + for (i = 0; i < 50 + 1; i++) { + if (mkdir("irefd") != 0) { + printf(1, "mkdir irefd failed\n"); + exit(); + } + if (chdir("irefd") != 0) { + printf(1, "chdir irefd failed\n"); + exit(); + } + + mkdir(""); + link("README", ""); + fd = open("", O_CREATE); + if (fd >= 0) { + close(fd); + } + fd = open("xx", O_CREATE); + if (fd >= 0) { + close(fd); + } + unlink("xx"); + } + + chdir("/"); + printf(1, "empty file name OK\n"); +} + +// test that fork fails gracefully +// the forktest binary also does this, but it runs out of proc entries first. 
// inside the bigger usertests binary, we run out of memory first.
// Forks children (each exits at once) until fork() fails, requires that to
// happen before 1000 forks, then reaps exactly that many children and
// checks that one further wait() returns -1.
void forktest(void) {
    int n, pid;

    printf(1, "fork test\n");

    for (n = 0; n < 1000; n++) {
        pid = fork();
        if (pid < 0) {
            break;
        }
        if (pid == 0) {
            exit();
        }
    }

    if (n == 1000) {
        printf(1, "fork claimed to work 1000 times!\n");
        exit();
    }

    for (; n > 0; n--) {
        if (wait() < 0) {
            printf(1, "wait stopped early\n");
            exit();
        }
    }

    if (wait() != -1) {
        printf(1, "wait got too many\n");
        exit();
    }

    printf(1, "fork test OK\n");
}

// Exercises sbrk(): byte-sized growth, growth across fork, growing to BIG,
// shrinking and re-growing by a page, protection of kernel addresses, and
// recovery after allocations that exhaust memory. The break is restored
// to its initial value before returning.
void sbrktest(void) {
    int fds[2], pid, pids[10], ppid;
    char *a, *b, *c, *lastaddr, *oldbrk, *p, scratch;
    uint amt;

    printf(stdout, "sbrk test\n");
    oldbrk = sbrk(0);

    // can one sbrk() less than a page?
    a = sbrk(0);
    int i;
    for (i = 0; i < 5000; i++) {
        b = sbrk(1);
        if (b != a) {
            printf(stdout, "sbrk test failed %d %x %x\n", i, a, b);
            exit();
        }
        *b = 1;
        a = b + 1;
    }
    pid = fork();
    if (pid < 0) {
        printf(stdout, "sbrk test fork failed\n");
        exit();
    }
    // Runs in both parent and child: the second sbrk(1) returns the break
    // left by the first, i.e. a + 1.
    c = sbrk(1);
    c = sbrk(1);
    if (c != a + 1) {
        printf(stdout, "sbrk test failed post-fork\n");
        exit();
    }
    if (pid == 0) {
        exit();
    }
    wait();

    // can one grow address space to something big?
#define BIG (100 * 1024 * 1024)
    a = sbrk(0);
    amt = (BIG) -(uint)a;
    p = sbrk(amt);
    if (p != a) {
        printf(stdout, "sbrk test failed to grow big address space; enough phys mem?\n");
        exit();
    }
    lastaddr = (char*) (BIG - 1);
    // NOTE(review): the pragma presumably silences a GCC warning about the
    // store through a pointer built from a constant -- confirm. It is not
    // paired with a push/pop, so it stays in effect for the rest of the file.
#pragma GCC diagnostic ignored "-Wstringop-overflow"
    *lastaddr = 99;

    // can one de-allocate?
    a = sbrk(0);
    c = sbrk(-4096);
    if (c == (char*)0xffffffff) {
        printf(stdout, "sbrk could not deallocate\n");
        exit();
    }
    c = sbrk(0);
    if (c != a - 4096) {
        printf(stdout, "sbrk deallocation produced wrong address, a %x c %x\n", a, c);
        exit();
    }

    // can one re-allocate that page?
    a = sbrk(0);
    c = sbrk(4096);
    if (c != a || sbrk(0) != a + 4096) {
        printf(stdout, "sbrk re-allocation failed, a %x c %x\n", a, c);
        exit();
    }
    if (*lastaddr == 99) {
        // should be zero
        printf(stdout, "sbrk de-allocation didn't really deallocate\n");
        exit();
    }

    // Shrink back to the break recorded at entry.
    a = sbrk(0);
    c = sbrk(-(sbrk(0) - oldbrk));
    if (c != a) {
        printf(stdout, "sbrk downsize failed, a %x c %x\n", a, c);
        exit();
    }

    // can we read the kernel's memory?
    // Each probe runs in a child; a child that survives the read reports
    // it and kills the parent.
    for (a = (char*)(KERNBASE); a < (char*) (KERNBASE + 2000000); a += 50000) {
        ppid = getpid();
        pid = fork();
        if (pid < 0) {
            printf(stdout, "fork failed\n");
            exit();
        }
        if (pid == 0) {
            printf(stdout, "oops could read %x = %x\n", a, *a);
            kill(ppid);
            exit();
        }
        wait();
    }

    // if we run the system out of memory, does it clean up the last
    // failed allocation?
    if (pipe(fds) != 0) {
        printf(1, "pipe() failed\n");
        exit();
    }
    for (i = 0; i < sizeof(pids) / sizeof(pids[0]); i++) {
        if ((pids[i] = fork()) == 0) {
            // allocate a lot of memory
            sbrk(BIG - (uint)sbrk(0));
            write(fds[1], "x", 1);
            // sit around until killed
            for (;;) { sleep(1000);
            }
        }
        // The one-byte read waits until that child has finished its sbrk.
        if (pids[i] != -1) {
            read(fds[0], &scratch, 1);
        }
    }
    // if those failed allocations freed up the pages they did allocate,
    // we'll be able to allocate here
    c = sbrk(4096);
    for (i = 0; i < sizeof(pids) / sizeof(pids[0]); i++) {
        if (pids[i] == -1) {
            continue;
        }
        kill(pids[i]);
        wait();
    }
    if (c == (char*)0xffffffff) {
        printf(stdout, "failed sbrk leaked memory\n");
        exit();
    }

    if (sbrk(0) > oldbrk) {
        sbrk(-(sbrk(0) - oldbrk));
    }

    printf(stdout, "sbrk test OK\n");
}

// Raises interrupt T_SYSCALL with %eax = SYS_sleep while %esp is
// temporarily replaced by p, so the system call fetches its argument from
// the caller-chosen address. %esp is saved in and restored from %ebx; the
// value left in %eax is stored in res and then discarded.
void validateint(int *p) {
    int res;
    asm ("mov %%esp, %%ebx\n\t"
         "mov %3, %%esp\n\t"
         "int %2\n\t"
         "mov %%ebx, %%esp" :
         "=a" (res) :
         "a" (SYS_sleep), "n" (T_SYSCALL), "c" (p) :
         "ebx");
}

// For every page-aligned address from 0 up to 1100 KiB, passes the address
// to the kernel both as a fake stack pointer (in a child, via validateint)
// and as a string argument to link(), which must fail.
void validatetest(void) {
    int hi, pid;
    uint p;

    printf(stdout, "validate test\n");
    hi = 1100 * 1024;

    for (p = 0; p <= (uint)hi; p += 4096) {
        if ((pid = fork()) == 0) {
            // try to crash the kernel by passing in a badly placed integer
            validateint((int*)p);
            exit();
        }
        sleep(0);
        sleep(0);
        kill(pid);
        wait();

        // try to crash the kernel by passing in a bad string pointer
        if (link("nosuchfile", (char*)p) != -1) {
            printf(stdout, "link should not succeed\n");
            exit();
        }
    }

    printf(stdout, "validate ok\n");
}

// does uninitialized data start out zero?
char uninit[10000];
// Scans every byte of uninit[] and fails on the first non-zero one.
void bsstest(void) {
    int i;

    printf(stdout, "bss test\n");
    for (i = 0; i < sizeof(uninit); i++) {
        if (uninit[i] != '\0') {
            printf(stdout, "bss test failed\n");
            exit();
        }
    }
    printf(stdout, "bss test ok\n");
}

// does exec return an error if the arguments
// are larger than a page? or does it write
// below the stack and wreck the instructions/data?
// The child only reaches the code after exec() if exec() returned, and
// then creates the marker file "bigarg-ok"; the parent passes the test
// only if that marker exists.
void bigargtest(void) {
    int pid, fd;

    unlink("bigarg-ok");
    pid = fork();
    if (pid == 0) {
        static char *args[MAXARG];
        int i;
        for (i = 0; i < MAXARG - 1; i++) {
            args[i] = "bigargs test: failed\n ";
        }
        args[MAXARG - 1] = 0;
        printf(stdout, "bigarg test\n");
        exec("echo", args);
        printf(stdout, "bigarg test ok\n");
        fd = open("bigarg-ok", O_CREATE);
        close(fd);
        exit();
    }
    else if (pid < 0) {
        printf(stdout, "bigargtest: fork failed\n");
        exit();
    }
    wait();
    fd = open("bigarg-ok", 0);
    if (fd < 0) {
        printf(stdout, "bigarg test failed!\n");
        exit();
    }
    close(fd);
    unlink("bigarg-ok");
}

// what happens when the file system runs out of blocks?
// answer: balloc panics, so this test is not useful.
// Creates files "f0000", "f0001", ... and fills each with 512-byte writes
// until a write comes up short; stops when a file cannot be opened or
// receives no data at all, then unlinks every name from the last one used
// down to "f0000".
// Note: main() in this file does not call this function.
void fsfull() {
    int nfiles;
    int fsblocks = 0;

    printf(1, "fsfull test\n");

    for (nfiles = 0;; nfiles++) {
        char name[64];
        name[0] = 'f';
        name[1] = '0' + nfiles / 1000;
        name[2] = '0' + (nfiles % 1000) / 100;
        name[3] = '0' + (nfiles % 100) / 10;
        name[4] = '0' + (nfiles % 10);
        name[5] = '\0';
        printf(1, "writing %s\n", name);
        int fd = open(name, O_CREATE | O_RDWR);
        if (fd < 0) {
            printf(1, "open %s failed\n", name);
            break;
        }
        int total = 0;
        while (1) {
            int cc = write(fd, buf, 512);
            if (cc < 512) {
                break;
            }
            total += cc;
            fsblocks++;
        }
        printf(1, "wrote %d bytes\n", total);
        close(fd);
        if (total == 0) {
            break;
        }
    }

    while (nfiles >= 0) {
        char name[64];
        name[0] = 'f';
        name[1] = '0' + nfiles / 1000;
        name[2] = '0' + (nfiles % 1000) / 100;
        name[3] = '0' + (nfiles % 100) / 10;
        name[4] = '0' + (nfiles % 10);
        name[5] = '\0';
        unlink(name);
        nfiles--;
    }

    printf(1, "fsfull test finished\n");
}

// A child executes outb/inb on ports 0x70/0x71. If both instructions
// complete, the child prints a FAILED message; the parent just waits.
void uio() {
    #define RTC_ADDR 0x70
    #define RTC_DATA 0x71

    ushort port = 0;
    uchar val = 0;
    int pid;

    printf(1, "uio test\n");
    pid = fork();
    if (pid == 0) {
        port = RTC_ADDR;
        val = 0x09; /* year */
        /* http://wiki.osdev.org/Inline_Assembly/Examples */
        asm volatile ("outb %0,%1" : : "a" (val), "d" (port));
        port = RTC_DATA;
        asm volatile ("inb %1,%0" : "=a" (val) : "d" (port));
        printf(1, "uio: uio succeeded; test FAILED\n");
        exit();
    }
    else if (pid < 0) {
        printf(1, "fork failed\n");
        exit();
    }
    wait();
    printf(1, "uio test done\n");
}

// Calls read() with a buffer one byte below the current break and a length
// of -1; the return value is ignored, the test only has to get past the call.
void argptest(){
    int fd;
    fd = open("init", O_RDONLY);
    if (fd < 0) {
        printf(2, "open failed\n");
        exit();
    }
    read(fd, sbrk(0) - 1, -1);
    close(fd);
    printf(1, "arg test passed\n");
}

// Linear congruential generator state and step function.
// Note: nothing in the visible part of this file calls rand().
unsigned long randstate = 1;
unsigned int rand() {
    randstate = randstate * 1664525 + 1013904223;
    return randstate;
}

// Runs the test suite once per file-system image: the marker file
// "usertests.ran" is created on the first run and makes later runs refuse
// to start. The last test, exectest(), replaces the process on success.
int main(int argc, char *argv[]) {
    printf(1, "usertests starting\n");

    if (open("usertests.ran", 0) >= 0) {
        printf(1, "already ran user tests -- rebuild fs.img\n");
        exit();
    }
    close(open("usertests.ran", O_CREATE));

    argptest();
    createdelete();
    linkunlink();
    concreate();
    fourfiles();
    sharedfd();

    // NOTE(review): bigargtest() is called twice in this group -- confirm
    // whether the repetition is intentional.
    bigargtest();
    bigwrite();
    bigargtest();
    bsstest();
    sbrktest();
    validatetest();

    opentest();
    writetest();
    writetest1();
    createtest();

    openiputtest();
    exitiputtest();
    iputtest();

    mem();
    pipe1();
    preempt();
    exitwait();

    rmdot();
    fourteen();
    bigfile();
    subdir();
    linktest();
    unlinkread();
    dirfile();
    iref();
    forktest();
    bigdir(); // slow

    uio();

    exectest();

    exit();
}
diff --git a/vectors.pl b/vectors.pl
new file mode 100755
index 0000000..57b49dd
--- /dev/null
+++ b/vectors.pl
@@ -0,0 +1,47 @@
#!/usr/bin/perl -w

# Generate vectors.S, the trap/interrupt entry points.
# There has to be one entry point per interrupt number
# since otherwise there's no way for trap() to discover
# the interrupt number.

print "# generated by vectors.pl - do not edit\n";
print "# handlers\n";
print ".globl alltraps\n";
for(my $i = 0; $i < 256; $i++){
    print ".globl vector$i\n";
    print "vector$i:\n";
    # Vectors 8, 10-14 and 17 get no extra "pushl $0" before the vector number.
    if(!($i == 8 || ($i >= 10 && $i <= 14) || $i == 17)){
        print " pushl \$0\n";
    }
    print " pushl \$$i\n";
    print " jmp alltraps\n";
}

print "\n# vector table\n";
print ".data\n";
print ".globl vectors\n";
print "vectors:\n";
for(my $i = 0; $i < 256; $i++){
    print " .long vector$i\n";
}

# sample output:
# # handlers
# .globl alltraps
# .globl vector0
# vector0:
# pushl $0
# pushl $0
# jmp alltraps
# ...
#
# # vector table
# .data
# .globl vectors
# vectors:
# .long vector0
# .long vector1
# .long vector2
# ...

diff --git a/vm.c b/vm.c
new file mode 100644
index 0000000..5ac4945
--- /dev/null
+++ b/vm.c
@@ -0,0 +1,392 @@
#include "param.h"
#include "types.h"
#include "defs.h"
#include "x86.h"
#include "memlayout.h"
#include "mmu.h"
#include "proc.h"
#include "elf.h"

extern char data[]; // defined by kernel.ld
pde_t *kpgdir; // for use in scheduler()

// Set up CPU's kernel segment descriptors.
// Run once on entry on each CPU.
// Fills the kernel and user code/data slots of the calling CPU's GDT with
// descriptors that all have base 0 and limit 0xffffffff, then loads that
// GDT with lgdt().
void seginit(void) {
    struct cpu *c;

    // Map "logical" addresses to virtual addresses using identity map.
    // Cannot share a CODE descriptor for both kernel and user
    // because it would have to have DPL_USR, but the CPU forbids
    // an interrupt from CPL=0 to DPL=3.
    c = &cpus[cpuid()];
    c->gdt[SEG_KCODE] = SEG(STA_X | STA_R, 0, 0xffffffff, 0);
    c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0);
    c->gdt[SEG_UCODE] = SEG(STA_X | STA_R, 0, 0xffffffff, DPL_USER);
    c->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER);
    lgdt(c->gdt, sizeof(c->gdt));
}

// Return the address of the PTE in page table pgdir
// that corresponds to virtual address va. If alloc!=0,
// create any required page table pages.
// Returns 0 when the page table is absent and either alloc is 0 or
// kalloc() fails. The returned PTE itself may still be non-present.
static pte_t * walkpgdir(pde_t *pgdir, const void *va, int alloc) {
    pde_t *pde;
    pte_t *pgtab;

    pde = &pgdir[PDX(va)];
    if (*pde & PTE_P) {
        // The directory entry stores a physical address; convert it to a
        // kernel virtual address before using it as a pointer.
        pgtab = (pte_t*)P2V(PTE_ADDR(*pde));
    }
    else {
        if (!alloc || (pgtab = (pte_t*)kalloc()) == 0) {
            return 0;
        }
        // Make sure all those PTE_P bits are zero.
        memset(pgtab, 0, PGSIZE);
        // The permissions here are overly generous, but they can
        // be further restricted by the permissions in the page table
        // entries, if necessary.
        *pde = V2P(pgtab) | PTE_P | PTE_W | PTE_U;
    }
    return &pgtab[PTX(va)];
}

// Create PTEs for virtual addresses starting at va that refer to
// physical addresses starting at pa. va and size might not
// be page-aligned.
// Returns 0 on success and -1 if walkpgdir() cannot provide a page table.
// Panics with "remap" if any page in the range is already present.
static int mappages(pde_t *pgdir, void *va, uint size, uint pa, int perm) {
    char *a, *last;
    pte_t *pte;

    // First and last page of the range, both rounded down to a page
    // boundary. The arithmetic is done in uint.
    a = (char*)PGROUNDDOWN((uint)va);
    last = (char*)PGROUNDDOWN(((uint)va) + size - 1);
    for (;;) {
        if ((pte = walkpgdir(pgdir, a, 1)) == 0) {
            return -1;
        }
        if (*pte & PTE_P) {
            panic("remap");
        }
        *pte = pa | perm | PTE_P;
        // The loop ends on equality with the last page, not on an ordering
        // comparison of addresses.
        if (a == last) {
            break;
        }
        a += PGSIZE;
        pa += PGSIZE;
    }
    return 0;
}

// There is one page table per process, plus one that's used when
// a CPU is not running any process (kpgdir). The kernel uses the
// current process's page table during system calls and interrupts;
// page protection bits prevent user code from using the kernel's
// mappings.
//
// setupkvm() and exec() set up every page table like this:
//
//   0..KERNBASE: user memory (text+data+stack+heap), mapped to
//                phys memory allocated by the kernel
//   KERNBASE..KERNBASE+EXTMEM: mapped to 0..EXTMEM (for I/O space)
//   KERNBASE+EXTMEM..data: mapped to EXTMEM..V2P(data)
//                for the kernel's instructions and r/o data
//   data..KERNBASE+PHYSTOP: mapped to V2P(data)..PHYSTOP,
//                rw data + free physical memory
//   0xfe000000..0: mapped direct (devices such as ioapic)
//
// The kernel allocates physical memory for its heap and for user memory
// between V2P(end) and the end of physical memory (PHYSTOP)
// (directly addressable from end..P2V(PHYSTOP)).

// This table defines the kernel's mappings, which are present in
// every process's page table.
// Each entry maps physical [phys_start, phys_end) at virtual address virt
// with the extra permission bits in perm. The last entry has phys_end 0,
// so its end is not numerically above its start.
static struct kmap {
    void *virt;
    uint phys_start;
    uint phys_end;
    int perm;
} kmap[] = {
    { (void*)KERNBASE, 0, EXTMEM, PTE_W}, // I/O space
    { (void*)KERNLINK, V2P(KERNLINK), V2P(data), 0}, // kern text+rodata
    { (void*)data, V2P(data), PHYSTOP, PTE_W}, // kern data+memory
    { (void*)DEVSPACE, DEVSPACE, 0, PTE_W}, // more devices
};

// Set up kernel part of a page table.
+// Returns the new page directory, or 0 if a page could not be
+// allocated; on a mapping failure everything built so far is freed.
+pde_t*setupkvm(void) {
+    pde_t *pgdir;
+    struct kmap *k;
+
+    if ((pgdir = (pde_t*)kalloc()) == 0) {
+        return 0;
+    }
+    memset(pgdir, 0, PGSIZE);
+    if (P2V(PHYSTOP) > (void*)DEVSPACE) {
+        panic("PHYSTOP too high");
+    }
+    // Install every range listed in kmap[].
+    for (k = kmap; k < &kmap[NELEM(kmap)]; k++) {
+        if (mappages(pgdir, k->virt, k->phys_end - k->phys_start,
+                     (uint)k->phys_start, k->perm) < 0) {
+            freevm(pgdir);
+            return 0;
+        }
+    }
+    return pgdir;
+}
+
+// Allocate one page table for the machine for the kernel address
+// space for scheduler processes.
+void kvmalloc(void) {
+    kpgdir = setupkvm();
+    // setupkvm() returns 0 when it runs out of memory; loading CR3 from
+    // a null page directory would not be recoverable, so stop here.
+    if (kpgdir == 0) {
+        panic("kvmalloc: out of memory");
+    }
+    switchkvm();
+}
+
+// Switch h/w page table register to the kernel-only page table,
+// for when no process is running.
+void switchkvm(void) {
+    lcr3(V2P(kpgdir));  // switch to the kernel page table
+}
+
+// Switch TSS and h/w page table to correspond to process p.
+// Panics if p, its kernel stack or its page directory is missing.
+void switchuvm(struct proc *p) {
+    if (p == 0) {
+        panic("switchuvm: no process");
+    }
+    if (p->kstack == 0) {
+        panic("switchuvm: no kstack");
+    }
+    if (p->pgdir == 0) {
+        panic("switchuvm: no pgdir");
+    }
+
+    pushcli();
+    mycpu()->gdt[SEG_TSS] = SEG16(STS_T32A, &mycpu()->ts,
+                                  sizeof(mycpu()->ts) - 1, 0);
+    mycpu()->gdt[SEG_TSS].s = 0;
+    mycpu()->ts.ss0 = SEG_KDATA << 3;
+    mycpu()->ts.esp0 = (uint)p->kstack + KSTACKSIZE;
+    // setting IOPL=0 in eflags *and* iomb beyond the tss segment limit
+    // forbids I/O instructions (e.g., inb and outb) from user space
+    mycpu()->ts.iomb = (ushort) 0xFFFF;
+    ltr(SEG_TSS << 3);
+    lcr3(V2P(p->pgdir));  // switch to process's address space
+    popcli();
+}
+
+// Load the initcode into address 0 of pgdir.
+// sz must be less than a page.
+// Panics if sz is too large, if no page can be allocated, or if the
+// page cannot be mapped.
+void inituvm(pde_t *pgdir, char *init, uint sz) {
+    char *mem;
+
+    if (sz >= PGSIZE) {
+        panic("inituvm: more than a page");
+    }
+    mem = kalloc();
+    // kalloc() returns 0 when no page is free; do not memset through it.
+    if (mem == 0) {
+        panic("inituvm: out of memory");
+    }
+    memset(mem, 0, PGSIZE);
+    // mappages() can fail while allocating the page table; copying the
+    // code into an unmapped page would leave address 0 unusable.
+    if (mappages(pgdir, 0, PGSIZE, V2P(mem), PTE_W | PTE_U) < 0) {
+        panic("inituvm: mappages failed");
+    }
+    memmove(mem, init, sz);
+}
+
+// Load a program segment into pgdir.
+// addr must be page-aligned
+// and the pages from addr to addr+sz must already be mapped.
+// Reads sz bytes of ip starting at offset, one page at a time.
+// Returns 0 on success, -1 if a read comes up short.
+int loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz) {
+    uint done;
+
+    if ((uint) addr % PGSIZE != 0) {
+        panic("loaduvm: addr must be page aligned");
+    }
+    for (done = 0; done < sz; done += PGSIZE) {
+        pte_t *entry = walkpgdir(pgdir, addr + done, 0);
+        if (entry == 0) {
+            panic("loaduvm: address should exist");
+        }
+        uint phys = PTE_ADDR(*entry);
+        // The final chunk may be shorter than a page.
+        uint chunk = (sz - done < PGSIZE) ? sz - done : PGSIZE;
+        if (readi(ip, P2V(phys), offset + done, chunk) != chunk) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
+// Grow a process from oldsz to newsz by allocating zeroed pages and
+// mapping them user-writable. Neither size needs to be page aligned.
+// Returns the new size, or 0 on error; a request that would not grow
+// the process returns oldsz unchanged.
+int allocuvm(pde_t *pgdir, uint oldsz, uint newsz) {
+    uint va;
+
+    if (newsz >= KERNBASE) {
+        return 0;
+    }
+    if (newsz < oldsz) {
+        return oldsz;
+    }
+
+    for (va = PGROUNDUP(oldsz); va < newsz; va += PGSIZE) {
+        char *page = kalloc();
+        if (page == 0) {
+            cprintf("allocuvm out of memory\n");
+            // Undo the pages added so far.
+            deallocuvm(pgdir, newsz, oldsz);
+            return 0;
+        }
+        memset(page, 0, PGSIZE);
+        if (mappages(pgdir, (char*)va, PGSIZE, V2P(page), PTE_W | PTE_U) < 0) {
+            cprintf("allocuvm out of memory (2)\n");
+            deallocuvm(pgdir, newsz, oldsz);
+            // This page never made it into the page table, so free it here.
+            kfree(page);
+            return 0;
+        }
+    }
+    return newsz;
+}
+
+// Deallocate user pages to bring the process size from oldsz to
+// newsz. oldsz and newsz need not be page-aligned, nor does newsz
+// need to be less than oldsz. oldsz can be larger than the actual
+// process size. Returns the new process size.
+int deallocuvm(pde_t *pgdir, uint oldsz, uint newsz) {
+    uint va;
+
+    if (newsz >= oldsz) {
+        return oldsz;
+    }
+
+    for (va = PGROUNDUP(newsz); va < oldsz; va += PGSIZE) {
+        pte_t *entry = walkpgdir(pgdir, (char*)va, 0);
+
+        if (entry == 0) {
+            // No page table for this directory slot: position va so that
+            // the loop increment lands on the start of the next slot.
+            va = PGADDR(PDX(va) + 1, 0, 0) - PGSIZE;
+            continue;
+        }
+        if ((*entry & PTE_P) == 0) {
+            continue;
+        }
+        uint phys = PTE_ADDR(*entry);
+        if (phys == 0) {
+            panic("kfree");
+        }
+        char *page = P2V(phys);
+        kfree(page);
+        *entry = 0;
+    }
+    return newsz;
+}
+
+// Release every user page, every page-table page and finally the
+// page directory itself. Panics when pgdir is 0.
+void freevm(pde_t *pgdir) {
+    uint slot;
+
+    if (pgdir == 0) {
+        panic("freevm: no pgdir");
+    }
+    deallocuvm(pgdir, KERNBASE, 0);
+    for (slot = 0; slot < NPDENTRIES; slot++) {
+        if ((pgdir[slot] & PTE_P) == 0) {
+            continue;
+        }
+        char *table = P2V(PTE_ADDR(pgdir[slot]));
+        kfree(table);
+    }
+    kfree((char*)pgdir);
+}
+
+// Strip PTE_U from the page containing uva. Used to create an
+// inaccessible page beneath the user stack. Panics when no PTE exists.
+void clearpteu(pde_t *pgdir, char *uva) {
+    pte_t *entry = walkpgdir(pgdir, uva, 0);
+
+    if (entry == 0) {
+        panic("clearpteu");
+    }
+    *entry &= ~PTE_U;
+}
+
+// Build a child page table that holds a private copy of the first sz
+// bytes of the parent's address space. Returns the new page directory,
+// or 0 when memory runs out (anything already built is freed).
+pde_t* copyuvm(pde_t *pgdir, uint sz) {
+    pde_t *child = setupkvm();
+    uint va;
+
+    if (child == 0) {
+        return 0;
+    }
+    for (va = 0; va < sz; va += PGSIZE) {
+        pte_t *src = walkpgdir(pgdir, (void *) va, 0);
+        if (src == 0) {
+            panic("copyuvm: pte should exist");
+        }
+        if ((*src & PTE_P) == 0) {
+            panic("copyuvm: page not present");
+        }
+        uint phys = PTE_ADDR(*src);
+        uint flags = PTE_FLAGS(*src);
+        char *page = kalloc();
+        if (page == 0) {
+            goto bad;
+        }
+        memmove(page, (char*)P2V(phys), PGSIZE);
+        if (mappages(child, (void*)va, PGSIZE, V2P(page), flags) < 0) {
+            // Not yet reachable through child, so freevm() would miss it.
+            kfree(page);
+            goto bad;
+        }
+    }
+    return child;
+
+bad:
+    freevm(child);
+    return 0;
+}
+
+
+// Map user virtual address to kernel address.
+// Returns 0 when uva has no page table entry, or when the entry is
+// not present or not user-accessible.
+char*uva2ka(pde_t *pgdir, char *uva) {
+    pte_t *pte;
+
+    pte = walkpgdir(pgdir, uva, 0);
+    // walkpgdir() returns 0 when no page table covers uva; treat that
+    // like any other unmapped address instead of dereferencing it.
+    if (pte == 0) {
+        return 0;
+    }
+    if ((*pte & PTE_P) == 0) {
+        return 0;
+    }
+    if ((*pte & PTE_U) == 0) {
+        return 0;
+    }
+    return (char*)P2V(PTE_ADDR(*pte));
+}
+
+// Copy len bytes from p to user address va in page table pgdir.
+// Most useful when pgdir is not the current page table.
+// uva2ka ensures this only works for PTE_U pages.
+// Returns 0 on success, -1 if any destination page fails uva2ka.
+int copyout(pde_t *pgdir, uint va, void *p, uint len) {
+    char *buf, *pa0;
+    uint n, va0;
+
+    buf = (char*)p;
+    while (len > 0) {
+        va0 = (uint)PGROUNDDOWN(va);
+        pa0 = uva2ka(pgdir, (char*)va0);
+        if (pa0 == 0) {
+            return -1;
+        }
+        // Copy at most up to the end of the current page.
+        n = PGSIZE - (va - va0);
+        if (n > len) {
+            n = len;
+        }
+        memmove(pa0 + (va - va0), buf, n);
+        len -= n;
+        buf += n;
+        va = va0 + PGSIZE;
+    }
+    return 0;
+}
+
+
+
+
+
+
+
+
diff --git a/wc.c b/wc.c
new file mode 100644
index 0000000..0869cf8
--- /dev/null
+++ b/wc.c
@@ -0,0 +1,52 @@
+#include "types.h"
+#include "stat.h"
+#include "user.h"
+
+char buf[512];
+
+void wc(int fd, char *name) {
+    int i, n;
+    int l, w, c, inword;  // lines, words, bytes; inword = inside a word
+
+    l = w = c = 0;
+    inword = 0;
+    while ((n = read(fd, buf, sizeof(buf))) > 0) {
+        for (i = 0; i < n; i++) {
+            c++;
+            if (buf[i] == '\n') {
+                l++;
+            }
+            if (strchr(" \r\t\n\v", buf[i])) {  // separator ends a word
+                inword = 0;
+            }
+            else if (!inword) {  // first byte of a new word
+                w++;
+                inword = 1;
+            }
+        }
+    }
+    if (n < 0) {
+        printf(1, "wc: read error\n");
+        exit();
+    }
+    printf(1, "%d %d %d %s\n", l, w, c, name);
+}
+
+int main(int argc, char *argv[]) {
+    int fd, i;
+
+    if (argc <= 1) {  // no file arguments: count fd 0
+        wc(0, "");
+        exit();
+    }
+
+    for (i = 1; i < argc; i++) {
+        if ((fd = open(argv[i], 0)) < 0) {
+            printf(1, "wc: cannot open %s\n", argv[i]);
+            exit();
+        }
+        wc(fd, argv[i]);
+        close(fd);
+    }
+    exit();
+}
diff --git a/x86.h b/x86.h
new file mode 100644
index 0000000..118f725
--- /dev/null
+++ b/x86.h
@@ -0,0 +1,148 @@
+// Routines to let C code use special x86 instructions.
+
+// Read one byte from I/O port `port`.
+static inline uchar inb(ushort port) {
+    uchar data;
+
+    asm volatile ("in %1,%0" : "=a" (data) : "d" (port));
+    return data;
+}
+
+// Read cnt 32-bit words from I/O port `port` into the buffer at addr.
+static inline void insl(int port, void *addr, int cnt) {
+    asm volatile ("cld; rep insl" :
+                  "=D" (addr), "=c" (cnt) :
+                  "d" (port), "0" (addr), "1" (cnt) :
+                  "memory", "cc");
+}
+
+// Write one byte to I/O port `port`.
+static inline void outb(ushort port, uchar data) {
+    asm volatile ("out %0,%1" : : "a" (data), "d" (port));
+}
+
+// Write one 16-bit word to I/O port `port`.
+static inline void outw(ushort port, ushort data) {
+    asm volatile ("out %0,%1" : : "a" (data), "d" (port));
+}
+
+// Write cnt 32-bit words from the buffer at addr to I/O port `port`.
+static inline void outsl(int port, const void *addr, int cnt) {
+    // The instruction reads the caller's buffer through addr, which is
+    // not listed as a memory operand. The "memory" clobber tells the
+    // compiler so, so that pending stores to the buffer are completed
+    // before the asm runs.
+    asm volatile ("cld; rep outsl" :
+                  "=S" (addr), "=c" (cnt) :
+                  "d" (port), "0" (addr), "1" (cnt) :
+                  "memory", "cc");
+}
+
+// Store the low byte of data into cnt consecutive bytes starting at addr.
+static inline void stosb(void *addr, int data, int cnt) {
+    asm volatile ("cld; rep stosb" :
+                  "=D" (addr), "=c" (cnt) :
+                  "0" (addr), "1" (cnt), "a" (data) :
+                  "memory", "cc");
+}
+
+// Store data into cnt consecutive 32-bit words starting at addr.
+static inline void stosl(void *addr, int data, int cnt) {
+    asm volatile ("cld; rep stosl" :
+                  "=D" (addr), "=c" (cnt) :
+                  "0" (addr), "1" (cnt), "a" (data) :
+                  "memory", "cc");
+}
+
+struct segdesc;
+
+// Load the GDT register with a table of `size` bytes located at p.
+static inline void lgdt(struct segdesc *p, int size) {
+    volatile ushort pd[3];  // 16-bit limit followed by the 32-bit base
+
+    pd[0] = size - 1;
+    pd[1] = (uint)p;
+    pd[2] = (uint)p >> 16;
+
+    asm volatile ("lgdt (%0)" : : "r" (pd));
+}
+
+struct gatedesc;
+
+// Load the IDT register with a table of `size` bytes located at p.
+static inline void lidt(struct gatedesc *p, int size) {
+    volatile ushort pd[3];  // 16-bit limit followed by the 32-bit base
+
+    pd[0] = size - 1;
+    pd[1] = (uint)p;
+    pd[2] = (uint)p >> 16;
+
+    asm volatile ("lidt (%0)" : : "r" (pd));
+}
+
+// Load the task register with segment selector sel.
+static inline void ltr(ushort sel) {
+    asm volatile ("ltr %0" : : "r" (sel));
+}
+
+// Return the current value of the EFLAGS register.
+static inline uint readeflags(void) {
+    uint eflags;
+    asm volatile ("pushfl; popl %0" : "=r" (eflags));
+    return eflags;
+}
+
+// Load the %gs segment register with selector v.
+static inline void loadgs(ushort v) {
+    asm volatile ("movw %0, %%gs" : : "r" (v));
+}
+
+// Execute the cli instruction.
+static inline void cli(void) {
+    asm volatile ("cli");
+}
+
+// Execute the sti instruction.
+static inline void sti(void) {
+    asm volatile ("sti");
+}
+
+// Exchange *addr with newval using a locked xchgl and return the
+// value that was previously stored at *addr.
+static inline uint xchg(volatile uint *addr, uint newval) {
+    uint result;
+
+    // The + in "+m" denotes a read-modify-write operand.
+    asm volatile ("lock; xchgl %0, %1" :
+                  "+m" (*addr), "=a" (result) :
+                  "1" (newval) :
+                  "cc");
+    return result;
+}
+
+// Return the contents of control register CR2.
+static inline uint rcr2(void) {
+    uint val;
+    asm volatile ("movl %%cr2,%0" : "=r" (val));
+    return val;
+}
+
+// Load control register CR3 with val.
+static inline void lcr3(uint val) {
+    asm volatile ("movl %0,%%cr3" : : "r" (val));
+}
+
+// Layout of the trap frame built on the stack by the
+// hardware and by trapasm.S, and passed to trap().
+struct trapframe {
+    // registers as pushed by pusha
+    uint edi;
+    uint esi;
+    uint ebp;
+    uint oesp;  // useless & ignored
+    uint ebx;
+    uint edx;
+    uint ecx;
+    uint eax;
+
+    // rest of trap frame
+    ushort gs;
+    ushort padding1;
+    ushort fs;
+    ushort padding2;
+    ushort es;
+    ushort padding3;
+    ushort ds;
+    ushort padding4;
+    uint trapno;
+
+    // below here defined by x86 hardware
+    uint err;
+    uint eip;
+    ushort cs;
+    ushort padding5;
+    uint eflags;
+
+    // below here only when crossing rings, such as from user to kernel
+    uint esp;
+    ushort ss;
+    ushort padding6;
+};
diff --git a/zombie.c b/zombie.c
new file mode 100644
index 0000000..a340cf3
--- /dev/null
+++ b/zombie.c
@@ -0,0 +1,13 @@
+// Create a zombie process that
+// must be reparented at exit.
+
+#include "types.h"
+#include "stat.h"
+#include "user.h"
+
+int main(int argc, char* argv[]) {
+    if (fork() > 0) {
+        sleep(5);  // Let child exit before parent.
+    }
+    exit();
+}