commit 7424a59fa161a22483ea7898f2c5c00bcceaacf1 Author: kayomn Date: Wed Jan 4 16:04:31 2023 +0000 Initial commit diff --git a/.cvsignore b/.cvsignore new file mode 100644 index 0000000..081a43c --- /dev/null +++ b/.cvsignore @@ -0,0 +1,16 @@ +*.asm +*.d +*.sym +_* +kernel +user1 +userfs +usertests +xv6.img +vectors.S +bochsout.txt +bootblock +bootother +bootother.out +parport.out +fmt diff --git a/.dir-locals.el b/.dir-locals.el new file mode 100644 index 0000000..da72247 --- /dev/null +++ b/.dir-locals.el @@ -0,0 +1,4 @@ +((c-mode + (indent-tabs-mode . nil) + (c-file-style . "bsd") + (c-basic-offset . 2))) diff --git a/.gdbinit.tmpl b/.gdbinit.tmpl new file mode 100644 index 0000000..f71681a --- /dev/null +++ b/.gdbinit.tmpl @@ -0,0 +1,27 @@ +set $lastcs = -1 + +define hook-stop + # There doesn't seem to be a good way to detect if we're in 16- or + # 32-bit mode, but in 32-bit mode we always run with CS == 8 in the + # kernel and CS == 35 in user space + if $cs == 8 || $cs == 35 + if $lastcs != 8 && $lastcs != 35 + set architecture i386 + end + x/i $pc + else + if $lastcs == -1 || $lastcs == 8 || $lastcs == 35 + set architecture i8086 + end + # Translate the segment:offset into a physical address + printf "[%4x:%4x] ", $cs, $eip + x/i $cs*16+$eip + end + set $lastcs = $cs +end + +echo + target remote localhost:1234\n +target remote localhost:1234 + +echo + symbol-file kernel\n +symbol-file kernel diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..85b11d0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,19 @@ +*~ +_* +*.o +*.d +*.asm +*.sym +*.img +vectors.S +bootblock +entryother +initcode +initcode.out +kernel +kernelmemfs +mkfs +.gdbinit +syscalltable.h +syscall.h +usys.S diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..8653e61 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,28 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "(gdb) Launch", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/kernel", + "args": [], + "stopAtEntry": true, + "cwd": "${workspaceFolder}", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "miDebuggerPath": "/usr/bin/gdb" + } + ] +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..1471a31 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,35 @@ +{ + // See https://go.microsoft.com/fwlink/?LinkId=733558 + // for the documentation about the tasks.json format + "version": "2.0.0", + "tasks": [ + { + "label": "Build Xv6 operating system", + "type": "shell", + "command": "make", + "problemMatcher": [], + "group": { + "kind": "build", + "isDefault": true + } + }, + { + "label": "Run Xv6 under QEMU", + "type": "shell", + "command": "make qemu", + "problemMatcher": [] + }, + { + "label": "Run Xv6 under QEMU for debugging", + "type": "shell", + "command": "make qemu-gdb", + "problemMatcher": [] + }, + { + "label": "Clean non-source files", + "type": "shell", + "command": "make clean", + "problemMatcher": [] + } +] +} \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..422c0cc --- /dev/null +++ b/LICENSE @@ -0,0 +1,24 @@ +The xv6 software is: + +Copyright (c) 2006-2018 Frans Kaashoek, Robert Morris, Russ Cox, + Massachusetts Institute of Technology + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, 
sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..6a41d37 --- /dev/null +++ b/Makefile @@ -0,0 +1,246 @@ +OBJS = \ + bio.o\ + console.o\ + exec.o\ + file.o\ + fs.o\ + ide.o\ + ioapic.o\ + kalloc.o\ + kbd.o\ + lapic.o\ + log.o\ + main.o\ + mp.o\ + picirq.o\ + pipe.o\ + proc.o\ + sleeplock.o\ + spinlock.o\ + string.o\ + swtch.o\ + syscall.o\ + sysfile.o\ + sysproc.o\ + trapasm.o\ + trap.o\ + uart.o\ + vectors.o\ + vm.o\ + +# Cross-compiling (e.g., on Mac OS X) +# TOOLPREFIX = i386-jos-elf + +# Using native tools (e.g., on X86 Linux) +#TOOLPREFIX = + +# Try to infer the correct TOOLPREFIX if not set +ifndef TOOLPREFIX +TOOLPREFIX := $(shell if i386-jos-elf-objdump -i 2>&1 | grep '^elf32-i386$$' >/dev/null 2>&1; \ + then echo 'i386-jos-elf-'; \ + elif objdump -i 2>&1 | grep 'elf32-i386' >/dev/null 2>&1; \ + then echo ''; \ + else echo "***" 1>&2; \ + echo "*** Error: Couldn't find an i386-*-elf version of GCC/binutils." 1>&2; \ + echo "*** Is the directory with i386-jos-elf-gcc in your PATH?" 
1>&2; \ + echo "*** If your i386-*-elf toolchain is installed with a command" 1>&2; \ + echo "*** prefix other than 'i386-jos-elf-', set your TOOLPREFIX" 1>&2; \ + echo "*** environment variable to that prefix and run 'make' again." 1>&2; \ + echo "*** To turn off this error, run 'gmake TOOLPREFIX= ...'." 1>&2; \ + echo "***" 1>&2; exit 1; fi) +endif + +# If the makefile can't find QEMU, specify its path here +# QEMU = qemu-system-i386 + +# Try to infer the correct QEMU +ifndef QEMU +QEMU = $(shell if which qemu > /dev/null; \ + then echo qemu; exit; \ + elif which qemu-system-i386 > /dev/null; \ + then echo qemu-system-i386; exit; \ + elif which qemu-system-x86_64 > /dev/null; \ + then echo qemu-system-x86_64; exit; \ + else \ + qemu=/Applications/Q.app/Contents/MacOS/i386-softmmu.app/Contents/MacOS/i386-softmmu; \ + if test -x $$qemu; then echo $$qemu; exit; fi; fi; \ + echo "***" 1>&2; \ + echo "*** Error: Couldn't find a working QEMU executable." 1>&2; \ + echo "*** Is the directory containing the qemu binary in your PATH" 1>&2; \ + echo "*** or have you tried setting the QEMU variable in Makefile?" 
1>&2; \ + echo "***" 1>&2; exit 1) +endif + +CC = $(TOOLPREFIX)gcc +AS = $(TOOLPREFIX)gas +LD = $(TOOLPREFIX)ld +OBJCOPY = $(TOOLPREFIX)objcopy +OBJDUMP = $(TOOLPREFIX)objdump +CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 -Werror -fno-omit-frame-pointer +CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector) +ASFLAGS = -m32 -gdwarf-2 -Wa,-divide +# FreeBSD ld wants ``elf_i386_fbsd'' +LDFLAGS += -m $(shell $(LD) -V | grep elf_i386 2>/dev/null | head -n 1) + +# Disable PIE when possible (for Ubuntu 16.10 toolchain) +ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]no-pie'),) +CFLAGS += -fno-pie -no-pie +endif +ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]nopie'),) +CFLAGS += -fno-pie -nopie +endif + +xv6.img: bootblock kernel + dd if=/dev/zero of=xv6.img count=10000 + dd if=bootblock of=xv6.img conv=notrunc + dd if=kernel of=xv6.img seek=1 conv=notrunc + +xv6memfs.img: bootblock kernelmemfs + dd if=/dev/zero of=xv6memfs.img count=10000 + dd if=bootblock of=xv6memfs.img conv=notrunc + dd if=kernelmemfs of=xv6memfs.img seek=1 conv=notrunc + +bootblock: bootasm.S bootmain.c + $(CC) $(CFLAGS) -fno-pic -O -nostdinc -I. -c bootmain.c + $(CC) $(CFLAGS) -fno-pic -nostdinc -I. -c bootasm.S + $(LD) $(LDFLAGS) -N -e start -Ttext 0x7C00 -o bootblock.o bootasm.o bootmain.o + $(OBJDUMP) -S bootblock.o > bootblock.asm + $(OBJCOPY) -S -O binary -j .text bootblock.o bootblock + # The following line is here since it has been noticed that if you use Explorer to + # copy folders on WSL, sometimes the execute permissions can be removed from Perl scripts. + # Uncomment if needed, but it will flag as a change for git. 
+ # chmod +x sign.pl + ./sign.pl bootblock + +syscall.h: gensyscalls.pl + ./gensyscalls.pl -h > syscall.h + +syscalltable.h: gensyscalls.pl + ./gensyscalls.pl -c > syscalltable.h + +usys.S: gensyscalls.pl + ./gensyscalls.pl -a > usys.S + +entryother: entryother.S + $(CC) $(CFLAGS) -fno-pic -nostdinc -I. -c entryother.S + $(LD) $(LDFLAGS) -N -e start -Ttext 0x7000 -o bootblockother.o entryother.o + $(OBJCOPY) -S -O binary -j .text bootblockother.o entryother + $(OBJDUMP) -S bootblockother.o > entryother.asm + +initcode: initcode.S + $(CC) $(CFLAGS) -nostdinc -I. -c initcode.S + $(LD) $(LDFLAGS) -N -e start -Ttext 0 -o initcode.out initcode.o + $(OBJCOPY) -S -O binary initcode.out initcode + $(OBJDUMP) -S initcode.o > initcode.asm + +kernel: syscall.h syscalltable.h $(OBJS) entry.o entryother initcode kernel.ld + $(LD) $(LDFLAGS) -T kernel.ld -o kernel entry.o $(OBJS) -b binary initcode entryother + $(OBJDUMP) -S kernel > kernel.asm + $(OBJDUMP) -t kernel | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernel.sym + +# kernelmemfs is a copy of kernel that maintains the +# disk image in memory instead of writing to a disk. +# This is not so useful for testing persistent storage or +# exploring disk buffering implementations, but it is +# great for testing the kernel on real hardware without +# needing a scratch disk. 
+MEMFSOBJS = $(filter-out ide.o,$(OBJS)) memide.o +kernelmemfs: $(MEMFSOBJS) entry.o entryother initcode kernel.ld fs.img + $(LD) $(LDFLAGS) -T kernel.ld -o kernelmemfs entry.o $(MEMFSOBJS) -b binary initcode entryother fs.img + $(OBJDUMP) -S kernelmemfs > kernelmemfs.asm + $(OBJDUMP) -t kernelmemfs | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernelmemfs.sym + +tags: $(OBJS) entryother.S _init + etags *.S *.c + +vectors.S: vectors.pl + # chmod +x vectors.pl + ./vectors.pl > vectors.S + +ULIB = ulib.o usys.o printf.o umalloc.o + +_%: %.o $(ULIB) + $(LD) $(LDFLAGS) -N -e _init -Ttext 0 -o $@ $^ + $(OBJDUMP) -S $@ > $*.asm + $(OBJDUMP) -t $@ | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > $*.sym + +_forktest: forktest.o $(ULIB) + # forktest has less library code linked in - needs to be small + # in order to be able to max out the proc table. + $(LD) $(LDFLAGS) -N -e _init -Ttext 0 -o _forktest forktest.o ulib.o usys.o + $(OBJDUMP) -S _forktest > forktest.asm + +mkfs: mkfs.c fs.h + gcc -Werror -Wall -o mkfs mkfs.c + +# Prevent deletion of intermediate files, e.g. cat.o, after first build, so +# that disk image changes after first build are persistent until clean. 
More +# details: +# http://www.gnu.org/software/make/manual/html_node/Chained-Rules.html +.PRECIOUS: %.o + +UPROGS=\ + _cat\ + _echo\ + _forktest\ + _grep\ + _init\ + _kill\ + _ln\ + _ls\ + _mkdir\ + _rm\ + _sh\ + _stressfs\ + _usertests\ + _wc\ + _zombie\ + +fs.img: mkfs $(UPROGS) + ./mkfs fs.img $(UPROGS) + +-include *.d + +clean: + rm -f *.tex *.dvi *.idx *.aux *.log *.ind *.ilg \ + *.o *.d *.asm *.sym vectors.S bootblock entryother \ + initcode initcode.out kernel xv6.img fs.img kernelmemfs \ + xv6memfs.img mkfs .gdbinit \ + syscall.h syscalltable.h usys.S \ + $(UPROGS) + +# run in emulators + +# try to generate a unique GDB port +GDBPORT = $(shell expr `id -u` % 5000 + 25000) +# QEMU's gdb stub command line changed in 0.11 +QEMUGDB = $(shell if $(QEMU) -help | grep -q '^-gdb'; \ + then echo "-gdb tcp::$(GDBPORT)"; \ + else echo "-s -p $(GDBPORT)"; fi) +ifndef CPUS +CPUS := 2 +endif +QEMUOPTS = -drive file=fs.img,index=1,media=disk,format=raw -drive file=xv6.img,index=0,media=disk,format=raw -smp $(CPUS) -m 512 $(QEMUEXTRA) + +qemu: fs.img xv6.img + $(QEMU) -vga std -serial mon:stdio $(QEMUOPTS) + +qemu-memfs: xv6memfs.img + $(QEMU) -vga std -drive file=xv6memfs.img,index=0,media=disk,format=raw -smp $(CPUS) -m 256 + +qemu-nox: fs.img xv6.img + $(QEMU) -nographic $(QEMUOPTS) + +qemu-curses: fs.img xv6.img + $(QEMU) -curses $(QEMUOPTS) + +.gdbinit: .gdbinit.tmpl + sed "s/localhost:1234/localhost:$(GDBPORT)/" < $^ > $@ + +qemu-gdb: fs.img xv6.img .gdbinit + @echo "*** Now run 'gdb'." 1>&2 + $(QEMU) -vga std -serial mon:stdio $(QEMUOPTS) -S $(QEMUGDB) + +qemu-nox-gdb: fs.img xv6.img .gdbinit + @echo "*** Now run 'gdb'." 
1>&2 + $(QEMU) -nographic $(QEMUOPTS) -S $(QEMUGDB) diff --git a/asm.h b/asm.h new file mode 100644 index 0000000..4a92aad --- /dev/null +++ b/asm.h @@ -0,0 +1,18 @@ +// +// assembler macros to create x86 segments +// + +#define SEG_NULLASM \ + .word 0, 0; \ + .byte 0, 0, 0, 0 + +// The 0xC0 means the limit is in 4096-byte units +// and (for executable segments) 32-bit mode. +#define SEG_ASM(type, base, lim) \ + .word(((lim) >> 12) & 0xffff), ((base) & 0xffff); \ + .byte(((base) >> 16) & 0xff), (0x90 | (type)), \ + (0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff) + +#define STA_X 0x8 // Executable segment +#define STA_W 0x2 // Writeable (non-executable segments) +#define STA_R 0x2 // Readable (executable segments) diff --git a/bio.c b/bio.c new file mode 100644 index 0000000..8faaf8c --- /dev/null +++ b/bio.c @@ -0,0 +1,140 @@ +// Buffer cache. +// +// The buffer cache is a linked list of buf structures holding +// cached copies of disk block contents. Caching disk blocks +// in memory reduces the number of disk reads and also provides +// a synchronization point for disk blocks used by multiple processes. +// +// Interface: +// * To get a buffer for a particular disk block, call bread. +// * After changing buffer data, call bwrite to write it to disk. +// * When done with the buffer, call brelse. +// * Do not use the buffer after calling brelse. +// * Only one process at a time can use a buffer, +// so do not keep them longer than necessary. +// +// The implementation uses two state flags internally: +// * B_VALID: the buffer data has been read from the disk. +// * B_DIRTY: the buffer data has been modified +// and needs to be written to disk. + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" + +struct { + struct spinlock lock; + struct buf buf[NBUF]; + + // Linked list of all buffers, through prev/next. + // head.next is most recently used. 
+ struct buf head; +} bcache; + +void binit(void) { + struct buf *b; + + initlock(&bcache.lock, "bcache"); + + // Create linked list of buffers + bcache.head.prev = &bcache.head; + bcache.head.next = &bcache.head; + for (b = bcache.buf; b < bcache.buf + NBUF; b++) { + b->next = bcache.head.next; + b->prev = &bcache.head; + initsleeplock(&b->lock, "buffer"); + bcache.head.next->prev = b; + bcache.head.next = b; + } +} + +// Look through buffer cache for block on device dev. +// If not found, allocate a buffer. +// In either case, return locked buffer. + +static struct buf* bget(uint dev, uint blockno) { + struct buf *b; + + acquire(&bcache.lock); + + // Is the block already cached? + for (b = bcache.head.next; b != &bcache.head; b = b->next) { + if (b->dev == dev && b->blockno == blockno) { + b->refcnt++; + release(&bcache.lock); + acquiresleep(&b->lock); + return b; + } + } + + // Not cached; recycle an unused buffer. + // Even if refcnt==0, B_DIRTY indicates a buffer is in use + // because log.c has modified it but not yet committed it. + + for (b = bcache.head.prev; b != &bcache.head; b = b->prev) { + if (b->refcnt == 0 && (b->flags & B_DIRTY) == 0) { + b->dev = dev; + b->blockno = blockno; + b->flags = 0; + b->refcnt = 1; + release(&bcache.lock); + acquiresleep(&b->lock); + return b; + } + } + panic("bget: no buffers"); +} + +// Return a locked buf with the contents of the indicated block. + +struct buf*bread(uint dev, uint blockno) { + struct buf *b; + + b = bget(dev, blockno); + if ((b->flags & B_VALID) == 0) { + iderw(b); + } + return b; +} + +// Write b's contents to disk. Must be locked. + +void bwrite(struct buf *b) { + if (!holdingsleep(&b->lock)) { + panic("bwrite"); + } + b->flags |= B_DIRTY; + iderw(b); +} + +// Release a locked buffer. +// Move to the head of the MRU list. 
+ +void brelse(struct buf *b) { + if (!holdingsleep(&b->lock)) { + panic("brelse"); + } + + releasesleep(&b->lock); + + acquire(&bcache.lock); + b->refcnt--; + if (b->refcnt == 0) { + // no one is waiting for it. + b->next->prev = b->prev; + b->prev->next = b->next; + b->next = bcache.head.next; + b->prev = &bcache.head; + bcache.head.next->prev = b; + bcache.head.next = b; + } + + release(&bcache.lock); +} + + + diff --git a/bootasm.S b/bootasm.S new file mode 100644 index 0000000..260a156 --- /dev/null +++ b/bootasm.S @@ -0,0 +1,81 @@ +#include "asm.h" +#include "memlayout.h" +#include "mmu.h" + +# Start the first CPU: switch to 32-bit protected mode, jump into C. +# The BIOS loads this code from the first sector of the hard disk into +# memory at physical address 0x7c00 and starts executing in real mode +# with %cs=0 %ip=7c00. + +.code16 # Assemble for 16-bit mode +.globl start +start: + cli # BIOS enabled interrupts; disable + + # Zero data segment registers DS, ES, and SS. + xorw %ax,%ax # Set %ax to zero + movw %ax,%ds # -> Data Segment + movw %ax,%es # -> Extra Segment + movw %ax,%ss # -> Stack Segment + + # Physical address line A20 is tied to zero so that the first PCs + # with 2 MB would run software that assumed 1 MB. Undo that. +seta20.1: + inb $0x64,%al # Wait for not busy + testb $0x2,%al + jnz seta20.1 + + movb $0xd1,%al # 0xd1 -> port 0x64 + outb %al,$0x64 + +seta20.2: + inb $0x64,%al # Wait for not busy + testb $0x2,%al + jnz seta20.2 + + movb $0xdf,%al # 0xdf -> port 0x60 + outb %al,$0x60 + + # Switch from real to protected mode. Use a bootstrap GDT that makes + # virtual addresses map directly to physical addresses so that the + # effective memory map doesn't change during the transition. + lgdt gdtdesc + movl %cr0, %eax + orl $CR0_PE, %eax + movl %eax, %cr0 + + # Complete the transition to 32-bit protected mode by using a long jmp + # to reload %cs and %eip. 
The segment descriptors are set up with no + # translation, so that the mapping is still the identity mapping. + ljmp $(SEG_KCODE<<3), $start32 + +.code32 # Tell assembler to generate 32-bit code now. +start32: + # Set up the protected-mode data segment registers + movw $(SEG_KDATA<<3), %ax # Our data segment selector + movw %ax, %ds # -> DS: Data Segment + movw %ax, %es # -> ES: Extra Segment + movw %ax, %ss # -> SS: Stack Segment + movw $0, %ax # Zero segments not ready for use + movw %ax, %fs # -> FS + movw %ax, %gs # -> GS + + # Set up the stack pointer and call into C. + movl $start, %esp + call bootmain + + # If bootmain returns (it shouldn't), loop. +spin: + jmp spin + + # Bootstrap GDT +.p2align 2 # force 4 byte alignment +gdt: + SEG_NULLASM # null seg + SEG_ASM(STA_X|STA_R, 0x0, 0xffffffff) # code seg + SEG_ASM(STA_W, 0x0, 0xffffffff) # data seg + +gdtdesc: + .word (gdtdesc - gdt - 1) # sizeof(gdt) - 1 + .long gdt # address gdt + diff --git a/bootmain.c b/bootmain.c new file mode 100644 index 0000000..7efd295 --- /dev/null +++ b/bootmain.c @@ -0,0 +1,93 @@ +// Boot loader. +// +// Part of the boot block, along with bootasm.S, which calls bootmain(). +// bootasm.S has put the processor into protected 32-bit mode. +// bootmain() loads an ELF kernel image from the disk starting at +// sector 1 and then jumps to the kernel entry routine. + +#include "types.h" +#include "elf.h" +#include "x86.h" +#include "memlayout.h" + +#define SECTSIZE 512 + +void readseg(uchar*, uint, uint); + +void bootmain(void) { + struct elfhdr *elf; + struct proghdr *ph, *eph; + void (*entry)(void); + uchar* pa; + + elf = (struct elfhdr*)0x10000; // scratch space + + // Read 1st page off disk + readseg((uchar*)elf, 4096, 0); + + // Is this an ELF executable? + if (elf->magic != ELF_MAGIC) { + return; // let bootasm.S handle error + + } + // Load each program segment (ignores ph flags). 
+ ph = (struct proghdr*)((uchar*)elf + elf->phoff); + eph = ph + elf->phnum; + for (; ph < eph; ph++) { + pa = (uchar*)ph->paddr; + readseg(pa, ph->filesz, ph->off); + if (ph->memsz > ph->filesz) { + stosb(pa + ph->filesz, 0, ph->memsz - ph->filesz); + } + } + + // Call the entry point from the ELF header. + // Does not return! + entry = (void (*)(void))(elf->entry); + entry(); +} + +void waitdisk(void) { + // Wait for disk ready. + while ((inb(0x1F7) & 0xC0) != 0x40) { + ; + } +} + +// Read a single sector at offset into dst. +void readsect(void *dst, uint offset) { + // Issue command. + waitdisk(); + outb(0x1F2, 1); // count = 1 + outb(0x1F3, offset); + outb(0x1F4, offset >> 8); + outb(0x1F5, offset >> 16); + outb(0x1F6, (offset >> 24) | 0xE0); + outb(0x1F7, 0x20); // cmd 0x20 - read sectors + + // Read data. + waitdisk(); + insl(0x1F0, dst, SECTSIZE / 4); +} + +// Read 'count' bytes at 'offset' from kernel into physical address 'pa'. +// Might copy more than asked. + +void readseg(uchar* pa, uint count, uint offset) { + uchar* epa; + + epa = pa + count; + + // Round down to sector boundary. + pa -= offset % SECTSIZE; + + // Translate from bytes to sectors; kernel starts at sector 1. + offset = (offset / SECTSIZE) + 1; + + // If this is too slow, we could read lots of sectors at a time. + // We'd write more to memory than asked, but it doesn't matter -- + // we load in increasing order. 
+ for (; pa < epa; pa += SECTSIZE, offset++) { + readsect(pa, offset); + } +} diff --git a/buf.h b/buf.h new file mode 100644 index 0000000..d5d8d4d --- /dev/null +++ b/buf.h @@ -0,0 +1,14 @@ +struct buf { + int flags; + uint dev; + uint blockno; + struct sleeplock lock; + uint refcnt; + struct buf *prev; // LRU cache list + struct buf *next; + struct buf *qnext; // disk queue + uchar data[BSIZE]; +}; +#define B_VALID 0x2 // buffer has been read from disk +#define B_DIRTY 0x4 // buffer needs to be written to disk + diff --git a/cat.c b/cat.c new file mode 100644 index 0000000..b523487 --- /dev/null +++ b/cat.c @@ -0,0 +1,39 @@ +#include "types.h" +#include "stat.h" +#include "user.h" + +char buf[512]; + +void cat(int fd) { + int n; + + while ((n = read(fd, buf, sizeof(buf))) > 0) { + if (write(1, buf, n) != n) { + printf(1, "cat: write error\n"); + exit(); + } + } + if (n < 0) { + printf(1, "cat: read error\n"); + exit(); + } +} + +int main(int argc, char *argv[]) { + int fd, i; + + if (argc <= 1) { + cat(0); + exit(); + } + + for (i = 1; i < argc; i++) { + if ((fd = open(argv[i], 0)) < 0) { + printf(1, "cat: cannot open %s\n", argv[i]); + exit(); + } + cat(fd); + close(fd); + } + exit(); +} diff --git a/config.xlaunch b/config.xlaunch new file mode 100644 index 0000000..63548ab --- /dev/null +++ b/config.xlaunch @@ -0,0 +1,2 @@ + + diff --git a/console.c b/console.c new file mode 100644 index 0000000..a330c97 --- /dev/null +++ b/console.c @@ -0,0 +1,332 @@ +// Console input and output. +// Input is from the keyboard or serial port. +// Output is written to the screen and serial port. 
+ +#include "types.h" +#include "defs.h" +#include "param.h" +#include "traps.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "file.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" +#include "x86.h" + +#define INPUT_BUF 128 + +struct kbdbuffer { + char buf[INPUT_BUF]; + uint r; // Read index + uint w; // Write index + uint e; // Edit index +}; + +struct kbdbuffer inputBuffer; + +struct kbdbuffer * input = 0; + +#define C(x) ((x) - '@') // Control-x + + + +static void consputc(int); + +static int panicked = 0; + +static struct { + struct spinlock lock; + int locking; +} cons; + +static void printint(int xx, int base, int sign) { + static char digits[] = "0123456789abcdef"; + char buf[16]; + int i; + uint x; + + if (sign && (sign = xx < 0)) { + x = -(uint)xx; + } + else { + x = xx; + } + + i = 0; + do { + buf[i++] = digits[x % base]; + } + while ((x /= base) != 0); + + if (sign) { + buf[i++] = '-'; + } + + while (--i >= 0) { + consputc(buf[i]); + } +} + +// Print to the console. only understands %d, %x, %p, %s. +void cprintf(char *fmt, ...) { + int i, c, locking; + uint *argp; + char *s; + + locking = cons.locking; + if (locking) { + acquire(&cons.lock); + } + + if (fmt == 0) { + panic("null fmt"); + } + + argp = (uint*)(void*)(&fmt + 1); + for (i = 0; (c = fmt[i] & 0xff) != 0; i++) { + if (c != '%') { + consputc(c); + continue; + } + c = fmt[++i] & 0xff; + if (c == 0) { + break; + } + switch (c) { + case 'd': + printint(*argp++, 10, 1); + break; + case 'x': + case 'p': + printint(*argp++, 16, 0); + break; + case 's': + if ((s = (char*)*argp++) == 0) { + s = "(null)"; + } + for (; *s; s++) { + consputc(*s); + } + break; + case '%': + consputc('%'); + break; + default: + // Print unknown % sequence to draw attention. 
+ consputc('%'); + consputc(c); + break; + } + } + + if (locking) { + release(&cons.lock); + } +} + +void panic(char *s) { + int i; + uint pcs[10]; + + cli(); + cons.locking = 0; + // use lapicid() so that we can call panic from mycpu() + cprintf("lapicid %d: panic: ", lapicid()); + cprintf("%s", s); + cprintf("\n"); + getcallerpcs(&s, pcs); + for (i = 0; i < 10; i++) { + cprintf(" %p", pcs[i]); + } + panicked = 1; // freeze other CPU + for (;;) { + ; + } +} + +#define BACKSPACE 0x100 +#define CRTPORT 0x3d4 +static ushort *crt = (ushort*)P2V(0xb8000); // CGA memory + +static void cgaputc(int c) { + int pos; + + // Cursor position: col + 80*row. + outb(CRTPORT, 14); + pos = inb(CRTPORT + 1) << 8; + outb(CRTPORT, 15); + pos |= inb(CRTPORT + 1); + + if (c == '\n') { + pos += 80 - pos % 80; + } + else if (c == BACKSPACE) { + if (pos > 0) { + --pos; + } + } + else { + crt[pos++] = (c & 0xff) | 0x0700; // light grey on black + + } + if (pos < 0 || pos > 25 * 80) { + panic("pos under/overflow"); + } + + if ((pos / 80) >= 24) { // Scroll up. + memmove(crt, crt + 80, sizeof(crt[0]) * 23 * 80); + pos -= 80; + memset(crt + pos, 0, sizeof(crt[0]) * (24 * 80 - pos)); + } + + outb(CRTPORT, 14); + outb(CRTPORT + 1, pos >> 8); + outb(CRTPORT, 15); + outb(CRTPORT + 1, pos); + crt[pos] = ' ' | 0x0700; +} + +void consputc(int c) { + if (panicked) { + cli(); + for (;;) { + ; + } + } + + if (c == BACKSPACE) { + uartputc('\b'); + uartputc(' '); + uartputc('\b'); + } + else { + uartputc(c); + } + cgaputc(c); +} + +int consoleget(void) { + int c; + + acquire(&cons.lock); + + while ((c = kbdgetc()) <= 0) { + if (c == 0) { + c = kbdgetc(); + } + } + + release(&cons.lock); + + return c; +} + +void consoleintr(int (*getc)(void)) { + int c, doprocdump = 0; + + acquire(&cons.lock); + while ((c = getc()) >= 0) { + switch (c) { + case C('P'): // Process listing. + // procdump() locks cons.lock indirectly; invoke later + doprocdump = 1; + break; + case C('U'): // Kill line. 
+ while (input->e != input->w && + input->buf[(input->e - 1) % INPUT_BUF] != '\n') { + input->e--; + consputc(BACKSPACE); + } + break; + case C('H'): + case '\x7f': // Backspace + if (input->e != input->w) { + input->e--; + consputc(BACKSPACE); + } + break; + default: + if (c != 0 && input->e - input->r < INPUT_BUF) { + c = (c == '\r') ? '\n' : c; + input->buf[input->e++ % INPUT_BUF] = c; + consputc(c); + if (c == '\n' || c == C('D') || input->e == input->r + INPUT_BUF) { + input->w = input->e; + wakeup(&(input->r)); + } + } + break; + } + } + release(&cons.lock); + if (doprocdump) { + procdump(); // now call procdump() wo. cons.lock held + } +} + +int consoleread(struct inode *ip, char *dst, int n) { + uint target; + int c; + + iunlock(ip); + target = n; + acquire(&cons.lock); + while (n > 0) { + while (input->r == input->w) { + if (myproc()->killed) { + release(&cons.lock); + ilock(ip); + return -1; + } + sleep(&(input->r), &cons.lock); + } + c = input->buf[input->r++ % INPUT_BUF]; + if (c == C('D')) { // EOF + if (n < target) { + // Save ^D for next time, to make sure + // caller gets a 0-byte result. 
+ input->r--; + } + break; + } + *dst++ = c; + --n; + if (c == '\n') { + break; + } + } + release(&cons.lock); + ilock(ip); + + return target - n; +} + +int consolewrite(struct inode *ip, char *buf, int n) { + int i; + + iunlock(ip); + acquire(&cons.lock); + for (i = 0; i < n; i++) { + consputc(buf[i] & 0xff); + } + release(&cons.lock); + ilock(ip); + + return n; +} + +void consoleinit(void) { + initlock(&cons.lock, "console"); + + // Initialise pointer to point to our console input buffer + input = &inputBuffer; + + devsw[CONSOLE].write = consolewrite; + devsw[CONSOLE].read = consoleread; + cons.locking = 1; + + ioapicenable(IRQ_KBD, 0); +} + diff --git a/cuth b/cuth new file mode 100644 index 0000000..cce8c0c --- /dev/null +++ b/cuth @@ -0,0 +1,48 @@ +#!/usr/bin/perl + +$| = 1; + +sub writefile($@){ + my ($file, @lines) = @_; + + sleep(1); + open(F, ">$file") || die "open >$file: $!"; + print F @lines; + close(F); +} + +# Cut out #include lines that don't contribute anything. +for($i=0; $i<@ARGV; $i++){ + $file = $ARGV[$i]; + if(!open(F, $file)){ + print STDERR "open $file: $!\n"; + next; + } + @lines = <F>; + close(F); + + $obj = "$file.o"; + $obj =~ s/\.c\.o$/.o/; + system("touch $file"); + + if(system("make CC='gcc -Werror' $obj >/dev/null 2>\&1") != 0){ + print STDERR "make $obj failed: $rv\n"; + next; + } + + system("cp $file =$file"); + for($j=@lines-1; $j>=0; $j--){ + if($lines[$j] =~ /^#include/){ + $old = $lines[$j]; + $lines[$j] = "/* CUT-H */\n"; + writefile($file, @lines); + if(system("make CC='gcc -Werror' $obj >/dev/null 2>\&1") != 0){ + $lines[$j] = $old; + }else{ + print STDERR "$file $old"; + } + } + } + writefile($file, grep {!/CUT-H/} @lines); + system("rm =$file"); +} diff --git a/date.h b/date.h new file mode 100644 index 0000000..f2e81b0 --- /dev/null +++ b/date.h @@ -0,0 +1,8 @@ +struct rtcdate { + uint second; + uint minute; + uint hour; + uint day; + uint month; + uint year; +}; diff --git a/defs.h b/defs.h new file mode 100644 index 
0000000..02710a1 --- /dev/null +++ b/defs.h @@ -0,0 +1,191 @@ +struct buf; +struct context; +struct file; +struct inode; +struct pipe; +struct proc; +struct rtcdate; +struct spinlock; +struct sleeplock; +struct stat; +struct superblock; + +// bio.c +void binit(void); +struct buf* bread(uint, uint); +void brelse(struct buf*); +void bwrite(struct buf*); + +// console.c +void consoleinit(void); +void cprintf(char*, ...); +void consoleintr(int (*)(void)); +int consoleget(void); +void panic(char*) __attribute__((noreturn)); + +// exec.c +int exec(char*, char**); + +// file.c +struct file* filealloc(void); +void fileclose(struct file*); +struct file* filedup(struct file*); +void fileinit(void); +int fileread(struct file*, char*, int n); +int filestat(struct file*, struct stat*); +int filewrite(struct file*, char*, int n); + +// fs.c +void readsb(int dev, struct superblock *sb); +int dirlink(struct inode*, char*, uint); +struct inode* dirlookup(struct inode*, char*, uint*); +struct inode* ialloc(uint, short); +struct inode* idup(struct inode*); +void iinit(int dev); +void ilock(struct inode*); +void iput(struct inode*); +void iunlock(struct inode*); +void iunlockput(struct inode*); +void iupdate(struct inode*); +int namecmp(const char*, const char*); +struct inode* namei(char*); +struct inode* nameiparent(char*, char*); +int readi(struct inode*, char*, uint, uint); +void stati(struct inode*, struct stat*); +int writei(struct inode*, char*, uint, uint); + +// ide.c +void ideinit(void); +void ideintr(void); +void iderw(struct buf*); + +// ioapic.c +void ioapicenable(int irq, int cpu); +extern uchar ioapicid; +void ioapicinit(void); + +// kalloc.c +char* kalloc(void); +void kfree(char*); +void kinit1(void*, void*); +void kinit2(void*, void*); + +// kbd.c +void kbdintr(void); +int kbdgetc(void); + +// lapic.c +void cmostime(struct rtcdate *r); +int lapicid(void); +extern volatile uint* lapic; +void lapiceoi(void); +void lapicinit(void); +void lapicstartap(uchar, uint); +void 
// Kernel-wide prototypes for log.c, mp.c, picirq.c, pipe.c and proc.c.
//
// Every function that takes no arguments is declared with an explicit
// (void) parameter list.  Before C23 an empty list `()` declares a function
// with *unspecified* parameters, so a call such as begin_op(1) would compile
// without a diagnostic; (void) turns the declaration into a real prototype.

// log.c
void            initlog(int dev);
void            log_write(struct buf*);
void            begin_op(void);
void            end_op(void);

// mp.c
extern int      ismp;
void            mpinit(void);

// picirq.c
void            picenable(int);
void            picinit(void);

// pipe.c
int             pipealloc(struct file**, struct file**);
void            pipeclose(struct pipe*, int);
int             piperead(struct pipe*, char*, int);
int             pipewrite(struct pipe*, char*, int);

// proc.c
int             cpuid(void);
void            exit(void);
int             fork(void);
int             growproc(int);
int             kill(int);
struct cpu*     mycpu(void);
struct proc*    myproc(void);
void            pinit(void);
void            procdump(void);
void            scheduler(void) __attribute__((noreturn));
void            sched(void);
void            setproc(struct proc*);
void            sleep(void*, struct spinlock*);
void            userinit(void);
int             wait(void);
void            wakeup(void*);
void            yield(void);
seginit(void); +void kvmalloc(void); +pde_t* setupkvm(void); +char* uva2ka(pde_t*, char*); +int allocuvm(pde_t*, uint, uint); +int deallocuvm(pde_t*, uint, uint); +void freevm(pde_t*); +void inituvm(pde_t*, char*, uint); +int loaduvm(pde_t*, char*, struct inode*, uint, uint); +pde_t* copyuvm(pde_t*, uint); +void switchuvm(struct proc*); +void switchkvm(void); +int copyout(pde_t*, uint, void*, uint); +void clearpteu(pde_t *pgdir, char *uva); + +// number of elements in fixed-size array +#define NELEM(x) (sizeof(x) / sizeof((x)[0])) diff --git a/echo.c b/echo.c new file mode 100644 index 0000000..89b105c --- /dev/null +++ b/echo.c @@ -0,0 +1,12 @@ +#include "types.h" +#include "stat.h" +#include "user.h" + +int main(int argc, char *argv[]) { + int i; + + for (i = 1; i < argc; i++) { + printf(1, "%s%s", argv[i], i + 1 < argc ? " " : "\n"); + } + exit(); +} diff --git a/elf.h b/elf.h new file mode 100644 index 0000000..0a312a5 --- /dev/null +++ b/elf.h @@ -0,0 +1,42 @@ +// Format of an ELF executable file + +#define ELF_MAGIC 0x464C457FU // "\x7FELF" in little endian + +// File header +struct elfhdr { + uint magic; // must equal ELF_MAGIC + uchar elf[12]; + ushort type; + ushort machine; + uint version; + uint entry; + uint phoff; + uint shoff; + uint flags; + ushort ehsize; + ushort phentsize; + ushort phnum; + ushort shentsize; + ushort shnum; + ushort shstrndx; +}; + +// Program section header +struct proghdr { + uint type; + uint off; + uint vaddr; + uint paddr; + uint filesz; + uint memsz; + uint flags; + uint align; +}; + +// Values for Proghdr type +#define ELF_PROG_LOAD 1 + +// Flag bits for Proghdr flags +#define ELF_PROG_FLAG_EXEC 1 +#define ELF_PROG_FLAG_WRITE 2 +#define ELF_PROG_FLAG_READ 4 diff --git a/entry.S b/entry.S new file mode 100644 index 0000000..2d9504d --- /dev/null +++ b/entry.S @@ -0,0 +1,68 @@ +# The xv6 kernel starts executing in this file. This file is linked with +# the kernel C code, so it can refer to kernel symbols such as main(). 
# Entering xv6 on boot processor, with paging off.
#
# Steps, in order: enable 4Mbyte pages (CR4_PSE), point %cr3 at
# entrypgdir (converted with V2P_WO), set CR0_PG|CR0_WP in %cr0, load
# %esp with the top of the KSTACKSIZE-byte `stack` area reserved by the
# .comm directive at the end, then jump to main().
.globl entry
entry:
  # Turn on page size extension for 4Mbyte pages
  movl    %cr4, %eax
  orl     $(CR4_PSE), %eax
  movl    %eax, %cr4
  # Set page directory
  movl    $(V2P_WO(entrypgdir)), %eax
  movl    %eax, %cr3
  # Turn on paging.
  movl    %cr0, %eax
  orl     $(CR0_PG|CR0_WP), %eax
  movl    %eax, %cr0

  # Set up the stack pointer.
  # %esp starts at the highest address of the stack area.
  movl $(stack + KSTACKSIZE), %esp

  # Jump to main(), and switch to executing at
  # high addresses. The indirect jump is needed because
  # the assembler produces a PC-relative instruction
  # for a direct jump.
  mov $main, %eax
  jmp *%eax

# Storage for the boot stack used above.
.comm stack, KSTACKSIZE
# Real-mode entry point for a non-boot CPU.  This part clears the
# segment registers, loads the bootstrap GDT, sets CR0_PE and far-jumps
# to start32 to continue in 32-bit protected mode.
.code16
.globl start
start:
  # Disable interrupts.
  cli

  # Zero data segment registers DS, ES, and SS.
  xorw    %ax,%ax
  movw    %ax,%ds
  movw    %ax,%es
  movw    %ax,%ss

  # Switch from real to protected mode.  Use a bootstrap GDT that makes
  # virtual addresses map directly to physical addresses so that the
  # effective memory map doesn't change during the transition.
  lgdt    gdtdesc
  movl    %cr0, %eax
  orl     $CR0_PE, %eax
  movl    %eax, %cr0

  # Complete the transition to 32-bit protected mode by using a long jmp
  # to reload %cs and %eip.  The segment descriptors are set up with no
  # translation, so that the mapping is still the identity mapping.
  ljmpl    $(SEG_KCODE<<3), $(start32)
+ movl %cr0, %eax + orl $(CR0_PE|CR0_PG|CR0_WP), %eax + movl %eax, %cr0 + + # Switch to the stack allocated by startothers() + movl (start-4), %esp + # Call mpenter() + call *(start-8) + + # We should never return. +spin: + jmp spin + +.p2align 2 +gdt: + SEG_NULLASM + SEG_ASM(STA_X|STA_R, 0, 0xffffffff) + SEG_ASM(STA_W, 0, 0xffffffff) + + +gdtdesc: + .word (gdtdesc - gdt - 1) + .long gdt + diff --git a/exec.c b/exec.c new file mode 100644 index 0000000..73c54b1 --- /dev/null +++ b/exec.c @@ -0,0 +1,142 @@ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" +#include "defs.h" +#include "x86.h" +#include "elf.h" + +void cleanupexec(pde_t * pgdir, struct inode *ip) { + if (pgdir) { + freevm(pgdir); + } + if (ip) { + iunlockput(ip); + end_op(); + } +} + +int exec(char *path, char **argv) { + char *s, *last; + int i, off; + uint argc, sz, sp, ustack[3 + MAXARG + 1]; + struct elfhdr elf; + struct inode *ip; + struct proghdr ph; + pde_t *pgdir, *oldpgdir; + struct proc *curproc = myproc(); + + begin_op(); + + if ((ip = namei(path)) == 0) { + end_op(); + cprintf("exec: fail\n"); + return -1; + } + ilock(ip); + pgdir = 0; + + // Check ELF header + if (readi(ip, (char*)&elf, 0, sizeof(elf)) != sizeof(elf)) { + cleanupexec(pgdir, ip); + return -1; + } + if (elf.magic != ELF_MAGIC) { + cleanupexec(pgdir, ip); + return -1; + } + + if ((pgdir = setupkvm()) == 0) { + cleanupexec(pgdir, ip); + return -1; + } + + // Load program into memory. 
+ sz = 0; + for (i = 0, off = elf.phoff; i < elf.phnum; i++, off += sizeof(ph)) { + if (readi(ip, (char*)&ph, off, sizeof(ph)) != sizeof(ph)) { + cleanupexec(pgdir, ip); + return -1; + } + if (ph.type != ELF_PROG_LOAD) { + continue; + } + if (ph.memsz < ph.filesz) { + cleanupexec(pgdir, ip); + return -1; + } + if (ph.vaddr + ph.memsz < ph.vaddr) { + cleanupexec(pgdir, ip); + return -1; + } + if ((sz = allocuvm(pgdir, sz, ph.vaddr + ph.memsz)) == 0) { + cleanupexec(pgdir, ip); + return -1; + } + if (ph.vaddr % PGSIZE != 0) { + cleanupexec(pgdir, ip); + return -1; + } + if (loaduvm(pgdir, (char*)ph.vaddr, ip, ph.off, ph.filesz) < 0) { + cleanupexec(pgdir, ip); + return -1; + } + } + iunlockput(ip); + end_op(); + ip = 0; + + // Allocate two pages at the next page boundary. + // Make the first inaccessible. Use the second as the user stack. + sz = PGROUNDUP(sz); + if ((sz = allocuvm(pgdir, sz, sz + 2 * PGSIZE)) == 0) { + cleanupexec(pgdir, ip); + return -1; + } + clearpteu(pgdir, (char*)(sz - 2 * PGSIZE)); + sp = sz; + + // Push argument strings, prepare rest of stack in ustack. + for (argc = 0; argv[argc]; argc++) { + if (argc >= MAXARG) { + cleanupexec(pgdir, ip); + return -1; + } + sp = (sp - (strlen(argv[argc]) + 1)) & ~3; + if (copyout(pgdir, sp, argv[argc], strlen(argv[argc]) + 1) < 0) { + cleanupexec(pgdir, ip); + return -1; + } + ustack[3 + argc] = sp; + } + ustack[3 + argc] = 0; + + ustack[0] = 0xffffffff; // fake return PC + ustack[1] = argc; + ustack[2] = sp - (argc + 1) * 4; // argv pointer + + sp -= (3 + argc + 1) * 4; + if (copyout(pgdir, sp, ustack, (3 + argc + 1) * 4) < 0) { + cleanupexec(pgdir, ip); + return -1; + } + + // Save program name for debugging. + for (last = s = path; *s; s++) { + if (*s == '/') { + last = s + 1; + } + } + safestrcpy(curproc->name, last, sizeof(curproc->name)); + + // Commit to the user image. 
+ oldpgdir = curproc->pgdir; + curproc->pgdir = pgdir; + curproc->sz = sz; + curproc->tf->eip = elf.entry; // main + curproc->tf->esp = sp; + switchuvm(curproc); + freevm(oldpgdir); + return 0; +} diff --git a/fcntl.h b/fcntl.h new file mode 100644 index 0000000..d565483 --- /dev/null +++ b/fcntl.h @@ -0,0 +1,4 @@ +#define O_RDONLY 0x000 +#define O_WRONLY 0x001 +#define O_RDWR 0x002 +#define O_CREATE 0x200 diff --git a/file.c b/file.c new file mode 100644 index 0000000..bbd83ab --- /dev/null +++ b/file.c @@ -0,0 +1,155 @@ +// +// File descriptors +// + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "fs.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "file.h" + +struct devsw devsw[NDEV]; +struct { + struct spinlock lock; + struct file file[NFILE]; +} ftable; + +void fileinit(void) { + initlock(&ftable.lock, "ftable"); +} + +// Allocate a file structure. +struct file* filealloc(void) { + struct file *f; + + acquire(&ftable.lock); + for (f = ftable.file; f < ftable.file + NFILE; f++) { + if (f->ref == 0) { + f->ref = 1; + release(&ftable.lock); + return f; + } + } + release(&ftable.lock); + return 0; +} + +// Increment ref count for file f. +struct file* filedup(struct file *f) { + acquire(&ftable.lock); + if (f->ref < 1) { + panic("filedup"); + } + f->ref++; + release(&ftable.lock); + return f; +} + +// Close file f. (Decrement ref count, close when reaches 0.) +void fileclose(struct file *f) { + struct file ff; + + acquire(&ftable.lock); + if (f->ref < 1) { + panic("fileclose"); + } + if (--f->ref > 0) { + release(&ftable.lock); + return; + } + ff = *f; + f->ref = 0; + f->type = FD_NONE; + release(&ftable.lock); + + if (ff.type == FD_PIPE) { + pipeclose(ff.pipe, ff.writable); + } + else if (ff.type == FD_INODE) { + begin_op(); + iput(ff.ip); + end_op(); + } +} + +// Get metadata about file f. 
+int filestat(struct file *f, struct stat *st) { + if (f->type == FD_INODE) { + ilock(f->ip); + stati(f->ip, st); + iunlock(f->ip); + return 0; + } + return -1; +} + +// Read from file f. +int fileread(struct file *f, char *addr, int n) { + int r; + + if (f->readable == 0) { + return -1; + } + if (f->type == FD_PIPE) { + return piperead(f->pipe, addr, n); + } + if (f->type == FD_INODE) { + ilock(f->ip); + if ((r = readi(f->ip, addr, f->off, n)) > 0) { + f->off += r; + } + iunlock(f->ip); + return r; + } + panic("fileread"); +} + + +// Write to file f. +int filewrite(struct file *f, char *addr, int n) { + int r; + + if (f->writable == 0) { + return -1; + } + if (f->type == FD_PIPE) { + return pipewrite(f->pipe, addr, n); + } + if (f->type == FD_INODE) { + // write a few blocks at a time to avoid exceeding + // the maximum log transaction size, including + // i-node, indirect block, allocation blocks, + // and 2 blocks of slop for non-aligned writes. + // this really belongs lower down, since writei() + // might be writing a device like the console. + int max = ((MAXOPBLOCKS - 1 - 1 - 2) / 2) * 512; + int i = 0; + while (i < n) { + int n1 = n - i; + if (n1 > max) { + n1 = max; + } + + begin_op(); + ilock(f->ip); + if ((r = writei(f->ip, addr + i, f->off, n1)) > 0) { + f->off += r; + } + iunlock(f->ip); + end_op(); + + if (r < 0) { + break; + } + if (r != n1) { + panic("short filewrite"); + } + i += r; + } + return i == n ? 
// One open file: an entry of the file table (ftable in file.c).
struct file {
    // What the entry refers to; fileclose() resets it to FD_NONE when the
    // last reference is dropped.
    enum { FD_NONE, FD_PIPE, FD_INODE } type;
    int ref; // reference count
    char readable;       // zero => fileread() returns -1
    char writable;       // zero => filewrite() returns -1
    struct pipe *pipe;   // used when type == FD_PIPE
    struct inode *ip;    // used when type == FD_INODE
    uint off;            // offset passed to readi()/writei(); advanced by the
                         // number of bytes transferred
};
{ + write(fd, s, strlen(s)); +} + +void forktest(void) { + int n, pid; + + printf(1, "fork test\n"); + + for (n = 0; n < N; n++) { + pid = fork(); + if (pid < 0) { + break; + } + if (pid == 0) { + exit(); + } + } + + if (n == N) { + printf(1, "fork claimed to work N times!\n", N); + exit(); + } + + for (; n > 0; n--) { + if (wait() < 0) { + printf(1, "wait stopped early\n"); + exit(); + } + } + + if (wait() != -1) { + printf(1, "wait got too many\n"); + exit(); + } + + printf(1, "fork test OK\n"); +} + +int main(int argc, char* argv[]) { + forktest(); + exit(); +} diff --git a/fs.c b/fs.c new file mode 100644 index 0000000..9ada0f2 --- /dev/null +++ b/fs.c @@ -0,0 +1,649 @@ +// File system implementation. Five layers: +// + Blocks: allocator for raw disk blocks. +// + Log: crash recovery for multi-step updates. +// + Files: inode allocator, reading, writing, metadata. +// + Directories: inode with special contents (list of other inodes!) +// + Names: paths like /usr/rtm/xv6/fs.c for convenient naming. +// +// This file contains the low-level file system manipulation +// routines. The (higher-level) system call implementations +// are in sysfile.c. + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "stat.h" +#include "mmu.h" +#include "proc.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" +#include "file.h" + +#define min(a, b) ((a) < (b) ? (a) : (b)) + +static void itrunc(struct inode*); +// there should be one superblock per disk device, but we run with +// only one device +struct superblock sb; + +// Read the super block. +void readsb(int dev, struct superblock *sb) { + struct buf *bp; + + bp = bread(dev, 1); + memmove(sb, bp->data, sizeof(*sb)); + brelse(bp); +} + +// Zero a block. +static void bzero(int dev, int bno) { + struct buf *bp; + + bp = bread(dev, bno); + memset(bp->data, 0, BSIZE); + log_write(bp); + brelse(bp); +} + +// Blocks. + +// Allocate a zeroed disk block. 
+static uint balloc(uint dev) { + int b, bi, m; + struct buf *bp; + + bp = 0; + for (b = 0; b < sb.size; b += BPB) { + bp = bread(dev, BBLOCK(b, sb)); + for (bi = 0; bi < BPB && b + bi < sb.size; bi++) { + m = 1 << (bi % 8); + if ((bp->data[bi / 8] & m) == 0) { // Is block free? + bp->data[bi / 8] |= m; // Mark block in use. + log_write(bp); + brelse(bp); + bzero(dev, b + bi); + return b + bi; + } + } + brelse(bp); + } + panic("balloc: out of blocks"); +} + +// Free a disk block. +static void bfree(int dev, uint b) { + struct buf *bp; + int bi, m; + + bp = bread(dev, BBLOCK(b, sb)); + bi = b % BPB; + m = 1 << (bi % 8); + if ((bp->data[bi / 8] & m) == 0) { + panic("freeing free block"); + } + bp->data[bi / 8] &= ~m; + log_write(bp); + brelse(bp); +} + +// Inodes. +// +// An inode describes a single unnamed file. +// The inode disk structure holds metadata: the file's type, +// its size, the number of links referring to it, and the +// list of blocks holding the file's content. +// +// The inodes are laid out sequentially on disk at +// sb.startinode. Each inode has a number, indicating its +// position on the disk. +// +// The kernel keeps a cache of in-use inodes in memory +// to provide a place for synchronizing access +// to inodes used by multiple processes. The cached +// inodes include book-keeping information that is +// not stored on disk: ip->ref and ip->valid. +// +// An inode and its in-memory representation go through a +// sequence of states before they can be used by the +// rest of the file system code. +// +// * Allocation: an inode is allocated if its type (on disk) +// is non-zero. ialloc() allocates, and iput() frees if +// the reference and link counts have fallen to zero. +// +// * Referencing in cache: an entry in the inode cache +// is free if ip->ref is zero. Otherwise ip->ref tracks +// the number of in-memory pointers to the entry (open +// files and current directories). 
iget() finds or +// creates a cache entry and increments its ref; iput() +// decrements ref. +// +// * Valid: the information (type, size, &c) in an inode +// cache entry is only correct when ip->valid is 1. +// ilock() reads the inode from +// the disk and sets ip->valid, while iput() clears +// ip->valid if ip->ref has fallen to zero. +// +// * Locked: file system code may only examine and modify +// the information in an inode and its content if it +// has first locked the inode. +// +// Thus a typical sequence is: +// ip = iget(dev, inum) +// ilock(ip) +// ... examine and modify ip->xxx ... +// iunlock(ip) +// iput(ip) +// +// ilock() is separate from iget() so that system calls can +// get a long-term reference to an inode (as for an open file) +// and only lock it for short periods (e.g., in read()). +// The separation also helps avoid deadlock and races during +// pathname lookup. iget() increments ip->ref so that the inode +// stays cached and pointers to it remain valid. +// +// Many internal file system functions expect the caller to +// have locked the inodes involved; this lets callers create +// multi-step atomic operations. +// +// The icache.lock spin-lock protects the allocation of icache +// entries. Since ip->ref indicates whether an entry is free, +// and ip->dev and ip->inum indicate which i-node an entry +// holds, one must hold icache.lock while using any of those fields. +// +// An ip->lock sleep-lock protects all ip-> fields other than ref, +// dev, and inum. One must hold ip->lock in order to +// read or write that inode's ip->valid, ip->size, ip->type, &c. 
+ +struct { + struct spinlock lock; + struct inode inode[NINODE]; +} icache; + +void iinit(int dev) { + int i = 0; + + initlock(&icache.lock, "icache"); + for (i = 0; i < NINODE; i++) { + initsleeplock(&icache.inode[i].lock, "inode"); + } + + readsb(dev, &sb); + cprintf("sb: size %d nblocks %d ninodes %d nlog %d logstart %d\ + inodestart %d bmap start %d\n", sb.size, sb.nblocks, + sb.ninodes, sb.nlog, sb.logstart, sb.inodestart, + sb.bmapstart); +} + +static struct inode* iget(uint dev, uint inum); + + +// Allocate an inode on device dev. +// Mark it as allocated by giving it type type. +// Returns an unlocked but allocated and referenced inode. +struct inode* ialloc(uint dev, short type) { + int inum; + struct buf *bp; + struct dinode *dip; + + for (inum = 1; inum < sb.ninodes; inum++) { + bp = bread(dev, IBLOCK(inum, sb)); + dip = (struct dinode*)bp->data + inum % IPB; + if (dip->type == 0) { // a free inode + memset(dip, 0, sizeof(*dip)); + dip->type = type; + log_write(bp); // mark it allocated on the disk + brelse(bp); + return iget(dev, inum); + } + brelse(bp); + } + panic("ialloc: no inodes"); +} + +// Copy a modified in-memory inode to disk. +// Must be called after every change to an ip->xxx field +// that lives on disk, since i-node cache is write-through. +// Caller must hold ip->lock. +void iupdate(struct inode *ip) { + struct buf *bp; + struct dinode *dip; + + bp = bread(ip->dev, IBLOCK(ip->inum, sb)); + dip = (struct dinode*)bp->data + ip->inum % IPB; + dip->type = ip->type; + dip->major = ip->major; + dip->minor = ip->minor; + dip->nlink = ip->nlink; + dip->size = ip->size; + memmove(dip->addrs, ip->addrs, sizeof(ip->addrs)); + log_write(bp); + brelse(bp); +} + +// Find the inode with number inum on device dev +// and return the in-memory copy. Does not lock +// the inode and does not read it from disk. +static struct inode* iget(uint dev, uint inum) { + struct inode *ip, *empty; + + acquire(&icache.lock); + + // Is the inode already cached? 
+ empty = 0; + for (ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++) { + if (ip->ref > 0 && ip->dev == dev && ip->inum == inum) { + ip->ref++; + release(&icache.lock); + return ip; + } + if (empty == 0 && ip->ref == 0) { // Remember empty slot. + empty = ip; + } + } + + // Recycle an inode cache entry. + if (empty == 0) { + panic("iget: no inodes"); + } + + ip = empty; + ip->dev = dev; + ip->inum = inum; + ip->ref = 1; + ip->valid = 0; + release(&icache.lock); + + return ip; +} + +// Increment reference count for ip. +// Returns ip to enable ip = idup(ip1) idiom. +struct inode* idup(struct inode *ip) { + acquire(&icache.lock); + ip->ref++; + release(&icache.lock); + return ip; +} + +// Lock the given inode. +// Reads the inode from disk if necessary. +void ilock(struct inode *ip) { + struct buf *bp; + struct dinode *dip; + + if (ip == 0 || ip->ref < 1) { + panic("ilock"); + } + + acquiresleep(&ip->lock); + + if (ip->valid == 0) { + bp = bread(ip->dev, IBLOCK(ip->inum, sb)); + dip = (struct dinode*)bp->data + ip->inum % IPB; + ip->type = dip->type; + ip->major = dip->major; + ip->minor = dip->minor; + ip->nlink = dip->nlink; + ip->size = dip->size; + memmove(ip->addrs, dip->addrs, sizeof(ip->addrs)); + brelse(bp); + ip->valid = 1; + if (ip->type == 0) { + panic("ilock: no type"); + } + } +} + +// Unlock the given inode. +void iunlock(struct inode *ip) { + if (ip == 0 || !holdingsleep(&ip->lock) || ip->ref < 1) { + panic("iunlock"); + } + + releasesleep(&ip->lock); +} + +// Drop a reference to an in-memory inode. +// If that was the last reference, the inode cache entry can +// be recycled. +// If that was the last reference and the inode has no links +// to it, free the inode (and its content) on disk. +// All calls to iput() must be inside a transaction in +// case it has to free the inode. 
+void iput(struct inode *ip) { + acquiresleep(&ip->lock); + if (ip->valid && ip->nlink == 0) { + acquire(&icache.lock); + int r = ip->ref; + release(&icache.lock); + if (r == 1) { + // inode has no links and no other references: truncate and free. + itrunc(ip); + ip->type = 0; + iupdate(ip); + ip->valid = 0; + } + } + releasesleep(&ip->lock); + + acquire(&icache.lock); + ip->ref--; + release(&icache.lock); +} + +// Common idiom: unlock, then put. +void iunlockput(struct inode *ip) { + iunlock(ip); + iput(ip); +} + + +// Inode content +// +// The content (data) associated with each inode is stored +// in blocks on the disk. The first NDIRECT block numbers +// are listed in ip->addrs[]. The next NINDIRECT blocks are +// listed in block ip->addrs[NDIRECT]. + +// Return the disk block address of the nth block in inode ip. +// If there is no such block, bmap allocates one. +static uint bmap(struct inode *ip, uint bn) { + uint addr, *a; + struct buf *bp; + + if (bn < NDIRECT) { + if ((addr = ip->addrs[bn]) == 0) { + ip->addrs[bn] = addr = balloc(ip->dev); + } + return addr; + } + bn -= NDIRECT; + + if (bn < NINDIRECT) { + // Load indirect block, allocating if necessary. + if ((addr = ip->addrs[NDIRECT]) == 0) { + ip->addrs[NDIRECT] = addr = balloc(ip->dev); + } + bp = bread(ip->dev, addr); + a = (uint*)bp->data; + if ((addr = a[bn]) == 0) { + a[bn] = addr = balloc(ip->dev); + log_write(bp); + } + brelse(bp); + return addr; + } + + panic("bmap: out of range"); +} + +// Truncate inode (discard contents). +// Only called when the inode has no links +// to it (no directory entries referring to it) +// and has no in-memory reference to it (is +// not an open file or current directory). 
+static void itrunc(struct inode *ip) { + int i, j; + struct buf *bp; + uint *a; + + for (i = 0; i < NDIRECT; i++) { + if (ip->addrs[i]) { + bfree(ip->dev, ip->addrs[i]); + ip->addrs[i] = 0; + } + } + + if (ip->addrs[NDIRECT]) { + bp = bread(ip->dev, ip->addrs[NDIRECT]); + a = (uint*)bp->data; + for (j = 0; j < NINDIRECT; j++) { + if (a[j]) { + bfree(ip->dev, a[j]); + } + } + brelse(bp); + bfree(ip->dev, ip->addrs[NDIRECT]); + ip->addrs[NDIRECT] = 0; + } + + ip->size = 0; + iupdate(ip); +} + +// Copy stat information from inode. +// Caller must hold ip->lock. +void stati(struct inode *ip, struct stat *st) { + st->dev = ip->dev; + st->ino = ip->inum; + st->type = ip->type; + st->nlink = ip->nlink; + st->size = ip->size; +} + + +// Read data from inode. +// Caller must hold ip->lock. +int readi(struct inode *ip, char *dst, uint off, uint n) { + uint tot, m; + struct buf *bp; + + if (ip->type == T_DEV) { + if (ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read) { + return -1; + } + return devsw[ip->major].read(ip, dst, n); + } + + if (off > ip->size || off + n < off) { + return -1; + } + if (off + n > ip->size) { + n = ip->size - off; + } + + for (tot = 0; tot < n; tot += m, off += m, dst += m) { + bp = bread(ip->dev, bmap(ip, off / BSIZE)); + m = min(n - tot, BSIZE - off % BSIZE); + memmove(dst, bp->data + off % BSIZE, m); + brelse(bp); + } + return n; +} + +// Write data to inode. +// Caller must hold ip->lock. 
+int writei(struct inode *ip, char *src, uint off, uint n) {
+    uint tot, m;
+    struct buf *bp;
+
+    // Device inodes are forwarded to the driver's write handler from devsw;
+    // note that off is not passed on. Fails with -1 if the major number is
+    // out of range or no write handler is registered.
+    if (ip->type == T_DEV) {
+        if (ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write) {
+            return -1;
+        }
+        return devsw[ip->major].write(ip, src, n);
+    }
+
+    // A write may start at most at the current end of file, and
+    // off + n must not wrap around the unsigned range.
+    if (off > ip->size || off + n < off) {
+        return -1;
+    }
+    // The write may not extend past MAXFILE * BSIZE bytes.
+    if (off + n > MAXFILE * BSIZE) {
+        return -1;
+    }
+
+    // Copy block by block: m is the number of bytes that fit between
+    // off and the end of the current block (or the end of the request).
+    for (tot = 0; tot < n; tot += m, off += m, src += m) {
+        bp = bread(ip->dev, bmap(ip, off / BSIZE));
+        m = min(n - tot, BSIZE - off % BSIZE);
+        memmove(bp->data + off % BSIZE, src, m);
+        log_write(bp);
+        brelse(bp);
+    }
+
+    // off now points just past the last byte written; grow the file
+    // and write the inode back if the write extended it.
+    if (n > 0 && off > ip->size) {
+        ip->size = off;
+        iupdate(ip);
+    }
+    return n;
+}
+
+
+// Directories
+
+// Compare two directory entry names, looking at no more than DIRSIZ bytes.
+// Returns the strncmp() result (0 when the names are equal).
+int namecmp(const char *s, const char *t) {
+    return strncmp(s, t, DIRSIZ);
+}
+
+// Look for a directory entry in a directory.
+// If found, set *poff to byte offset of entry.
+// Returns the inode obtained from iget() for the matching entry,
+// or 0 if no entry matches. Panics if dp is not a directory or if
+// an entry cannot be read in full.
+struct inode* dirlookup(struct inode *dp, char *name, uint *poff) {
+    uint off, inum;
+    struct dirent de;
+
+    if (dp->type != T_DIR) {
+        panic("dirlookup not DIR");
+    }
+
+    for (off = 0; off < dp->size; off += sizeof(de)) {
+        if (readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) {
+            panic("dirlookup read");
+        }
+        // inum 0 marks an unused slot.
+        if (de.inum == 0) {
+            continue;
+        }
+        if (namecmp(name, de.name) == 0) {
+            // entry matches path element
+            if (poff) {
+                *poff = off;
+            }
+            inum = de.inum;
+            return iget(dp->dev, inum);
+        }
+    }
+
+    return 0;
+}
+
+// Write a new directory entry (name, inum) into the directory dp.
+// Returns 0 on success, or -1 if an entry called name already exists.
+// Panics if the directory cannot be read or written.
+int dirlink(struct inode *dp, char *name, uint inum) {
+    int off;
+    struct dirent de;
+    struct inode *ip;
+
+    // Check that name is not present.
+    if ((ip = dirlookup(dp, name, 0)) != 0) {
+        iput(ip);
+        return -1;
+    }
+
+    // Look for an empty dirent.
+    // If none is free the loop ends with off == dp->size, so the
+    // writei() below appends a new entry at the end of the directory.
+    for (off = 0; off < dp->size; off += sizeof(de)) {
+        if (readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) {
+            panic("dirlink read");
+        }
+        if (de.inum == 0) {
+            break;
+        }
+    }
+
+    // strncpy leaves de.name without a terminating NUL when name is
+    // DIRSIZ bytes or longer; namecmp() compares at most DIRSIZ bytes.
+    strncpy(de.name, name, DIRSIZ);
+    de.inum = inum;
+    if (writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) {
+        panic("dirlink");
+    }
+
+    return 0;
+}
+
+
+// Paths
+
+// Copy the next path element from path into name.
+// Return a pointer to the element following the copied one.
+// The returned path has no leading slashes,
+// so the caller can check *path=='\0' to see if the name is the last one.
+// If no name to remove, return 0.
+//
+// Examples:
+//   skipelem("a/bb/c", name) = "bb/c", setting name = "a"
+//   skipelem("///a//bb", name) = "bb", setting name = "a"
+//   skipelem("a", name) = "", setting name = "a"
+//   skipelem("", name) = skipelem("////", name) = 0
+//
+static char* skipelem(char *path, char *name) {
+    char *s;
+    int len;
+
+    while (*path == '/') {
+        path++;
+    }
+    if (*path == 0) {
+        return 0;
+    }
+    s = path;
+    while (*path != '/' && *path != 0) {
+        path++;
+    }
+    len = path - s;
+    // Elements of DIRSIZ bytes or more are truncated to DIRSIZ bytes
+    // and stored without a terminating NUL.
+    if (len >= DIRSIZ) {
+        memmove(name, s, DIRSIZ);
+    }
+    else {
+        memmove(name, s, len);
+        name[len] = 0;
+    }
+    while (*path == '/') {
+        path++;
+    }
+    return path;
+}
+
+// Look up and return the inode for a path name.
+// If nameiparent != 0, return the inode for the parent and copy the final
+// path element into name, which must have room for DIRSIZ bytes.
+// Returns 0 if a component is missing or is not a directory.
+// Must be called inside a transaction since it calls iput().
+static struct inode* namex(char *path, int nameiparent, char *name) {
+    struct inode *ip, *next;
+
+    // Absolute paths start at the root inode, relative paths at the
+    // current process's working directory.
+    if (*path == '/') {
+        ip = iget(ROOTDEV, ROOTINO);
+    }
+    else {
+        ip = idup(myproc()->cwd);
+    }
+
+    while ((path = skipelem(path, name)) != 0) {
+        ilock(ip);
+        if (ip->type != T_DIR) {
+            iunlockput(ip);
+            return 0;
+        }
+        if (nameiparent && *path == '\0') {
+            // Stop one level early.
+            iunlock(ip);
+            return ip;
+        }
+        if ((next = dirlookup(ip, name, 0)) == 0) {
+            iunlockput(ip);
+            return 0;
+        }
+        iunlockput(ip);
+        ip = next;
+    }
+    // Reached with nameiparent set only when the path had no element
+    // at all (the early return above never ran): there is no parent.
+    if (nameiparent) {
+        iput(ip);
+        return 0;
+    }
+    return ip;
+}
+
+// Return the inode named by path, or 0 if the lookup fails.
+struct inode* namei(char *path) {
+    char name[DIRSIZ];
+    return namex(path, 0, name);
+}
+
+// Return the inode of the directory containing the last element of path
+// and copy that last element into name (room for DIRSIZ bytes required).
+// Returns 0 if the lookup fails.
+struct inode*nameiparent(char *path, char *name) {
+    return namex(path, 1, name);
+}
diff --git a/fs.h b/fs.h
new file mode 100644
index 0000000..3651c7b
--- /dev/null
+++ b/fs.h
@@ -0,0 +1,57 @@
+// On-disk file system format.
+// Both the kernel and user programs use this header file.
+
+
+#define ROOTINO 1 // root i-number
+#define BSIZE 512 // block size
+
+// Disk layout:
+// [ boot block | super block | log | inode blocks |
+// free bit map | data blocks]
+//
+// mkfs computes the super block and builds an initial file system. The
+// super block describes the disk layout:
+struct superblock {
+    uint size; // Size of file system image (blocks)
+    uint nblocks; // Number of data blocks
+    uint ninodes; // Number of inodes.
+    uint nlog; // Number of log blocks
+    uint logstart; // Block number of first log block
+    uint inodestart; // Block number of first inode block
+    uint bmapstart; // Block number of first free map block
+};
+
+// NINDIRECT is the number of block numbers that fit in one block;
+// MAXFILE is the resulting maximum file size in blocks.
+#define NDIRECT 12
+#define NINDIRECT (BSIZE / sizeof(uint))
+#define MAXFILE (NDIRECT + NINDIRECT)
+
+// On-disk inode structure
+struct dinode {
+    short type; // File type
+    short major; // Major device number (T_DEV only)
+    short minor; // Minor device number (T_DEV only)
+    short nlink; // Number of links to inode in file system
+    uint size; // Size of file (bytes)
+    uint addrs[NDIRECT + 1]; // Data block addresses
+};
+
+// Inodes per block.
+#define IPB (BSIZE / sizeof(struct dinode))
+
+// Block containing inode i
+#define IBLOCK(i, sb) ((i) / IPB + sb.inodestart)
+
+// Bitmap bits per block
+#define BPB (BSIZE * 8)
+
+// Block of free map containing bit for block b
+#define BBLOCK(b, sb) (b / BPB + sb.bmapstart)
+
+// Directory is a file containing a sequence of dirent structures.
+#define DIRSIZ 14
+
+struct dirent {
+    ushort inum;
+    char name[DIRSIZ];
+};
+
diff --git a/gdbutil b/gdbutil
new file mode 100644
index 0000000..e0c362f
--- /dev/null
+++ b/gdbutil
@@ -0,0 +1,291 @@
+# -*- gdb-script -*-
+
+# Utility functions to pretty-print x86 segment/interrupt descriptors.
+# To load this file, run "source gdbutil" in gdb.
+# printdesc and printdescs are the main entry points.
+
+# IA32 2007, Volume 3A, Table 3-2
+set $STS_T16A = 0x1
+set $STS_LDT = 0x2
+set $STS_T16B = 0x3
+set $STS_CG16 = 0x4
+set $STS_TG = 0x5
+set $STS_IG16 = 0x6
+set $STS_TG16 = 0x7
+set $STS_T32A = 0x9
+set $STS_T32B = 0xB
+set $STS_CG32 = 0xC
+set $STS_IG32 = 0xE
+set $STS_TG32 = 0xF
+
+# outputsts TYPE: echo the STS_* name of a system descriptor type,
+# or "Reserved" when the value matches none of the constants above.
+# The "while 1 ... loop_break" wrapper makes each match skip the rest.
+define outputsts
+  while 1
+    if $arg0 == $STS_T16A
+      echo STS_T16A
+      loop_break
+    end
+    if $arg0 == $STS_LDT
+      echo STS_LDT\ 
+      loop_break
+    end
+    if $arg0 == $STS_T16B
+      echo STS_T16B
+      loop_break
+    end
+    if $arg0 == $STS_CG16
+      echo STS_CG16
+      loop_break
+    end
+    if $arg0 == $STS_TG
+      echo STS_TG\ \ 
+      loop_break
+    end
+    if $arg0 == $STS_IG16
+      echo STS_IG16
+      loop_break
+    end
+    if $arg0 == $STS_TG16
+      echo STS_TG16
+      loop_break
+    end
+    if $arg0 == $STS_T32A
+      echo STS_T32A
+      loop_break
+    end
+    if $arg0 == $STS_T32B
+      echo STS_T32B
+      loop_break
+    end
+    if $arg0 == $STS_CG32
+      echo STS_CG32
+      loop_break
+    end
+    if $arg0 == $STS_IG32
+      echo STS_IG32
+      loop_break
+    end
+    if $arg0 == $STS_TG32
+      echo STS_TG32
+      loop_break
+    end
+    echo Reserved
+    loop_break
+  end
+end
+
+# IA32 2007, Volume 3A, Table 3-1
+set $STA_X = 0x8
+set $STA_E = 0x4
+set $STA_C = 0x4
+set $STA_W = 0x2
+set $STA_R = 0x2
+set $STA_A = 0x1
+
+# outputsta TYPE: echo "code" or "data" followed by the STA_* flags
+# that are set in a code/data segment type field.
+define outputsta
+  if $arg0 & $STA_X
+    # Code segment
+    echo code
+    if $arg0 & $STA_C
+      echo |STA_C
+    end
+    if $arg0 & $STA_R
+      echo |STA_R
+    end
+  else
+    # Data segment
+    echo data
+    if $arg0 & $STA_E
+      echo |STA_E
+    end
+    if $arg0 & $STA_W
+      echo |STA_W
+    end
+  end
+  if $arg0 & $STA_A
+    echo |STA_A
+  else
+    printf " "
+  end
+end
+
+# xv6-specific
+set $SEG_KCODE = 1
+set $SEG_KDATA = 2
+set $SEG_KCPU = 3
+set $SEG_UCODE = 4
+set $SEG_UDATA = 5
+set $SEG_TSS = 6
+
+# outputcs SELECTOR: print a segment selector symbolically. Bit 2 picks
+# GDT vs LDT, the index is SELECTOR >> 3, and the low two bits are
+# printed through outputdpl when they are non-zero.
+define outputcs
+  if ($arg0 & 4) == 0
+    if $arg0 >> 3 == $SEG_KCODE
+      printf "SEG_KCODE<<3"
+    end
+    if $arg0 >> 3 == $SEG_KDATA
+      printf "SEG_KDATA<<3"
+    end
+    if $arg0 >> 3 == $SEG_KCPU
+      printf "SEG_KCPU<<3"
+    end
+    if $arg0 >> 3 == $SEG_UCODE
+      printf "SEG_UCODE<<3"
+    end
+    if $arg0 >> 3 == $SEG_UDATA
+      printf "SEG_UDATA<<3"
+    end
+    if $arg0 >> 3 == $SEG_TSS
+      printf "SEG_TSS<<3"
+    end
+    if ($arg0 >> 3 < 1) + ($arg0 >> 3 > 6)
+      printf "GDT[%d]", $arg0 >> 3
+    end
+  else
+    printf "LDT[%d]", $arg0 >> 3
+  end
+  if ($arg0 & 3) > 0
+    printf "|"
+    outputdpl ($arg0&3)
+  end
+end
+
+# outputdpl LEVEL: print DPL_KERN for 0, DPL_USER for 3, DPL<n> otherwise.
+define outputdpl
+  if $arg0 == 0
+    printf "DPL_KERN"
+  else
+    if $arg0 == 3
+      printf "DPL_USER"
+    else
+      printf "DPL%d", $arg0
+    end
+  end
+end
+
+define printdesc
+  if $argc != 1
+    echo Usage: printdesc expr
+  else
+    _printdesc ((uint*)&($arg0))[0] ((uint*)&($arg0))[1]
+    printf "\n"
+  end
+end
+
+document printdesc
+Print an x86 segment or gate descriptor.
+printdesc EXPR
+EXPR must evaluate to a descriptor value. It can be of any C type.
+end
+
+# _printdesc LOW HIGH: split the P, DPL, S and Type fields out of the
+# high word and hand everything to _printdesc1.
+define _printdesc
+  _printdesc1 $arg0 $arg1 ($arg1>>15&1) ($arg1>>13&3) ($arg1>>12&1) ($arg1>>8&15)
+end
+
+define _printdesc1
+  # 2:P 3:DPL 4:S 5:Type
+  if $arg2 == 0
+    printf "P = 0 (Not present)"
+  else
+    printf "type = "
+    if $arg4 == 0
+      # System segment
+      outputsts $arg5
+      printf " (0x%x) ", $arg5
+      _printsysdesc $arg0 $arg1 $arg5
+    else
+      # Code/data segment
+      outputsta $arg5
+      printf " "
+      _printsegdesc $arg0 $arg1
+    end
+
+    printf " DPL = "
+    outputdpl $arg3
+    printf " (%d)", $arg3
+  end
+end
+
+define _printsysdesc
+  # 2:Type
+  # GDB's || is buggy
+  # (the comparisons are therefore added together instead of or-ed)
+  if ($arg2 == $STS_TG) + (($arg2&7) == $STS_IG16) + (($arg2&7) == $STS_TG16)
+    # Gate descriptor
+    _printgate $arg2 ($arg0>>16) ($arg0&0xFFFF) ($arg1>>16)
+  else
+    # System segment descriptor
+    _printsegdesc $arg0 $arg1
+  end
+end
+
+define _printgate
+  # IA32 2007, Volume 3A, Figure 5-2
+  # 0:Type 1:CS 2:Offset 15..0 3:Offset 31..16
+  printf "CS = "
+  outputcs $arg1
+  printf " (%d)", $arg1
+
+  if (($arg0&7) == $STS_IG16) + (($arg0&7) == $STS_TG16)
+    printf " Offset = "
+    output/a $arg3 << 16 | $arg2
+  end
+end
+
+define _printsegdesc
+  # IA32 2007, Volume 3A, Figure 3-8 and Figure 4-1
+  _printsegdesc1 ($arg0>>16) ($arg1&0xFF) ($arg1>>24) ($arg0&0xFFFF) ($arg1>>16&15) ($arg1>>23&1)
+  if ($arg1>>12&1) == 1
+    printf " AVL = %d", $arg1>>20&1
+    if ($arg1>>11&1) == 0
+      # Data segment
+      if ($arg1>>22&1) == 0
+        printf " B = small (0) "
+      else
+        printf " B = big (1) "
+      end
+    else
+      # Code segment
+      printf " D = "
+      if ($arg1>>22&1) == 0
+        printf "16-bit (0)"
+      else
+        printf "32-bit (1)"
+      end
+    end
+  end
+end
+
+define _printsegdesc1
+  # 0:Base 0..15 1:Base 16..23 2:Base 24..31 3:Limit 0..15 4:Limit 16..19 5:G
+  printf "base = 0x%08x", $arg0 | ($arg1<<16) | ($arg2<<24)
+  printf " limit = 0x"
+  if $arg5 == 0
+    printf "%08x", $arg3 | ($arg4<<16)
+  else
+    printf "%08x", (($arg3 | ($arg4<<16)) << 12) | 0xFFF
+  end
+end
+
+define printdescs
+  if $argc < 1 || $argc > 2
+    echo Usage: printdescs expr [count]
+  else
+    if $argc == 1
+      _printdescs ($arg0) (sizeof($arg0)/sizeof(($arg0)[0]))
+    else
+      _printdescs ($arg0) ($arg1)
+    end
+  end
+end
+
+document printdescs
+Print an array of x86 segment or gate descriptors.
+printdescs EXPR [COUNT]
+EXPR must evaluate to an array of descriptors.
+end
+
+# _printdescs ARRAY COUNT: run printdesc on ARRAY[0] .. ARRAY[COUNT-1].
+define _printdescs
+  set $i = 0
+  while $i < $arg1
+    printf "[%d] ", $i
+    printdesc $arg0[$i]
+    set $i = $i + 1
+  end
+end
diff --git a/gensyscalls.pl b/gensyscalls.pl
new file mode 100755
index 0000000..16b7975
--- /dev/null
+++ b/gensyscalls.pl
@@ -0,0 +1,102 @@
+#!/usr/bin/perl -w
+
+# Generate syscall.h, syscalltable.h or usys.S. These are the header and assembly
+# files for system calls.
+#
+# Generating these files from one script avoids them getting out of sync.
+#
+# Specify an argument of -h to generate syscall.h
+# Specify an argument of -c to generate syscalltable.h
+# Specify an argument of -a to generate usys.S
+#
+# Note that you also need to update user.h with the declarations for these functions that
+# user programs will use. This ensures that the C compiler generates the correct code to
+# push the parameters on to the stack.
+
+# The position in this list determines the syscall number: entry i is
+# emitted as SYS_<name> = i + 1 by the -h branch below.
+my @syscalls = (
+    "fork",
+    "exit",
+    "wait",
+    "pipe",
+    "read",
+    "kill",
+    "exec",
+    "fstat",
+    "chdir",
+    "dup",
+    "getpid",
+    "sbrk",
+    "sleep",
+    "uptime",
+    "open",
+    "write",
+    "mknod",
+    "unlink",
+    "link",
+    "mkdir",
+    "close",
+    "getch"
+    );
+
+my $i;
+# Note: both error messages below are printed with plain print (standard
+# output), without a trailing newline.
+if ($#ARGV == -1)
+{
+    print 'Error: No argument supplied to gensyscalls.pl';
+    exit(1);
+}
+if (($ARGV[0] ne '-h') && ($ARGV[0] ne '-a') && ($ARGV[0] ne '-c'))
+{
+    print 'Error: Invalid argument to gensyscalls.pl';
+    exit(1);
+}
+# Banner: C-style comments for the two headers, '#' comments for usys.S.
+if ($ARGV[0] eq '-h'|| $ARGV[0] eq '-c')
+{
+    print "// Generated by gensyscalls.pl. Do not edit.\n";
+    print "// To change syscall numbers or add new syscalls, edit gensyscalls.pl\n";
+    print "\n";
+}
+else
+{
+    print "# Generated by gensyscalls.pl. Do not edit.\n";
+    print "# To change syscall numbers or add new syscalls, edit gensyscalls.pl\n";
+    print "\n";
+}
+for ($i = 0; $i < scalar(@syscalls); $i++)
+{
+    my $index = $i + 1;
+    if ($ARGV[0] eq '-h')
+    {
+        print "#define SYS_$syscalls[$i]\t\t$index\n";
+    }
+    elsif ($ARGV[0] eq '-c')
+    {
+        print "extern int sys_$syscalls[$i](void);\n";
+    }
+}
+if ($ARGV[0] eq '-a')
+{
+    print "#include \"syscall.h\"\n";
+    print "#include \"traps.h\"\n";
+    print "\n";
+    print "#define SYSCALL(name) \\\n";
+    print ".globl name; \\\n";
+    print "name: \\\n";
+    print "\tmovl\t\$SYS_ ## name, \%eax; \\\n";
+    print "\tint\t\$T_SYSCALL; \\\n";
+    print "\tret\n";
+    print "\n";
+    for ($i = 0; $i < scalar(@syscalls); $i++)
+    {
+        print "SYSCALL($syscalls[$i])\n";
+    }
+}
+elsif ($ARGV[0] eq '-c')
+{
+    print "\n";
+    print "static int(*syscalls[])(void) = {\n";
+    for ($i = 0; $i < scalar(@syscalls); $i++)
+    {
+        print "[SYS_$syscalls[$i]]\tsys_$syscalls[$i],\n";
+    }
+    print "};\n"
+}
diff --git a/grep.c b/grep.c
new file mode 100644
index 0000000..a0aa833
--- /dev/null
+++ b/grep.c
@@ -0,0 +1,109 @@
+// Simple grep. Only supports ^ . * $ operators.
+
+#include "types.h"
+#include "stat.h"
+#include "user.h"
+
+// Input buffer used by grep(); reads leave one byte free for the NUL.
+char buf[1024];
+int match(char*, char*);
+
+// Read fd until read() returns <= 0 and write every newline-terminated
+// line that matches pattern to fd 1.
+// Data after the last newline is carried over to the next read; a final
+// line with no trailing newline is never passed to match().
+void grep(char *pattern, int fd) {
+    int n, m;
+    char *p, *q;
+
+    // m is the number of bytes currently held in buf.
+    m = 0;
+    while ((n = read(fd, buf + m, sizeof(buf) - m - 1)) > 0) {
+        m += n;
+        buf[m] = '\0';
+        p = buf;
+        while ((q = strchr(p, '\n')) != 0) {
+            // Terminate the line for match(), then restore the newline
+            // before printing it.
+            *q = 0;
+            if (match(pattern, p)) {
+                *q = '\n';
+                write(1, p, q + 1 - p);
+            }
+            p = q + 1;
+        }
+        // No newline was found in the buffered data: drop all of it.
+        if (p == buf) {
+            m = 0;
+        }
+        // Otherwise move the unfinished tail to the front of buf.
+        if (m > 0) {
+            m -= p - buf;
+            memmove(buf, p, m);
+        }
+    }
+}
+
+// usage: grep pattern [file ...]
+// With no file arguments fd 0 is searched. Stops at the first file that
+// cannot be opened (that message is written to fd 1).
+int main(int argc, char *argv[]) {
+    int fd, i;
+    char *pattern;
+
+    if (argc <= 1) {
+        printf(2, "usage: grep pattern [file ...]\n");
+        exit();
+    }
+    pattern = argv[1];
+
+    if (argc <= 2) {
+        grep(pattern, 0);
+        exit();
+    }
+
+    for (i = 2; i < argc; i++) {
+        if ((fd = open(argv[i], 0)) < 0) {
+            printf(1, "grep: cannot open %s\n", argv[i]);
+            exit();
+        }
+        grep(pattern, fd);
+        close(fd);
+    }
+    exit();
+}
+
+// Regexp matcher from Kernighan & Pike,
+// The Practice of Programming, Chapter 9.
+
+int matchhere(char*, char*);
+int matchstar(int, char*, char*);
+
+// match: search for re anywhere in text; a leading '^' anchors the
+// search to the start of text. Returns 1 on a match, 0 otherwise.
+int match(char *re, char *text) {
+    if (re[0] == '^') {
+        return matchhere(re + 1, text);
+    }
+    do { // must look at empty string
+        if (matchhere(re, text)) {
+            return 1;
+        }
+    }
+    while (*text++ != '\0');
+    return 0;
+}
+
+// matchhere: search for re at beginning of text
+int matchhere(char *re, char *text){
+    if (re[0] == '\0') {
+        return 1;
+    }
+    if (re[1] == '*') {
+        return matchstar(re[0], re + 2, text);
+    }
+    // '$' is only special as the last character of re.
+    if (re[0] == '$' && re[1] == '\0') {
+        return *text == '\0';
+    }
+    if (*text != '\0' && (re[0] == '.' || re[0] == *text)) {
+        return matchhere(re + 1, text + 1);
+    }
+    return 0;
+}
+
+// matchstar: search for c*re at beginning of text
+int matchstar(int c, char *re, char *text) {
+    do { // a * matches zero or more instances
+        if (matchhere(re, text)) {
+            return 1;
+        }
+    }
+    while (*text != '\0' && (*text++ == c || c == '.'));
+    return 0;
+}
+
diff --git a/ide.c b/ide.c
new file mode 100644
index 0000000..1278db5
--- /dev/null
+++ b/ide.c
@@ -0,0 +1,171 @@
+// Simple PIO-based (non-DMA) IDE driver code.
+
+#include "types.h"
+#include "defs.h"
+#include "param.h"
+#include "memlayout.h"
+#include "mmu.h"
+#include "proc.h"
+#include "x86.h"
+#include "traps.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "fs.h"
+#include "buf.h"
+
+#define SECTOR_SIZE 512
+#define IDE_BSY 0x80
+#define IDE_DRDY 0x40
+#define IDE_DF 0x20
+#define IDE_ERR 0x01
+
+#define IDE_CMD_READ 0x20
+#define IDE_CMD_WRITE 0x30
+#define IDE_CMD_RDMUL 0xc4
+#define IDE_CMD_WRMUL 0xc5
+
+// idequeue points to the buf now being read/written to the disk.
+// idequeue->qnext points to the next buf to be processed.
+// You must hold idelock while manipulating queue.
+
+static struct spinlock idelock;
+static struct buf *idequeue;
+
+// Set by ideinit() when the probe for disk 1 sees a non-zero status.
+static int havedisk1;
+static void idestart(struct buf*);
+
+// Wait for IDE disk to become ready.
+// Spins reading port 0x1f7 until IDE_BSY is clear and IDE_DRDY is set.
+// Returns -1 if checkerr is non-zero and the last status read has
+// IDE_DF or IDE_ERR set; returns 0 otherwise.
+static int idewait(int checkerr) {
+    int r;
+
+    while (((r = inb(0x1f7)) & (IDE_BSY | IDE_DRDY)) != IDE_DRDY) {
+        ;
+    }
+    if (checkerr && (r & (IDE_DF | IDE_ERR)) != 0) {
+        return -1;
+    }
+    return 0;
+}
+
+// Set up the driver: initialise idelock, enable IRQ_IDE on the
+// highest-numbered CPU (ncpu - 1), wait for the disk, and probe disk 1.
+void ideinit(void) {
+    int i;
+
+    initlock(&idelock, "ide");
+    ioapicenable(IRQ_IDE, ncpu - 1);
+    idewait(0);
+
+    // Check if disk 1 is present
+    // (select it, then poll the status port up to 1000 times for a
+    // non-zero value).
+    outb(0x1f6, 0xe0 | (1 << 4));
+    for (i = 0; i < 1000; i++) {
+        if (inb(0x1f7) != 0) {
+            havedisk1 = 1;
+            break;
+        }
+    }
+
+    // Switch back to disk 0.
+    outb(0x1f6, 0xe0 | (0 << 4));
+}
+
+// Start the request for b. Caller must hold idelock.
+static void idestart(struct buf *b) {
+    if (b == 0) {
+        panic("idestart");
+    }
+    if (b->blockno >= FSSIZE) {
+        panic("incorrect blockno");
+    }
+    // A file system block may span several disk sectors; the single-sector
+    // commands are used only when a block is exactly one sector.
+    int sector_per_block = BSIZE / SECTOR_SIZE;
+    int sector = b->blockno * sector_per_block;
+    int read_cmd = (sector_per_block == 1) ? IDE_CMD_READ : IDE_CMD_RDMUL;
+    int write_cmd = (sector_per_block == 1) ? IDE_CMD_WRITE : IDE_CMD_WRMUL;
+
+    if (sector_per_block > 7) {
+        panic("idestart");
+    }
+
+    idewait(0);
+    outb(0x3f6, 0); // generate interrupt
+    outb(0x1f2, sector_per_block); // number of sectors
+    // Sector number: bits 0-23 go to ports 0x1f3-0x1f5, bits 24-27 share
+    // port 0x1f6 with the drive-select bit taken from b->dev.
+    outb(0x1f3, sector & 0xff);
+    outb(0x1f4, (sector >> 8) & 0xff);
+    outb(0x1f5, (sector >> 16) & 0xff);
+    outb(0x1f6, 0xe0 | ((b->dev & 1) << 4) | ((sector >> 24) & 0x0f));
+    if (b->flags & B_DIRTY) {
+        // Write request: issue the command, then push the block's data
+        // out through the data port right away.
+        outb(0x1f7, write_cmd);
+        outsl(0x1f0, b->data, BSIZE / 4);
+    }
+    else {
+        // Read request: the data is collected later by ideintr().
+        outb(0x1f7, read_cmd);
+    }
+}
+
+// Interrupt handler.
+// Completes the request at the head of idequeue, wakes its waiter and
+// starts the next queued request, if any.
+void ideintr(void) {
+    struct buf *b;
+
+    // First queued buffer is the active request.
+    acquire(&idelock);
+
+    if ((b = idequeue) == 0) {
+        release(&idelock);
+        return;
+    }
+    idequeue = b->qnext;
+
+    // Read data if needed.
+    // If idewait(1) reports an error the data is not read, but the
+    // buffer is still marked B_VALID below.
+    if (!(b->flags & B_DIRTY) && idewait(1) >= 0) {
+        insl(0x1f0, b->data, BSIZE / 4);
+    }
+
+    // Wake process waiting for this buf.
+    b->flags |= B_VALID;
+    b->flags &= ~B_DIRTY;
+    wakeup(b);
+
+    // Start disk on next buf in queue.
+    if (idequeue != 0) {
+        idestart(idequeue);
+    }
+
+    release(&idelock);
+}
+
+
+// Sync buf with disk.
+// If B_DIRTY is set, write buf to disk, clear B_DIRTY, set B_VALID.
+// Else if B_VALID is not set, read buf from disk, set B_VALID.
+// The caller must hold b->lock. Panics if there is nothing to do or if
+// b->dev is non-zero and disk 1 was not detected.
+void iderw(struct buf *b) {
+    struct buf **pp;
+
+    if (!holdingsleep(&b->lock)) {
+        panic("iderw: buf not locked");
+    }
+    if ((b->flags & (B_VALID | B_DIRTY)) == B_VALID) {
+        panic("iderw: nothing to do");
+    }
+    if (b->dev != 0 && !havedisk1) {
+        panic("iderw: ide disk 1 not present");
+    }
+
+    acquire(&idelock); //DOC:acquire-lock
+
+    // Append b to idequeue.
+    b->qnext = 0;
+    for (pp = &idequeue; *pp; pp = &(*pp)->qnext) { //DOC:insert-queue
+        ;
+    }
+    *pp = b;
+
+    // Start disk if necessary.
+    // (b is at the head only if the queue was empty, i.e. the disk is
+    // not already working on another request.)
+    if (idequeue == b) {
+        idestart(b);
+    }
+
+    // Wait for request to finish.
+    while ((b->flags & (B_VALID | B_DIRTY)) != B_VALID) {
+        sleep(b, &idelock);
+    }
+
+    release(&idelock);
+}
diff --git a/init.c b/init.c
new file mode 100644
index 0000000..f6f01a3
--- /dev/null
+++ b/init.c
@@ -0,0 +1,35 @@
+// init: The initial user-level program
+
+#include "types.h"
+#include "stat.h"
+#include "user.h"
+#include "fcntl.h"
+
+char *shell_argv[] = { "sh", 0 };
+
+// Open the console (creating the device node first if the open fails),
+// duplicate it twice, then start sh over and over: each time the shell
+// exits a new one is forked.
+int main(int argc, char* argv[]) {
+    int pid, wpid;
+
+    if (open("console", O_RDWR) < 0) {
+        mknod("console", 1, 1);
+        open("console", O_RDWR);
+    }
+    dup(0); // stdout
+    dup(0); // stderr
+
+    for (;;) {
+        printf(1, "init: starting sh\n");
+        pid = fork();
+        if (pid < 0) {
+            printf(1, "init: fork failed\n");
+            exit();
+        }
+        if (pid == 0) {
+            exec("sh", shell_argv);
+            // Only reached if exec() returned, i.e. it failed.
+            printf(1, "init: exec sh failed\n");
+            exit();
+        }
+        // Keep calling wait() until it returns the shell's pid or fails;
+        // other children that exit in the meantime are reaped here too.
+        while ((wpid = wait()) >= 0 && wpid != pid) {
+        }
+    }
+}
diff --git a/initcode.S b/initcode.S
new file mode 100644
index 0000000..71842d8
--- /dev/null
+++ b/initcode.S
@@ -0,0 +1,32 @@
+# Initial process execs /init.
+# This code runs in user space.
+
+#include "syscall.h"
+#include "traps.h"
+
+
+# exec(init, argv)
+.globl start
+start:
+  pushl $argv
+  pushl $init
+  pushl $0 // where caller pc would be
+  movl $SYS_exec, %eax
+  int $T_SYSCALL
+
+# for(;;) exit();
+exit:
+  movl $SYS_exit, %eax
+  int $T_SYSCALL
+  jmp exit
+
+# char init[] = "/init\0";
+init:
+  .string "/init\0"
+
+# char *argv[] = { init, 0 };
+  .p2align 2
+argv:
+  .long init
+  .long 0
+
diff --git a/ioapic.c b/ioapic.c
new file mode 100644
index 0000000..1756da4
--- /dev/null
+++ b/ioapic.c
@@ -0,0 +1,68 @@
+// The I/O APIC manages hardware interrupts for an SMP system.
+// http://www.intel.com/design/chipsets/datashts/29056601.pdf
+// See also picirq.c.
+ +#include "types.h" +#include "defs.h" +#include "traps.h" + +#define IOAPIC 0xFEC00000 // Default physical address of IO APIC + +#define REG_ID 0x00 // Register index: ID +#define REG_VER 0x01 // Register index: version +#define REG_TABLE 0x10 // Redirection table base + +// The redirection table starts at REG_TABLE and uses +// two registers to configure each interrupt. +// The first (low) register in a pair contains configuration bits. +// The second (high) register contains a bitmask telling which +// CPUs can serve that interrupt. +#define INT_DISABLED 0x00010000 // Interrupt disabled +#define INT_LEVEL 0x00008000 // Level-triggered (vs edge-) +#define INT_ACTIVELOW 0x00002000 // Active low (vs high) +#define INT_LOGICAL 0x00000800 // Destination is CPU id (vs APIC ID) + +volatile struct ioapic *ioapic; + +// IO APIC MMIO structure: write reg, then read or write data. +struct ioapic { + uint reg; + uint pad[3]; + uint data; +}; + +static uint ioapicread(int reg) { + ioapic->reg = reg; + return ioapic->data; +} + +static void ioapicwrite(int reg, uint data) { + ioapic->reg = reg; + ioapic->data = data; +} + +void ioapicinit(void) { + int i, id, maxintr; + + ioapic = (volatile struct ioapic*)IOAPIC; + maxintr = (ioapicread(REG_VER) >> 16) & 0xFF; + id = ioapicread(REG_ID) >> 24; + if (id != ioapicid) { + cprintf("ioapicinit: id isn't equal to ioapicid; not a MP\n"); + } + + // Mark all interrupts edge-triggered, active high, disabled, + // and not routed to any CPUs. + for (i = 0; i <= maxintr; i++) { + ioapicwrite(REG_TABLE + 2 * i, INT_DISABLED | (T_IRQ0 + i)); + ioapicwrite(REG_TABLE + 2 * i + 1, 0); + } +} + +void ioapicenable(int irq, int cpunum) { + // Mark interrupt edge-triggered, active high, + // enabled, and routed to the given cpunum, + // which happens to be that cpu's APIC ID. 
+ ioapicwrite(REG_TABLE + 2 * irq, T_IRQ0 + irq); + ioapicwrite(REG_TABLE + 2 * irq + 1, cpunum << 24); +} diff --git a/kalloc.c b/kalloc.c new file mode 100644 index 0000000..d1ef801 --- /dev/null +++ b/kalloc.c @@ -0,0 +1,93 @@ +// Physical memory allocator, intended to allocate +// memory for user processes, kernel stacks, page table pages, +// and pipe buffers. Allocates 4096-byte pages. + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "mmu.h" +#include "spinlock.h" + +void freerange(void *vstart, void *vend); +extern char end[]; // first address after kernel loaded from ELF file + // defined by the kernel linker script in kernel.ld + +struct run { + struct run *next; +}; + +struct { + struct spinlock lock; + int use_lock; + struct run *freelist; +} kmem; + +// Initialization happens in two phases. +// 1. main() calls kinit1() while still using entrypgdir to place just +// the pages mapped by entrypgdir on free list. +// 2. main() calls kinit2() with the rest of the physical pages +// after installing a full page table that maps them on all cores. +void kinit1(void *vstart, void *vend) { + initlock(&kmem.lock, "kmem"); + kmem.use_lock = 0; + freerange(vstart, vend); +} + +void kinit2(void *vstart, void *vend) { + freerange(vstart, vend); + kmem.use_lock = 1; +} + +void freerange(void *vstart, void *vend) { + char *p; + p = (char*)PGROUNDUP((uint)vstart); + for (; p + PGSIZE <= (char*)vend; p += PGSIZE) { + kfree(p); + } +} + +// Free the page of physical memory pointed at by v, +// which normally should have been returned by a +// call to kalloc(). (The exception is when +// initializing the allocator; see kinit above.) +void kfree(char *v) { + struct run *r; + + if ((uint)v % PGSIZE || v < end || V2P(v) >= PHYSTOP) { + panic("kfree"); + } + + // Fill with junk to catch dangling refs. 
+ memset(v, 1, PGSIZE); + + if (kmem.use_lock) { + acquire(&kmem.lock); + } + r = (struct run*)v; + r->next = kmem.freelist; + kmem.freelist = r; + if (kmem.use_lock) { + release(&kmem.lock); + } +} + +// Allocate one 4096-byte page of physical memory. +// Returns a pointer that the kernel can use. +// Returns 0 if the memory cannot be allocated. +char* kalloc(void) { + struct run *r; + + if (kmem.use_lock) { + acquire(&kmem.lock); + } + r = kmem.freelist; + if (r) { + kmem.freelist = r->next; + } + if (kmem.use_lock) { + release(&kmem.lock); + } + return (char*)r; +} + diff --git a/kbd.c b/kbd.c new file mode 100644 index 0000000..0426cb9 --- /dev/null +++ b/kbd.c @@ -0,0 +1,51 @@ +#include "types.h" +#include "x86.h" +#include "defs.h" +#include "kbd.h" + +int kbdgetc(void) { + static uint shift; + static uchar *charcode[4] = { + normalmap, shiftmap, ctlmap, ctlmap + }; + uint st, data, c; + + st = inb(KBSTATP); + if ((st & KBS_DIB) == 0) { + return -1; + } + data = inb(KBDATAP); + + if (data == 0xE0) { + shift |= E0ESC; + return 0; + } + else if (data & 0x80) { + // Key released + data = (shift & E0ESC ? 
data : data & 0x7F); + shift &= ~(shiftcode[data] | E0ESC); + return 0; + } + else if (shift & E0ESC) { + // Last character was an E0 escape; or with 0x80 + data |= 0x80; + shift &= ~E0ESC; + } + + shift |= shiftcode[data]; + shift ^= togglecode[data]; + c = charcode[shift & (CTL | SHIFT)][data]; + if (shift & CAPSLOCK) { + if ('a' <= c && c <= 'z') { + c += 'A' - 'a'; + } + else if ('A' <= c && c <= 'Z') { + c += 'a' - 'A'; + } + } + return c; +} + +void kbdintr(void) { + consoleintr(kbdgetc); +} diff --git a/kbd.h b/kbd.h new file mode 100644 index 0000000..1d11783 --- /dev/null +++ b/kbd.h @@ -0,0 +1,112 @@ +// PC keyboard interface constants + +#define KBSTATP 0x64 // kbd controller status port(I) +#define KBS_DIB 0x01 // kbd data in buffer +#define KBDATAP 0x60 // kbd data port(I) + +#define NO 0 + +#define SHIFT (1 << 0) +#define CTL (1 << 1) +#define ALT (1 << 2) + +#define CAPSLOCK (1 << 3) +#define NUMLOCK (1 << 4) +#define SCROLLLOCK (1 << 5) + +#define E0ESC (1 << 6) + +// Special keycodes +#define KEY_HOME 0xE0 +#define KEY_END 0xE1 +#define KEY_UP 0xE2 +#define KEY_DN 0xE3 +#define KEY_LF 0xE4 +#define KEY_RT 0xE5 +#define KEY_PGUP 0xE6 +#define KEY_PGDN 0xE7 +#define KEY_INS 0xE8 +#define KEY_DEL 0xE9 + +// C('A') == Control-A +#define C(x) (x - '@') + +static uchar shiftcode[256] = +{ + [0x1D] CTL, + [0x2A] SHIFT, + [0x36] SHIFT, + [0x38] ALT, + [0x9D] CTL, + [0xB8] ALT +}; + +static uchar togglecode[256] = +{ + [0x3A] CAPSLOCK, + [0x45] NUMLOCK, + [0x46] SCROLLLOCK +}; + +static uchar normalmap[256] = +{ + NO, 0x1B, '1', '2', '3', '4', '5', '6', // 0x00 + '7', '8', '9', '0', '-', '=', '\b', '\t', + 'q', 'w', 'e', 'r', 't', 'y', 'u', 'i', // 0x10 + 'o', 'p', '[', ']', '\n', NO, 'a', 's', + 'd', 'f', 'g', 'h', 'j', 'k', 'l', ';', // 0x20 + '\'', '`', NO, '\\', 'z', 'x', 'c', 'v', + 'b', 'n', 'm', ',', '.', '/', NO, '*', // 0x30 + NO, ' ', NO, NO, NO, NO, NO, NO, + NO, NO, NO, NO, NO, NO, NO, '7', // 0x40 + '8', '9', '-', '4', '5', '6', '+', '1', + 
'2', '3', '0', '.', NO, NO, NO, NO, // 0x50 + [0x9C] '\n', // KP_Enter + [0xB5] '/', // KP_Div + [0xC8] KEY_UP, [0xD0] KEY_DN, + [0xC9] KEY_PGUP, [0xD1] KEY_PGDN, + [0xCB] KEY_LF, [0xCD] KEY_RT, + [0x97] KEY_HOME, [0xCF] KEY_END, + [0xD2] KEY_INS, [0xD3] KEY_DEL +}; + +static uchar shiftmap[256] = +{ + NO, 033, '!', '@', '#', '$', '%', '^', // 0x00 + '&', '*', '(', ')', '_', '+', '\b', '\t', + 'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I', // 0x10 + 'O', 'P', '{', '}', '\n', NO, 'A', 'S', + 'D', 'F', 'G', 'H', 'J', 'K', 'L', ':', // 0x20 + '"', '~', NO, '|', 'Z', 'X', 'C', 'V', + 'B', 'N', 'M', '<', '>', '?', NO, '*', // 0x30 + NO, ' ', NO, NO, NO, NO, NO, NO, + NO, NO, NO, NO, NO, NO, NO, '7', // 0x40 + '8', '9', '-', '4', '5', '6', '+', '1', + '2', '3', '0', '.', NO, NO, NO, NO, // 0x50 + [0x9C] '\n', // KP_Enter + [0xB5] '/', // KP_Div + [0xC8] KEY_UP, [0xD0] KEY_DN, + [0xC9] KEY_PGUP, [0xD1] KEY_PGDN, + [0xCB] KEY_LF, [0xCD] KEY_RT, + [0x97] KEY_HOME, [0xCF] KEY_END, + [0xD2] KEY_INS, [0xD3] KEY_DEL +}; + +static uchar ctlmap[256] = +{ + NO, NO, NO, NO, NO, NO, NO, NO, + NO, NO, NO, NO, NO, NO, NO, NO, + C('Q'), C('W'), C('E'), C('R'), C('T'), C('Y'), C('U'), C('I'), + C('O'), C('P'), NO, NO, '\r', NO, C('A'), C('S'), + C('D'), C('F'), C('G'), C('H'), C('J'), C('K'), C('L'), NO, + NO, NO, NO, C('\\'), C('Z'), C('X'), C('C'), C('V'), + C('B'), C('N'), C('M'), NO, NO, C('/'), NO, NO, + [0x9C] '\r', // KP_Enter + [0xB5] C('/'), // KP_Div + [0xC8] KEY_UP, [0xD0] KEY_DN, + [0xC9] KEY_PGUP, [0xD1] KEY_PGDN, + [0xCB] KEY_LF, [0xCD] KEY_RT, + [0x97] KEY_HOME, [0xCF] KEY_END, + [0xD2] KEY_INS, [0xD3] KEY_DEL +}; + diff --git a/kernel.ld b/kernel.ld new file mode 100644 index 0000000..4e12e14 --- /dev/null +++ b/kernel.ld @@ -0,0 +1,64 @@ +/* Simple linker script for the JOS kernel. + See the GNU ld 'info' manual ("info ld") to learn the syntax. 
*/ + +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) + +SECTIONS +{ + /* Link the kernel at this address: "." means the current address */ + /* Must be equal to KERNLINK */ + . = 0x80100000; + + .text : AT(0x100000) { + *(.text .stub .text.* .gnu.linkonce.t.*) + } + + PROVIDE(etext = .); /* Define the 'etext' symbol to this value */ + + .rodata : { + *(.rodata .rodata.* .gnu.linkonce.r.*) + } + + /* Include debugging information in kernel memory */ + .stab : { + PROVIDE(__STAB_BEGIN__ = .); + *(.stab); + PROVIDE(__STAB_END__ = .); + } + + .stabstr : { + PROVIDE(__STABSTR_BEGIN__ = .); + *(.stabstr); + PROVIDE(__STABSTR_END__ = .); + } + + /* Adjust the address for the data segment to the next page */ + . = ALIGN(0x1000); + + /* Conventionally, Unix linkers provide pseudo-symbols + * etext, edata, and end, at the end of the text, data, and bss. + * For the kernel mapping, we need the address at the beginning + * of the data section, but that's not one of the conventional + * symbols, because the convention started before there was a + * read-only rodata section between text and data. */ + PROVIDE(data = .); + + /* The data segment */ + .data : { + *(.data) + } + + PROVIDE(edata = .); + + .bss : { + *(.bss) + } + + PROVIDE(end = .); + + /DISCARD/ : { + *(.eh_frame .note.GNU-stack) + } +} diff --git a/kill.c b/kill.c new file mode 100644 index 0000000..bcca9a9 --- /dev/null +++ b/kill.c @@ -0,0 +1,16 @@ +#include "types.h" +#include "stat.h" +#include "user.h" + +int main(int argc, char **argv) { + int i; + + if (argc < 2) { + printf(2, "usage: kill pid...\n"); + exit(); + } + for (i = 1; i < argc; i++) { + kill(atoi(argv[i])); + } + exit(); +} diff --git a/lapic.c b/lapic.c new file mode 100644 index 0000000..946db80 --- /dev/null +++ b/lapic.c @@ -0,0 +1,218 @@ +// The local APIC manages internal (non-I/O) interrupts. +// See Chapter 8 & Appendix C of Intel processor manual volume 3. 
+
+#include "param.h"
+#include "types.h"
+#include "defs.h"
+#include "date.h"
+#include "memlayout.h"
+#include "traps.h"
+#include "mmu.h"
+#include "x86.h"
+
+// Local APIC registers, divided by 4 for use as uint[] indices.
+// The indented defines are values written to the register listed
+// just above them.
+#define ID (0x0020 / 4) // ID
+#define VER (0x0030 / 4) // Version
+#define TPR (0x0080 / 4) // Task Priority
+#define EOI (0x00B0 / 4) // EOI
+#define SVR (0x00F0 / 4) // Spurious Interrupt Vector
+    #define ENABLE 0x00000100 // Unit Enable
+#define ESR (0x0280 / 4) // Error Status
+#define ICRLO (0x0300 / 4) // Interrupt Command
+    #define INIT 0x00000500 // INIT/RESET
+    #define STARTUP 0x00000600 // Startup IPI
+    #define DELIVS 0x00001000 // Delivery status
+    #define ASSERT 0x00004000 // Assert interrupt (vs deassert)
+    #define DEASSERT 0x00000000
+    #define LEVEL 0x00008000 // Level triggered
+    #define BCAST 0x00080000 // Send to all APICs, including self.
+    #define BUSY 0x00001000
+    #define FIXED 0x00000000
+#define ICRHI (0x0310 / 4) // Interrupt Command [63:32]
+#define TIMER (0x0320 / 4) // Local Vector Table 0 (TIMER)
+    #define X1 0x0000000B // divide counts by 1
+    #define PERIODIC 0x00020000 // Periodic
+#define PCINT (0x0340 / 4) // Performance Counter LVT
+#define LINT0 (0x0350 / 4) // Local Vector Table 1 (LINT0)
+#define LINT1 (0x0360 / 4) // Local Vector Table 2 (LINT1)
+#define ERROR (0x0370 / 4) // Local Vector Table 3 (ERROR)
+    #define MASKED 0x00010000 // Interrupt masked
+#define TICR (0x0380 / 4) // Timer Initial Count
+#define TCCR (0x0390 / 4) // Timer Current Count
+#define TDCR (0x03E0 / 4) // Timer Divide Configuration
+
+volatile uint *lapic; // Initialized in mp.c
+
+
+// Write value to local APIC register index, then read the ID register
+// back so the write has completed before returning.
+static void lapicw(int index, int value) {
+    lapic[index] = value;
+    lapic[ID]; // wait for write to finish, by reading
+}
+
+// Configure the local APIC. Does nothing if lapic has not been set.
+void lapicinit(void) {
+    if (!lapic) {
+        return;
+    }
+
+    // Enable local APIC; set spurious interrupt vector.
+    lapicw(SVR, ENABLE | (T_IRQ0 + IRQ_SPURIOUS));
+
+    // The timer repeatedly counts down at bus frequency
+    // from lapic[TICR] and then issues an interrupt.
+    // If xv6 cared more about precise timekeeping,
+    // TICR would be calibrated using an external time source.
+    lapicw(TDCR, X1);
+    lapicw(TIMER, PERIODIC | (T_IRQ0 + IRQ_TIMER));
+    lapicw(TICR, 10000000);
+
+    // Disable logical interrupt lines.
+    lapicw(LINT0, MASKED);
+    lapicw(LINT1, MASKED);
+
+    // Disable performance counter overflow interrupts
+    // on machines that provide that interrupt entry.
+    if (((lapic[VER] >> 16) & 0xFF) >= 4) {
+        lapicw(PCINT, MASKED);
+    }
+
+    // Map error interrupt to IRQ_ERROR.
+    lapicw(ERROR, T_IRQ0 + IRQ_ERROR);
+
+    // Clear error status register (requires back-to-back writes).
+    lapicw(ESR, 0);
+    lapicw(ESR, 0);
+
+    // Ack any outstanding interrupts.
+    lapicw(EOI, 0);
+
+    // Send an Init Level De-Assert to synchronise arbitration ID's.
+    lapicw(ICRHI, 0);
+    lapicw(ICRLO, BCAST | INIT | LEVEL);
+    while (lapic[ICRLO] & DELIVS) {
+        ;
+    }
+
+    // Enable interrupts on the APIC (but not on the processor).
+    lapicw(TPR, 0);
+}
+
+// Return the top byte of the ID register (the APIC ID), or 0 if lapic
+// has not been set.
+int lapicid(void) {
+    if (!lapic) {
+        return 0;
+    }
+    return lapic[ID] >> 24;
+}
+
+// Acknowledge interrupt.
+void lapiceoi(void) {
+    if (lapic) {
+        lapicw(EOI, 0);
+    }
+}
+
+// Spin for a given number of microseconds.
+// On real hardware would want to tune this dynamically.
+// NOTE: the body is empty, so this currently returns immediately and
+// every microdelay() call in this file adds no delay.
+void microdelay(int us) {
+}
+
+#define CMOS_PORT 0x70
+#define CMOS_RETURN 0x71
+
+// Start additional processor running entry code at addr.
+// See Appendix B of MultiProcessor Specification.
+void lapicstartap(uchar apicid, uint addr) {
+    int i;
+    ushort *wrv;
+
+    // "The BSP must initialize CMOS shutdown code to 0AH
+    // and the warm reset vector (DWORD based at 40:67) to point at
+    // the AP startup code prior to the [universal startup algorithm]."
+    outb(CMOS_PORT, 0xF); // offset 0xF is shutdown code
+    outb(CMOS_PORT + 1, 0x0A);
+    wrv = (ushort*)P2V((0x40 << 4 | 0x67)); // Warm reset vector
+    wrv[0] = 0;
+    wrv[1] = addr >> 4;
+
+    // "Universal startup algorithm."
+    // Send INIT (level-triggered) interrupt to reset other CPU.
+    lapicw(ICRHI, apicid << 24);
+    lapicw(ICRLO, INIT | LEVEL | ASSERT);
+    microdelay(200);
+    lapicw(ICRLO, INIT | LEVEL);
+    microdelay(100); // should be 10ms, but too slow in Bochs!
+
+    // Send startup IPI (twice!) to enter code.
+    // Regular hardware is supposed to only accept a STARTUP
+    // when it is in the halted state due to an INIT. So the second
+    // should be ignored, but it is part of the official Intel algorithm.
+    // Bochs complains about the second one. Too bad for Bochs.
+    for (i = 0; i < 2; i++) {
+        lapicw(ICRHI, apicid << 24);
+        lapicw(ICRLO, STARTUP | (addr >> 12));
+        microdelay(200);
+    }
+}
+
+#define CMOS_STATA 0x0a
+#define CMOS_STATB 0x0b
+#define CMOS_UIP (1 << 7) // RTC update in progress
+
+#define SECS 0x00
+#define MINS 0x02
+#define HOURS 0x04
+#define DAY 0x07
+#define MONTH 0x08
+#define YEAR 0x09
+
+// Read CMOS register reg: write its index to CMOS_PORT, then read the
+// value from CMOS_RETURN.
+static uint cmos_read(uint reg) {
+    outb(CMOS_PORT, reg);
+    microdelay(200);
+
+    return inb(CMOS_RETURN);
+}
+
+// Fill *r with the raw (unconverted) date and time register values.
+static void fill_rtcdate(struct rtcdate *r) {
+    r->second = cmos_read(SECS);
+    r->minute = cmos_read(MINS);
+    r->hour = cmos_read(HOURS);
+    r->day = cmos_read(DAY);
+    r->month = cmos_read(MONTH);
+    r->year = cmos_read(YEAR);
+}
+
+// qemu seems to use 24-hour GMT and the values are BCD encoded
+// Store the current date and time in *r. 2000 is added to the year
+// read from the clock.
+void cmostime(struct rtcdate *r) {
+    struct rtcdate t1, t2;
+    int sb, bcd;
+
+    sb = cmos_read(CMOS_STATB);
+
+    // Bit 2 of status register B clear means the values are BCD.
+    bcd = (sb & (1 << 2)) == 0;
+
+    // make sure CMOS doesn't modify time while we read it
+    // (retry until no update is in progress and two consecutive reads
+    // agree)
+    for (;;) {
+        fill_rtcdate(&t1);
+        if (cmos_read(CMOS_STATA) & CMOS_UIP) {
+            continue;
+        }
+        fill_rtcdate(&t2);
+        if (memcmp(&t1, &t2, sizeof(t1)) == 0) {
+            break;
+        }
+    }
+
+    // convert
+    if (bcd) {
+#define CONV(x) (t1.x = ((t1.x >> 4) * 10) + (t1.x & 0xf))
+        CONV(second);
+        CONV(minute);
+        CONV(hour );
+        CONV(day );
+        CONV(month );
+        CONV(year );
+#undef CONV
+    }
+
+    *r = t1;
+    r->year += 2000;
+}
diff --git a/ln.c b/ln.c
new file mode 100644
index 0000000..8883514
--- /dev/null
+++ b/ln.c
@@ -0,0 +1,14 @@
+#include "types.h"
+#include "stat.h"
+#include "user.h"
+
+// Usage: ln old new
+// Calls link(old, new) and reports a failure on fd 2.
+int main(int argc, char *argv[]) {
+    if (argc != 3) {
+        printf(2, "Usage: ln old new\n");
+        exit();
+    }
+    if (link(argv[1], argv[2]) < 0) {
+        printf(2, "link %s %s: failed\n", argv[1], argv[2]);
+    }
+    exit();
+}
diff --git a/log.c b/log.c
new file mode 100644
index 0000000..a64c0f6
--- /dev/null
+++ b/log.c
@@ -0,0 +1,234 @@
+#include "types.h"
+#include "defs.h"
+#include "param.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "fs.h"
+#include "buf.h"
+
+// Simple logging that allows concurrent FS system calls.
+//
+// A log transaction contains the updates of multiple FS system
+// calls. The logging system only commits when there are
+// no FS system calls active. Thus there is never
+// any reasoning required about whether a commit might
+// write an uncommitted system call's updates to disk.
+//
+// A system call should call begin_op()/end_op() to mark
+// its start and end. Usually begin_op() just increments
+// the count of in-progress FS system calls and returns.
+// But if it thinks the log is close to running out, it
+// sleeps until the last outstanding end_op() commits.
+//
+// The log is a physical re-do log containing disk blocks.
+// The on-disk log format:
+//   header block, containing block #s for block A, B, C, ...
+//   block A
+//   block B
+//   block C
+//   ...
+// Log appends are synchronous.
+
+// Contents of the header block, used for both the on-disk header block
+// and to keep track in memory of logged block# before commit.
+struct logheader {
+    int n;
+    int block[LOGSIZE];
+};
+
+struct log {
+    struct spinlock lock;
+    int start;
+    int size;
+    int outstanding; // how many FS sys calls are executing.
+ int committing; // in commit(), please wait. + int dev; + struct logheader lh; +}; +struct log log; + +static void recover_from_log(void); +static void commit(); + +void +initlog(int dev) +{ + if (sizeof(struct logheader) >= BSIZE) + panic("initlog: too big logheader"); + + struct superblock sb; + initlock(&log.lock, "log"); + readsb(dev, &sb); + log.start = sb.logstart; + log.size = sb.nlog; + log.dev = dev; + recover_from_log(); +} + +// Copy committed blocks from log to their home location +static void +install_trans(void) +{ + int tail; + + for (tail = 0; tail < log.lh.n; tail++) { + struct buf *lbuf = bread(log.dev, log.start+tail+1); // read log block + struct buf *dbuf = bread(log.dev, log.lh.block[tail]); // read dst + memmove(dbuf->data, lbuf->data, BSIZE); // copy block to dst + bwrite(dbuf); // write dst to disk + brelse(lbuf); + brelse(dbuf); + } +} + +// Read the log header from disk into the in-memory log header +static void +read_head(void) +{ + struct buf *buf = bread(log.dev, log.start); + struct logheader *lh = (struct logheader *) (buf->data); + int i; + log.lh.n = lh->n; + for (i = 0; i < log.lh.n; i++) { + log.lh.block[i] = lh->block[i]; + } + brelse(buf); +} + +// Write in-memory log header to disk. +// This is the true point at which the +// current transaction commits. +static void +write_head(void) +{ + struct buf *buf = bread(log.dev, log.start); + struct logheader *hb = (struct logheader *) (buf->data); + int i; + hb->n = log.lh.n; + for (i = 0; i < log.lh.n; i++) { + hb->block[i] = log.lh.block[i]; + } + bwrite(buf); + brelse(buf); +} + +static void +recover_from_log(void) +{ + read_head(); + install_trans(); // if committed, copy from log to disk + log.lh.n = 0; + write_head(); // clear the log +} + +// called at the start of each FS system call. 
+void +begin_op(void) +{ + acquire(&log.lock); + while(1){ + if(log.committing){ + sleep(&log, &log.lock); + } else if(log.lh.n + (log.outstanding+1)*MAXOPBLOCKS > LOGSIZE){ + // this op might exhaust log space; wait for commit. + sleep(&log, &log.lock); + } else { + log.outstanding += 1; + release(&log.lock); + break; + } + } +} + +// called at the end of each FS system call. +// commits if this was the last outstanding operation. +void +end_op(void) +{ + int do_commit = 0; + + acquire(&log.lock); + log.outstanding -= 1; + if(log.committing) + panic("log.committing"); + if(log.outstanding == 0){ + do_commit = 1; + log.committing = 1; + } else { + // begin_op() may be waiting for log space, + // and decrementing log.outstanding has decreased + // the amount of reserved space. + wakeup(&log); + } + release(&log.lock); + + if(do_commit){ + // call commit w/o holding locks, since not allowed + // to sleep with locks. + commit(); + acquire(&log.lock); + log.committing = 0; + wakeup(&log); + release(&log.lock); + } +} + +// Copy modified blocks from cache to log. +static void +write_log(void) +{ + int tail; + + for (tail = 0; tail < log.lh.n; tail++) { + struct buf *to = bread(log.dev, log.start+tail+1); // log block + struct buf *from = bread(log.dev, log.lh.block[tail]); // cache block + memmove(to->data, from->data, BSIZE); + bwrite(to); // write the log + brelse(from); + brelse(to); + } +} + +static void +commit() +{ + if (log.lh.n > 0) { + write_log(); // Write modified blocks from cache to log + write_head(); // Write header to disk -- the real commit + install_trans(); // Now install writes to home locations + log.lh.n = 0; + write_head(); // Erase the transaction from the log + } +} + +// Caller has modified b->data and is done with the buffer. +// Record the block number and pin in the cache with B_DIRTY. +// commit()/write_log() will do the disk write. +// +// log_write() replaces bwrite(); a typical use is: +// bp = bread(...) 
+// modify bp->data[] +// log_write(bp) +// brelse(bp) +void +log_write(struct buf *b) +{ + int i; + + if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1) + panic("too big a transaction"); + if (log.outstanding < 1) + panic("log_write outside of trans"); + + acquire(&log.lock); + for (i = 0; i < log.lh.n; i++) { + if (log.lh.block[i] == b->blockno) // log absorbtion + break; + } + log.lh.block[i] = b->blockno; + if (i == log.lh.n) + log.lh.n++; + b->flags |= B_DIRTY; // prevent eviction + release(&log.lock); +} + diff --git a/ls.c b/ls.c new file mode 100644 index 0000000..2862913 --- /dev/null +++ b/ls.c @@ -0,0 +1,85 @@ +#include "types.h" +#include "stat.h" +#include "user.h" +#include "fs.h" + +char* +fmtname(char *path) +{ + static char buf[DIRSIZ+1]; + char *p; + + // Find first character after last slash. + for(p=path+strlen(path); p >= path && *p != '/'; p--) + ; + p++; + + // Return blank-padded name. + if(strlen(p) >= DIRSIZ) + return p; + memmove(buf, p, strlen(p)); + memset(buf+strlen(p), ' ', DIRSIZ-strlen(p)); + return buf; +} + +void +ls(char *path) +{ + char buf[512], *p; + int fd; + struct dirent de; + struct stat st; + + if((fd = open(path, 0)) < 0){ + printf(2, "ls: cannot open %s\n", path); + return; + } + + if(fstat(fd, &st) < 0){ + printf(2, "ls: cannot stat %s\n", path); + close(fd); + return; + } + + switch(st.type){ + case T_FILE: + printf(1, "%s %d %d %d\n", fmtname(path), st.type, st.ino, st.size); + break; + + case T_DIR: + if(strlen(path) + 1 + DIRSIZ + 1 > sizeof buf){ + printf(1, "ls: path too long\n"); + break; + } + strcpy(buf, path); + p = buf+strlen(buf); + *p++ = '/'; + while(read(fd, &de, sizeof(de)) == sizeof(de)){ + if(de.inum == 0) + continue; + memmove(p, de.name, DIRSIZ); + p[DIRSIZ] = 0; + if(stat(buf, &st) < 0){ + printf(1, "ls: cannot stat %s\n", buf); + continue; + } + printf(1, "%s %d %d %d\n", fmtname(buf), st.type, st.ino, st.size); + } + break; + } + close(fd); +} + +int +main(int argc, char *argv[]) +{ + int i; + 
+ if(argc < 2){ + ls("."); + exit(); + } + for(i=1; istarted), 1); // tell startothers() we're up + scheduler(); // start running processes +} + +pde_t entrypgdir[]; // For entry.S + +// Start the non-boot (AP) processors. +static void startothers(void) { + extern uchar _binary_entryother_start[], _binary_entryother_size[]; + uchar *code; + struct cpu *c; + char *stack; + + // Write entry code to unused memory at 0x7000. + // The linker has placed the image of entryother.S in + // _binary_entryother_start. + code = P2V(0x7000); + memmove(code, _binary_entryother_start, (uint)_binary_entryother_size); + + for (c = cpus; c < cpus + ncpu; c++) { + if (c == mycpu()) { // We've started already. + continue; + } + + // Tell entryother.S what stack to use, where to enter, and what + // pgdir to use. We cannot use kpgdir yet, because the AP processor + // is running in low memory, so we use entrypgdir for the APs too. + stack = kalloc(); + *(void**)(code - 4) = stack + KSTACKSIZE; + *(void(**)(void))(code - 8) = mpenter; + *(int**)(code - 12) = (void *) V2P(entrypgdir); + + lapicstartap(c->apicid, V2P(code)); + + // wait for cpu to finish mpmain() + while (c->started == 0) { + ; + } + } +} + +// The boot page table used in entry.S and entryother.S. +// Page directories (and page tables) must start on page boundaries, +// hence the __aligned__ attribute. +// PTE_PS in a page directory entry enables 4Mbyte pages. + +__attribute__((__aligned__(PGSIZE))) +pde_t entrypgdir[NPDENTRIES] = { + // Map VA's [0, 4MB) to PA's [0, 4MB) + [0] = (0) | PTE_P | PTE_W | PTE_PS, + // Map VA's [KERNBASE, KERNBASE+4MB) to PA's [0, 4MB) + [KERNBASE >> PDXSHIFT] = (0) | PTE_P | PTE_W | PTE_PS, +}; + + + + + + + + diff --git a/memide.c b/memide.c new file mode 100644 index 0000000..d0a8cf8 --- /dev/null +++ b/memide.c @@ -0,0 +1,60 @@ +// Fake IDE disk; stores blocks in memory. +// Useful for running kernel without scratch disk. 
+ +#include "types.h" +#include "defs.h" +#include "param.h" +#include "mmu.h" +#include "proc.h" +#include "x86.h" +#include "traps.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "fs.h" +#include "buf.h" + +extern uchar _binary_fs_img_start[], _binary_fs_img_size[]; + +static int disksize; +static uchar *memdisk; + +void ideinit(void) { + memdisk = _binary_fs_img_start; + disksize = (uint)_binary_fs_img_size / BSIZE; +} + +// Interrupt handler. +void ideintr(void) { + // no-op +} + +// Sync buf with disk. +// If B_DIRTY is set, write buf to disk, clear B_DIRTY, set B_VALID. +// Else if B_VALID is not set, read buf from disk, set B_VALID. +void iderw(struct buf *b) { + uchar *p; + + if (!holdingsleep(&b->lock)) { + panic("iderw: buf not locked"); + } + if ((b->flags & (B_VALID | B_DIRTY)) == B_VALID) { + panic("iderw: nothing to do"); + } + if (b->dev != 1) { + panic("iderw: request not for disk 1"); + } + if (b->blockno >= disksize) { + panic("iderw: block out of range"); + } + + p = memdisk + b->blockno * BSIZE; + + if (b->flags & B_DIRTY) { + b->flags &= ~B_DIRTY; + memmove(p, b->data, BSIZE); + } + else { + memmove(b->data, p, BSIZE); + } + b->flags |= B_VALID; +} diff --git a/memlayout.h b/memlayout.h new file mode 100644 index 0000000..8942340 --- /dev/null +++ b/memlayout.h @@ -0,0 +1,15 @@ +// Memory layout + +#define EXTMEM 0x100000 // Start of extended memory +#define PHYSTOP 0xE000000 // Top physical memory +#define DEVSPACE 0xFE000000 // Other devices are at high addresses + +// Key addresses for address space layout (see kmap in vm.c for layout) +#define KERNBASE 0x80000000 // First kernel virtual address +#define KERNLINK (KERNBASE + EXTMEM) // Address where kernel is linked + +#define V2P(a) (((uint) (a)) - KERNBASE) +#define P2V(a) ((void *)(((char *) (a)) + KERNBASE)) + +#define V2P_WO(x) ((x) - KERNBASE) // same as V2P, but without casts +#define P2V_WO(x) ((x) + KERNBASE) // same as P2V, but without casts diff --git a/mkdir.c 
b/mkdir.c new file mode 100644 index 0000000..c83dc84 --- /dev/null +++ b/mkdir.c @@ -0,0 +1,21 @@ +#include "types.h" +#include "stat.h" +#include "user.h" + +int main(int argc, char *argv[]) { + int i; + + if (argc < 2) { + printf(2, "Usage: mkdir files...\n"); + exit(); + } + + for (i = 1; i < argc; i++) { + if (mkdir(argv[i]) < 0) { + printf(2, "mkdir: %s failed to create\n", argv[i]); + break; + } + } + + exit(); +} diff --git a/mkfs.c b/mkfs.c new file mode 100644 index 0000000..a970b77 --- /dev/null +++ b/mkfs.c @@ -0,0 +1,283 @@ +#include +#include +#include +#include +#include +#include + +#define stat xv6_stat // avoid clash with host struct stat +#include "types.h" +#include "fs.h" +#include "stat.h" +#include "param.h" + +#ifndef static_assert +#define static_assert(a, b) do { switch (0) case 0: \ + case (a): \ + ; } while (0) +#endif + +#define NINODES 200 + +// Disk layout: +// [ boot block | sb block | log | inode blocks | free bit map | data blocks ] + +int nbitmap = FSSIZE / (BSIZE * 8) + 1; +int ninodeblocks = NINODES / IPB + 1; +int nlog = LOGSIZE; +int nmeta; // Number of meta blocks (boot, sb, nlog, inode, bitmap) +int nblocks; // Number of data blocks + +int fsfd; +struct superblock sb; +char zeroes[BSIZE]; +uint freeinode = 1; +uint freeblock; + + +void balloc(int); +void wsect(uint, void*); +void winode(uint, struct dinode*); +void rinode(uint inum, struct dinode *ip); +void rsect(uint sec, void *buf); +uint ialloc(ushort type); +void iappend(uint inum, void *p, int n); + +// convert to intel byte order +ushort xshort(ushort x) { + ushort y; + uchar *a = (uchar*)&y; + a[0] = x; + a[1] = x >> 8; + return y; +} + +uint xint(uint x) { + uint y; + uchar *a = (uchar*)&y; + a[0] = x; + a[1] = x >> 8; + a[2] = x >> 16; + a[3] = x >> 24; + return y; +} + +int main(int argc, char *argv[]) { + int i, cc, fd; + uint rootino, inum, off; + struct dirent de; + char buf[BSIZE]; + struct dinode din; + + + static_assert(sizeof(int) == 4, "Integers must be 4 
bytes!"); + + if (argc < 2) { + fprintf(stderr, "Usage: mkfs fs.img files...\n"); + exit(1); + } + + assert((BSIZE % sizeof(struct dinode)) == 0); + assert((BSIZE % sizeof(struct dirent)) == 0); + + fsfd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, 0666); + if (fsfd < 0) { + perror(argv[1]); + exit(1); + } + + // 1 fs block = 1 disk sector + nmeta = 2 + nlog + ninodeblocks + nbitmap; + nblocks = FSSIZE - nmeta; + + sb.size = xint(FSSIZE); + sb.nblocks = xint(nblocks); + sb.ninodes = xint(NINODES); + sb.nlog = xint(nlog); + sb.logstart = xint(2); + sb.inodestart = xint(2 + nlog); + sb.bmapstart = xint(2 + nlog + ninodeblocks); + + printf("nmeta %d (boot, super, log blocks %u inode blocks %u, bitmap blocks %u) blocks %d total %d\n", + nmeta, nlog, ninodeblocks, nbitmap, nblocks, FSSIZE); + + freeblock = nmeta; // the first free block that we can allocate + + for (i = 0; i < FSSIZE; i++) { + wsect(i, zeroes); + } + + memset(buf, 0, sizeof(buf)); + memmove(buf, &sb, sizeof(sb)); + wsect(1, buf); + + rootino = ialloc(T_DIR); + assert(rootino == ROOTINO); + + bzero(&de, sizeof(de)); + de.inum = xshort(rootino); + strcpy(de.name, "."); + iappend(rootino, &de, sizeof(de)); + + bzero(&de, sizeof(de)); + de.inum = xshort(rootino); + strcpy(de.name, ".."); + iappend(rootino, &de, sizeof(de)); + + for (i = 2; i < argc; i++) { + assert(index(argv[i], '/') == 0); + + if ((fd = open(argv[i], 0)) < 0) { + perror(argv[i]); + exit(1); + } + + // Skip leading _ in name when writing to file system. + // The binaries are named _rm, _cat, etc. to keep the + // build operating system from trying to execute them + // in place of system binaries like rm and cat. 
+ if (argv[i][0] == '_') { + ++argv[i]; + } + + inum = ialloc(T_FILE); + + bzero(&de, sizeof(de)); + de.inum = xshort(inum); + strncpy(de.name, argv[i], DIRSIZ); + iappend(rootino, &de, sizeof(de)); + + while ((cc = read(fd, buf, sizeof(buf))) > 0) { + iappend(inum, buf, cc); + } + + close(fd); + } + + // fix size of root inode dir + rinode(rootino, &din); + off = xint(din.size); + off = ((off / BSIZE) + 1) * BSIZE; + din.size = xint(off); + winode(rootino, &din); + + balloc(freeblock); + + exit(0); +} + +void wsect(uint sec, void *buf) { + if (lseek(fsfd, sec * BSIZE, 0) != sec * BSIZE) { + perror("lseek"); + exit(1); + } + if (write(fsfd, buf, BSIZE) != BSIZE) { + perror("write"); + exit(1); + } +} + +void winode(uint inum, struct dinode *ip) { + char buf[BSIZE]; + uint bn; + struct dinode *dip; + + bn = IBLOCK(inum, sb); + rsect(bn, buf); + dip = ((struct dinode*)buf) + (inum % IPB); + *dip = *ip; + wsect(bn, buf); +} + +void rinode(uint inum, struct dinode *ip) { + char buf[BSIZE]; + uint bn; + struct dinode *dip; + + bn = IBLOCK(inum, sb); + rsect(bn, buf); + dip = ((struct dinode*)buf) + (inum % IPB); + *ip = *dip; +} + +void rsect(uint sec, void *buf) { + if (lseek(fsfd, sec * BSIZE, 0) != sec * BSIZE) { + perror("lseek"); + exit(1); + } + if (read(fsfd, buf, BSIZE) != BSIZE) { + perror("read"); + exit(1); + } +} + +uint ialloc(ushort type) { + uint inum = freeinode++; + struct dinode din; + + bzero(&din, sizeof(din)); + din.type = xshort(type); + din.nlink = xshort(1); + din.size = xint(0); + winode(inum, &din); + return inum; +} + +void balloc(int used) { + uchar buf[BSIZE]; + int i; + + printf("balloc: first %d blocks have been allocated\n", used); + assert(used < BSIZE * 8); + bzero(buf, BSIZE); + for (i = 0; i < used; i++) { + buf[i / 8] = buf[i / 8] | (0x1 << (i % 8)); + } + printf("balloc: write bitmap block at sector %d\n", sb.bmapstart); + wsect(sb.bmapstart, buf); +} + +#define min(a, b) ((a) < (b) ? 
(a) : (b)) + +void iappend(uint inum, void *xp, int n) { + char *p = (char*)xp; + uint fbn, off, n1; + struct dinode din; + char buf[BSIZE]; + uint indirect[NINDIRECT]; + uint x; + + rinode(inum, &din); + off = xint(din.size); + // printf("append inum %d at off %d sz %d\n", inum, off, n); + while (n > 0) { + fbn = off / BSIZE; + assert(fbn < MAXFILE); + if (fbn < NDIRECT) { + if (xint(din.addrs[fbn]) == 0) { + din.addrs[fbn] = xint(freeblock++); + } + x = xint(din.addrs[fbn]); + } + else { + if (xint(din.addrs[NDIRECT]) == 0) { + din.addrs[NDIRECT] = xint(freeblock++); + } + rsect(xint(din.addrs[NDIRECT]), (char*)indirect); + if (indirect[fbn - NDIRECT] == 0) { + indirect[fbn - NDIRECT] = xint(freeblock++); + wsect(xint(din.addrs[NDIRECT]), (char*)indirect); + } + x = xint(indirect[fbn - NDIRECT]); + } + n1 = min(n, (fbn + 1) * BSIZE - off); + rsect(x, buf); + bcopy(p, buf + off - (fbn * BSIZE), n1); + wsect(x, buf); + n -= n1; + off += n1; + p += n1; + } + din.size = xint(off); + winode(inum, &din); +} diff --git a/mmu.h b/mmu.h new file mode 100644 index 0000000..b126119 --- /dev/null +++ b/mmu.h @@ -0,0 +1,181 @@ +// This file contains definitions for the +// x86 memory management unit (MMU). + +// Eflags register +#define FL_IF 0x00000200 // Interrupt Enable + +// Control Register flags +#define CR0_PE 0x00000001 // Protection Enable +#define CR0_WP 0x00010000 // Write Protect +#define CR0_PG 0x80000000 // Paging + +#define CR4_PSE 0x00000010 // Page size extension + +// various segment selectors. +#define SEG_KCODE 1 // kernel code +#define SEG_KDATA 2 // kernel data+stack +#define SEG_UCODE 3 // user code +#define SEG_UDATA 4 // user data+stack +#define SEG_TSS 5 // this process's task state + +// cpu->gdt[NSEGS] holds the above segments. 
+#define NSEGS 6 + +#ifndef __ASSEMBLER__ +// Segment Descriptor +struct segdesc { + uint lim_15_0 : 16; // Low bits of segment limit + uint base_15_0 : 16; // Low bits of segment base address + uint base_23_16 : 8; // Middle bits of segment base address + uint type : 4; // Segment type (see STS_ constants) + uint s : 1; // 0 = system, 1 = application + uint dpl : 2; // Descriptor Privilege Level + uint p : 1; // Present + uint lim_19_16 : 4; // High bits of segment limit + uint avl : 1; // Unused (available for software use) + uint rsv1 : 1; // Reserved + uint db : 1; // 0 = 16-bit segment, 1 = 32-bit segment + uint g : 1; // Granularity: limit scaled by 4K when set + uint base_31_24 : 8; // High bits of segment base address +}; + +// Normal segment +#define SEG(type, base, lim, dpl) (struct segdesc) \ + { ((lim) >> 12) & 0xffff, (uint)(base) & 0xffff, \ + ((uint)(base) >> 16) & 0xff, type, 1, dpl, 1, \ + (uint)(lim) >> 28, 0, 0, 1, 1, (uint)(base) >> 24 } +#define SEG16(type, base, lim, dpl) (struct segdesc) \ + { (lim) & 0xffff, (uint)(base) & 0xffff, \ + ((uint)(base) >> 16) & 0xff, type, 1, dpl, 1, \ + (uint)(lim) >> 16, 0, 0, 1, 0, (uint)(base) >> 24 } +#endif + +#define DPL_USER 0x3 // User DPL + +// Application segment type bits +#define STA_X 0x8 // Executable segment +#define STA_W 0x2 // Writeable (non-executable segments) +#define STA_R 0x2 // Readable (executable segments) + +// System segment type bits +#define STS_T32A 0x9 // Available 32-bit TSS +#define STS_IG32 0xE // 32-bit Interrupt Gate +#define STS_TG32 0xF // 32-bit Trap Gate + +// A virtual address 'la' has a three-part structure as follows: +// +// +--------10------+-------10-------+---------12----------+ +// | Page Directory | Page Table | Offset within Page | +// | Index | Index | | +// +----------------+----------------+---------------------+ +// \--- PDX(va) --/ \--- PTX(va) --/ + +// page directory index +#define PDX(va) (((uint)(va) >> PDXSHIFT) & 0x3FF) + +// page table index 
+#define PTX(va) (((uint)(va) >> PTXSHIFT) & 0x3FF) + +// construct virtual address from indexes and offset +#define PGADDR(d, t, o) ((uint)((d) << PDXSHIFT | (t) << PTXSHIFT | (o))) + +// Page directory and page table constants. +#define NPDENTRIES 1024 // # directory entries per page directory +#define NPTENTRIES 1024 // # PTEs per page table +#define PGSIZE 4096 // bytes mapped by a page + +#define PTXSHIFT 12 // offset of PTX in a linear address +#define PDXSHIFT 22 // offset of PDX in a linear address + +#define PGROUNDUP(sz) (((sz) + PGSIZE - 1) & ~(PGSIZE - 1)) +#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE - 1)) + +// Page table/directory entry flags. +#define PTE_P 0x001 // Present +#define PTE_W 0x002 // Writeable +#define PTE_U 0x004 // User +#define PTE_PS 0x080 // Page Size + +// Address in page table or page directory entry +#define PTE_ADDR(pte) ((uint)(pte) & ~0xFFF) +#define PTE_FLAGS(pte) ((uint)(pte) & 0xFFF) + +#ifndef __ASSEMBLER__ +typedef uint pte_t; + +// Task state segment format +struct taskstate { + uint link; // Old ts selector + uint esp0; // Stack pointers and segment selectors + ushort ss0; // after an increase in privilege level + ushort padding1; + uint *esp1; + ushort ss1; + ushort padding2; + uint *esp2; + ushort ss2; + ushort padding3; + void *cr3; // Page directory base + uint *eip; // Saved state from last task switch + uint eflags; + uint eax; // More saved state (registers) + uint ecx; + uint edx; + uint ebx; + uint *esp; + uint *ebp; + uint esi; + uint edi; + ushort es; // Even more saved state (segment selectors) + ushort padding4; + ushort cs; + ushort padding5; + ushort ss; + ushort padding6; + ushort ds; + ushort padding7; + ushort fs; + ushort padding8; + ushort gs; + ushort padding9; + ushort ldt; + ushort padding10; + ushort t; // Trap on task switch + ushort iomb; // I/O map base address +}; + +// Gate descriptors for interrupts and traps +struct gatedesc { + uint off_15_0 : 16; // low 16 bits of offset in segment + uint 
cs : 16; // code segment selector + uint args : 5; // # args, 0 for interrupt/trap gates + uint rsv1 : 3; // reserved(should be zero I guess) + uint type : 4; // type(STS_{IG32,TG32}) + uint s : 1; // must be 0 (system) + uint dpl : 2; // descriptor(meaning new) privilege level + uint p : 1; // Present + uint off_31_16 : 16; // high bits of offset in segment +}; + +// Set up a normal interrupt/trap gate descriptor. +// - istrap: 1 for a trap (= exception) gate, 0 for an interrupt gate. +// interrupt gate clears FL_IF, trap gate leaves FL_IF alone +// - sel: Code segment selector for interrupt/trap handler +// - off: Offset in code segment for interrupt/trap handler +// - dpl: Descriptor Privilege Level - +// the privilege level required for software to invoke +// this interrupt/trap gate explicitly using an int instruction. +#define SETGATE(gate, istrap, sel, off, d) \ + { \ + (gate).off_15_0 = (uint)(off) & 0xffff; \ + (gate).cs = (sel); \ + (gate).args = 0; \ + (gate).rsv1 = 0; \ + (gate).type = (istrap) ? STS_TG32 : STS_IG32; \ + (gate).s = 0; \ + (gate).dpl = (d); \ + (gate).p = 1; \ + (gate).off_31_16 = (uint)(off) >> 16; \ + } + +#endif diff --git a/mp.c b/mp.c new file mode 100644 index 0000000..bb1454d --- /dev/null +++ b/mp.c @@ -0,0 +1,141 @@ +// Multiprocessor support +// Search memory for MP description structures. +// http://developer.intel.com/design/pentium/datashts/24201606.pdf + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "mp.h" +#include "x86.h" +#include "mmu.h" +#include "proc.h" + +struct cpu cpus[NCPU]; +int ncpu; +uchar ioapicid; + +static uchar sum(uchar *addr, int len) { + int i, sum; + + sum = 0; + for (i = 0; i < len; i++) { + sum += addr[i]; + } + return sum; +} + +// Look for an MP structure in the len bytes at addr. 
+static struct mp*mpsearch1(uint a, int len) { + uchar *e, *p, *addr; + + addr = P2V(a); + e = addr + len; + for (p = addr; p < e; p += sizeof(struct mp)) { + if (memcmp(p, "_MP_", 4) == 0 && sum(p, sizeof(struct mp)) == 0) { + return (struct mp*)p; + } + } + return 0; +} + +// Search for the MP Floating Pointer Structure, which according to the +// spec is in one of the following three locations: +// 1) in the first KB of the EBDA; +// 2) in the last KB of system base memory; +// 3) in the BIOS ROM between 0xE0000 and 0xFFFFF. +static struct mp*mpsearch(void) { + uchar *bda; + uint p; + struct mp *mp; + + bda = (uchar *) P2V(0x400); + if ((p = ((bda[0x0F] << 8) | bda[0x0E]) << 4)) { + if ((mp = mpsearch1(p, 1024))) { + return mp; + } + } + else { + p = ((bda[0x14] << 8) | bda[0x13]) * 1024; + if ((mp = mpsearch1(p - 1024, 1024))) { + return mp; + } + } + return mpsearch1(0xF0000, 0x10000); +} + +// Search for an MP configuration table. For now, +// don't accept the default configurations (physaddr == 0). +// Check for correct signature, calculate the checksum and, +// if correct, check the version. +// To do: check extended table checksum. 
+static struct mpconf*mpconfig(struct mp **pmp) { + struct mpconf *conf; + struct mp *mp; + + if ((mp = mpsearch()) == 0 || mp->physaddr == 0) { + return 0; + } + conf = (struct mpconf*) P2V((uint) mp->physaddr); + if (memcmp(conf, "PCMP", 4) != 0) { + return 0; + } + if (conf->version != 1 && conf->version != 4) { + return 0; + } + if (sum((uchar*)conf, conf->length) != 0) { + return 0; + } + *pmp = mp; + return conf; +} + +void mpinit(void) { + uchar *p, *e; + int ismp; + struct mp *mp; + struct mpconf *conf; + struct mpproc *proc; + struct mpioapic *ioapic; + + if ((conf = mpconfig(&mp)) == 0) { + panic("Expect to run on an SMP"); + } + ismp = 1; + lapic = (uint*)conf->lapicaddr; + for (p = (uchar*)(conf + 1), e = (uchar*)conf + conf->length; p < e;) { + switch (*p) { + case MPPROC: + proc = (struct mpproc*)p; + if (ncpu < NCPU) { + cpus[ncpu].apicid = proc->apicid; // apicid may differ from ncpu + ncpu++; + } + p += sizeof(struct mpproc); + continue; + case MPIOAPIC: + ioapic = (struct mpioapic*)p; + ioapicid = ioapic->apicno; + p += sizeof(struct mpioapic); + continue; + case MPBUS: + case MPIOINTR: + case MPLINTR: + p += 8; + continue; + default: + ismp = 0; + break; + } + } + if (!ismp) { + panic("Didn't find a suitable machine"); + } + + if (mp->imcrp) { + // Bochs doesn't support IMCR, so this doesn't run on Bochs. + // But it would on real hardware. + outb(0x22, 0x70); // Select IMCR + outb(0x23, inb(0x23) | 1); // Mask external interrupts. 
+ } +} diff --git a/mp.h b/mp.h new file mode 100644 index 0000000..c316dde --- /dev/null +++ b/mp.h @@ -0,0 +1,56 @@ +// See MultiProcessor Specification Version 1.[14] + +struct mp { // floating pointer + uchar signature[4]; // "_MP_" + void *physaddr; // phys addr of MP config table + uchar length; // 1 + uchar specrev; // [14] + uchar checksum; // all bytes must add up to 0 + uchar type; // MP system config type + uchar imcrp; + uchar reserved[3]; +}; + +struct mpconf { // configuration table header + uchar signature[4]; // "PCMP" + ushort length; // total table length + uchar version; // [14] + uchar checksum; // all bytes must add up to 0 + uchar product[20]; // product id + uint *oemtable; // OEM table pointer + ushort oemlength; // OEM table length + ushort entry; // entry count + uint *lapicaddr; // address of local APIC + ushort xlength; // extended table length + uchar xchecksum; // extended table checksum + uchar reserved; +}; + +struct mpproc { // processor table entry + uchar type; // entry type (0) + uchar apicid; // local APIC id + uchar version; // local APIC verison + uchar flags; // CPU flags + #define MPBOOT 0x02 // This proc is the bootstrap processor. 
+ uchar signature[4]; // CPU signature + uint feature; // feature flags from CPUID instruction + uchar reserved[8]; +}; + +struct mpioapic { // I/O APIC table entry + uchar type; // entry type (2) + uchar apicno; // I/O APIC id + uchar version; // I/O APIC version + uchar flags; // I/O APIC flags + uint *addr; // I/O APIC address +}; + +// Table entry types +#define MPPROC 0x00 // One per processor +#define MPBUS 0x01 // One per bus +#define MPIOAPIC 0x02 // One per I/O APIC +#define MPIOINTR 0x03 // One per bus interrupt source +#define MPLINTR 0x04 // One per system interrupt source + + + diff --git a/param.h b/param.h new file mode 100644 index 0000000..a7e90ef --- /dev/null +++ b/param.h @@ -0,0 +1,14 @@ +#define NPROC 64 // maximum number of processes +#define KSTACKSIZE 4096 // size of per-process kernel stack +#define NCPU 8 // maximum number of CPUs +#define NOFILE 16 // open files per process +#define NFILE 100 // open files per system +#define NINODE 50 // maximum number of active i-nodes +#define NDEV 10 // maximum major device number +#define ROOTDEV 1 // device number of file system root disk +#define MAXARG 32 // max exec arguments +#define MAXOPBLOCKS 10 // max # of blocks any FS op writes +#define LOGSIZE (MAXOPBLOCKS*3) // max data blocks in on-disk log +#define NBUF (MAXOPBLOCKS*3) // size of disk block cache +#define FSSIZE 1000 // size of file system in blocks + diff --git a/picirq.c b/picirq.c new file mode 100644 index 0000000..2abdc69 --- /dev/null +++ b/picirq.c @@ -0,0 +1,17 @@ +#include "types.h" +#include "x86.h" +#include "traps.h" + +// I/O Addresses of the two programmable interrupt controllers +#define IO_PIC1 0x20 // Master (IRQs 0-7) +#define IO_PIC2 0xA0 // Slave (IRQs 8-15) + +// Don't use the 8259A interrupt controllers. Xv6 assumes SMP hardware. 
+void picinit(void) { + // mask all interrupts + outb(IO_PIC1 + 1, 0xFF); + outb(IO_PIC2 + 1, 0xFF); +} + + + diff --git a/pipe.c b/pipe.c new file mode 100644 index 0000000..579d7f5 --- /dev/null +++ b/pipe.c @@ -0,0 +1,122 @@ +#include "types.h" +#include "defs.h" +#include "param.h" +#include "mmu.h" +#include "proc.h" +#include "fs.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "file.h" + +#define PIPESIZE 512 + +struct pipe { + struct spinlock lock; + char data[PIPESIZE]; + uint nread; // number of bytes read + uint nwrite; // number of bytes written + int readopen; // read fd is still open + int writeopen; // write fd is still open +}; + +void cleanuppipealloc(struct pipe *p, struct file **f0, struct file **f1) { + if (p) { + kfree((char*)p); + } + if (*f0) { + fileclose(*f0); + } + if (*f1) { + fileclose(*f1); + } +} + +int pipealloc(struct file **f0, struct file **f1) { + struct pipe *p; + + p = 0; + *f0 = *f1 = 0; + if ((*f0 = filealloc()) == 0 || (*f1 = filealloc()) == 0) { + cleanuppipealloc(p, f0, f1); + return -1; + } + if ((p = (struct pipe*)kalloc()) == 0) { + cleanuppipealloc(p, f0, f1); + return -1; + } + p->readopen = 1; + p->writeopen = 1; + p->nwrite = 0; + p->nread = 0; + initlock(&p->lock, "pipe"); + (*f0)->type = FD_PIPE; + (*f0)->readable = 1; + (*f0)->writable = 0; + (*f0)->pipe = p; + (*f1)->type = FD_PIPE; + (*f1)->readable = 0; + (*f1)->writable = 1; + (*f1)->pipe = p; + return 0; +} + +void pipeclose(struct pipe *p, int writable) { + acquire(&p->lock); + if (writable) { + p->writeopen = 0; + wakeup(&p->nread); + } + else { + p->readopen = 0; + wakeup(&p->nwrite); + } + if (p->readopen == 0 && p->writeopen == 0) { + release(&p->lock); + kfree((char*)p); + } + else { + release(&p->lock); + } +} + +int pipewrite(struct pipe *p, char *addr, int n) { + int i; + + acquire(&p->lock); + for (i = 0; i < n; i++) { + while (p->nwrite == p->nread + PIPESIZE) { //DOC: pipewrite-full + if (p->readopen == 0 || myproc()->killed) { + 
release(&p->lock); + return -1; + } + wakeup(&p->nread); + sleep(&p->nwrite, &p->lock); //DOC: pipewrite-sleep + } + p->data[p->nwrite++ % PIPESIZE] = addr[i]; + } + wakeup(&p->nread); //DOC: pipewrite-wakeup1 + release(&p->lock); + return n; +} + +int piperead(struct pipe *p, char *addr, int n) { + int i; + + acquire(&p->lock); + while (p->nread == p->nwrite && p->writeopen) { //DOC: pipe-empty + if (myproc()->killed) { + release(&p->lock); + return -1; + } + sleep(&p->nread, &p->lock); //DOC: piperead-sleep + } + for (i = 0; i < n; i++) { //DOC: piperead-copy + if (p->nread == p->nwrite) { + break; + } + addr[i] = p->data[p->nread++ % PIPESIZE]; + } + wakeup(&p->nwrite); //DOC: piperead-wakeup + release(&p->lock); + return i; +} diff --git a/printf.c b/printf.c new file mode 100644 index 0000000..6f3b091 --- /dev/null +++ b/printf.c @@ -0,0 +1,91 @@ +#include "types.h" +#include "stat.h" +#include "user.h" + +static void putc(int fd, char c) { + write(fd, &c, 1); +} + +static void printint(int fd, int xx, int base, int sgn) { + static char digits[] = "0123456789ABCDEF"; + char buf[16]; + int i, neg; + uint x; + + neg = 0; + if (sgn && xx < 0) { + neg = 1; + x = -xx; + } + else { + x = xx; + } + + i = 0; + do { + buf[i++] = digits[x % base]; + } + while ((x /= base) != 0); + if (neg) { + buf[i++] = '-'; + } + + while (--i >= 0) { + putc(fd, buf[i]); + } +} + +// Print to the given fd. Only understands %d, %x, %p, %s. +void printf(int fd, const char *fmt, ...) 
{ + char *s; + int c, i, state; + uint *ap; + + state = 0; + ap = (uint*)(void*)&fmt + 1; + for (i = 0; fmt[i]; i++) { + c = fmt[i] & 0xff; + if (state == 0) { + if (c == '%') { + state = '%'; + } + else { + putc(fd, c); + } + } + else if (state == '%') { + if (c == 'd') { + printint(fd, *ap, 10, 1); + ap++; + } + else if (c == 'x' || c == 'p') { + printint(fd, *ap, 16, 0); + ap++; + } + else if (c == 's') { + s = (char*)*ap; + ap++; + if (s == 0) { + s = "(null)"; + } + while (*s != 0) { + putc(fd, *s); + s++; + } + } + else if (c == 'c') { + putc(fd, *ap); + ap++; + } + else if (c == '%') { + putc(fd, c); + } + else { + // Unknown % sequence. Print it to draw attention. + putc(fd, '%'); + putc(fd, c); + } + state = 0; + } + } +} diff --git a/proc.c b/proc.c new file mode 100644 index 0000000..dc3ea65 --- /dev/null +++ b/proc.c @@ -0,0 +1,527 @@ +#include "types.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "mmu.h" +#include "x86.h" +#include "proc.h" +#include "spinlock.h" + +struct { + struct spinlock lock; + struct proc proc[NPROC]; +} ptable; + +static struct proc *initproc; + +int nextpid = 1; +extern void forkret(void); +extern void trapret(void); + +static void wakeup1(void *chan); + +void pinit(void) { + initlock(&ptable.lock, "ptable"); +} + +// Must be called with interrupts disabled +int cpuid() { + return mycpu() - cpus; +} + +// Must be called with interrupts disabled to avoid the caller being +// rescheduled between reading lapicid and running through the loop. +struct cpu*mycpu(void) { + int apicid, i; + + if (readeflags() & FL_IF) { + panic("mycpu called with interrupts enabled\n"); + } + + apicid = lapicid(); + // APIC IDs are not guaranteed to be contiguous. Maybe we should have + // a reverse map, or reserve a register to store &cpus[i]. 
+ for (i = 0; i < ncpu; ++i) { + if (cpus[i].apicid == apicid) { + return &cpus[i]; + } + } + panic("unknown apicid\n"); +} + +// Disable interrupts so that we are not rescheduled +// while reading proc from the cpu structure +struct proc*myproc(void) { + struct cpu *c; + struct proc *p; + pushcli(); + c = mycpu(); + p = c->proc; + popcli(); + return p; +} + +// Look in the process table for an UNUSED proc. +// If found, change state to EMBRYO and initialize +// state required to run in the kernel. +// Otherwise return 0. +static struct proc* allocproc(void) { + struct proc *p; + char *sp; + int found = 0; + + acquire(&ptable.lock); + + p = ptable.proc; + while (p < &ptable.proc[NPROC] && !found) { + if (p->state == UNUSED) { + found = 1; + } + else { + p++; + } + + } + if (!found) { + release(&ptable.lock); + return 0; + } + + p->state = EMBRYO; + p->pid = nextpid++; + + release(&ptable.lock); + + // Allocate kernel stack. + if ((p->kstack = kalloc()) == 0) { + p->state = UNUSED; + return 0; + } + sp = p->kstack + KSTACKSIZE; + + // Leave room for trap frame. + sp -= sizeof *p->tf; + p->tf = (struct trapframe*)sp; + + // Set up new context to start executing at forkret, + // which returns to trapret. + sp -= 4; + *(uint*)sp = (uint)trapret; + + sp -= sizeof *p->context; + p->context = (struct context*)sp; + memset(p->context, 0, sizeof *p->context); + p->context->eip = (uint)forkret; + + return p; +} + +// Set up first user process. 
+void userinit(void) { + struct proc *p; + extern char _binary_initcode_start[], _binary_initcode_size[]; + + p = allocproc(); + + initproc = p; + if ((p->pgdir = setupkvm()) == 0) { + panic("userinit: out of memory?"); + } + inituvm(p->pgdir, _binary_initcode_start, (int)_binary_initcode_size); + p->sz = PGSIZE; + memset(p->tf, 0, sizeof(*p->tf)); + p->tf->cs = (SEG_UCODE << 3) | DPL_USER; + p->tf->ds = (SEG_UDATA << 3) | DPL_USER; + p->tf->es = p->tf->ds; + p->tf->ss = p->tf->ds; + p->tf->eflags = FL_IF; + p->tf->esp = PGSIZE; + p->tf->eip = 0; // beginning of initcode.S + + safestrcpy(p->name, "initcode", sizeof(p->name)); + p->cwd = namei("/"); + + // this assignment to p->state lets other cores + // run this process. the acquire forces the above + // writes to be visible, and the lock is also needed + // because the assignment might not be atomic. + acquire(&ptable.lock); + + p->state = RUNNABLE; + + release(&ptable.lock); +} + +// Grow current process's memory by n bytes. +// Return 0 on success, -1 on failure. +int growproc(int n) { + uint sz; + struct proc *curproc = myproc(); + + sz = curproc->sz; + if (n > 0) { + if ((sz = allocuvm(curproc->pgdir, sz, sz + n)) == 0) { + return -1; + } + } + else if (n < 0) { + if ((sz = deallocuvm(curproc->pgdir, sz, sz + n)) == 0) { + return -1; + } + } + curproc->sz = sz; + switchuvm(curproc); + return 0; +} + +// Create a new process copying p as the parent. +// Sets up stack to return as if from system call. +// Caller must set state of returned proc to RUNNABLE. +int fork(void) { + int i, pid; + struct proc *np; + struct proc *curproc = myproc(); + + // Allocate process. + if ((np = allocproc()) == 0) { + return -1; + } + + // Copy process state from proc. 
+ if ((np->pgdir = copyuvm(curproc->pgdir, curproc->sz)) == 0) { + kfree(np->kstack); + np->kstack = 0; + np->state = UNUSED; + return -1; + } + np->sz = curproc->sz; + np->parent = curproc; + *np->tf = *curproc->tf; + + // Clear %eax so that fork returns 0 in the child. + np->tf->eax = 0; + + for (i = 0; i < NOFILE; i++) { + if (curproc->ofile[i]) { + np->ofile[i] = filedup(curproc->ofile[i]); + } + } + np->cwd = idup(curproc->cwd); + + safestrcpy(np->name, curproc->name, sizeof(curproc->name)); + + pid = np->pid; + + acquire(&ptable.lock); + + np->state = RUNNABLE; + + release(&ptable.lock); + + return pid; +} + +// Exit the current process. Does not return. +// An exited process remains in the zombie state +// until its parent calls wait() to find out it exited. +void exit(void) { + struct proc *curproc = myproc(); + struct proc *p; + int fd; + + if (curproc == initproc) { + panic("init exiting"); + } + + // Close all open files. + for (fd = 0; fd < NOFILE; fd++) { + if (curproc->ofile[fd]) { + fileclose(curproc->ofile[fd]); + curproc->ofile[fd] = 0; + } + } + + begin_op(); + iput(curproc->cwd); + end_op(); + curproc->cwd = 0; + + acquire(&ptable.lock); + + // Parent might be sleeping in wait(). + wakeup1(curproc->parent); + + // Pass abandoned children to init. + for (p = ptable.proc; p < &ptable.proc[NPROC]; p++) { + if (p->parent == curproc) { + p->parent = initproc; + if (p->state == ZOMBIE) { + wakeup1(initproc); + } + } + } + + // Jump into the scheduler, never to return. + curproc->state = ZOMBIE; + sched(); + panic("zombie exit"); +} + +// Wait for a child process to exit and return its pid. +// Return -1 if this process has no children. +int wait(void) { + struct proc *p; + int havekids, pid; + struct proc *curproc = myproc(); + + acquire(&ptable.lock); + for (;;) { + // Scan through table looking for exited children. 
// Per-CPU process scheduler.
// Each CPU calls scheduler() after setting itself up.
// Scheduler never returns.  It loops, doing:
//  - choose a process to run
//  - swtch to start running that process
//  - eventually that process transfers control
//      via swtch back to the scheduler.
void scheduler(void) {
  struct proc *p;
  struct cpu *c = mycpu();
  // No process is running on this CPU while the scheduler itself runs.
  c->proc = 0;

  for (;;) {
    // Enable interrupts on this processor.
    sti();

    // Loop over process table looking for process to run.
    // ptable.lock is taken once per pass over the table and released
    // after the pass, whether or not anything was run.
    acquire(&ptable.lock);
    for (p = ptable.proc; p < &ptable.proc[NPROC]; p++) {
      if (p->state != RUNNABLE) {
        continue;
      }

      // Switch to chosen process.  It is the process's job
      // to release ptable.lock and then reacquire it
      // before jumping back to us.
      c->proc = p;
      switchuvm(p);
      p->state = RUNNING;

      // Saves this CPU's scheduler context in c->scheduler and resumes
      // the process; sched() later switches back to c->scheduler, which
      // makes this call return.
      swtch(&(c->scheduler), p->context);
      switchkvm();

      // Process is done running for now.
      // It should have changed its p->state before coming back.
      c->proc = 0;
    }
    release(&ptable.lock);

  }
}
void sched(void) {
  int intena;
  struct proc *p = myproc();

  // Sanity checks on the caller's state; each violation is fatal.
  // The caller must hold ptable.lock ...
  if (!holding(&ptable.lock)) {
    panic("sched ptable.lock");
  }
  // ... and nothing else that did a pushcli: exactly one level of
  // interrupt-disable nesting may be outstanding on this CPU.
  if (mycpu()->ncli != 1) {
    panic("sched locks");
  }
  // The caller must already have moved the process out of RUNNING.
  if (p->state == RUNNING) {
    panic("sched running");
  }
  // Interrupts must be off while switching.
  if (readeflags() & FL_IF) {
    panic("sched interruptible");
  }
  // Carry intena across the switch in a local, i.e. on this kernel
  // thread's own stack, and write it back once the thread runs again.
  // NOTE(review): mycpu() is looked up again after swtch instead of being
  // cached; presumably the process can be resumed by another CPU's
  // scheduler loop, since every CPU scans the same ptable. Do not hoist
  // the mycpu() calls into a single local without confirming.
  intena = mycpu()->intena;
  swtch(&p->context, mycpu()->scheduler);
  mycpu()->intena = intena;
}
+// The ptable lock must be held. +static void wakeup1(void *chan) { + struct proc *p; + + for (p = ptable.proc; p < &ptable.proc[NPROC]; p++) { + if (p->state == SLEEPING && p->chan == chan) { + p->state = RUNNABLE; + } + } +} + +// Wake up all processes sleeping on chan. +void wakeup(void *chan) { + acquire(&ptable.lock); + wakeup1(chan); + release(&ptable.lock); +} + +// Kill the process with the given pid. +// Process won't exit until it returns +// to user space (see trap in trap.c). +int kill(int pid) { + struct proc *p; + + acquire(&ptable.lock); + for (p = ptable.proc; p < &ptable.proc[NPROC]; p++) { + if (p->pid == pid) { + p->killed = 1; + // Wake process from sleep if necessary. + if (p->state == SLEEPING) { + p->state = RUNNABLE; + } + release(&ptable.lock); + return 0; + } + } + release(&ptable.lock); + return -1; +} + +// Print a process listing to console. For debugging. +// Runs when user types ^P on console. +// No lock to avoid wedging a stuck machine further. +void procdump(void) { + static char *states[] = { + [UNUSED] "unused", + [EMBRYO] "embryo", + [SLEEPING] "sleep ", + [RUNNABLE] "runble", + [RUNNING] "run ", + [ZOMBIE] "zombie" + }; + int i; + struct proc *p; + char *state; + uint pc[10]; + + for (p = ptable.proc; p < &ptable.proc[NPROC]; p++) { + if (p->state == UNUSED) { + continue; + } + if (p->state >= 0 && p->state < NELEM(states) && states[p->state]) { + state = states[p->state]; + } + else { + state = "???"; + } + cprintf("%d %s %s", p->pid, state, p->name); + if (p->state == SLEEPING) { + getcallerpcs((uint*)p->context->ebp + 2, pc); + for (i = 0; i < 10 && pc[i] != 0; i++) { + cprintf(" %p", pc[i]); + } + } + cprintf("\n"); + } +} diff --git a/proc.h b/proc.h new file mode 100644 index 0000000..bed7825 --- /dev/null +++ b/proc.h @@ -0,0 +1,57 @@ +// Per-CPU state +struct cpu { + uchar apicid; // Local APIC ID + struct context *scheduler; // swtch() here to enter scheduler + struct taskstate ts; // Used by x86 to find stack for 
// Per-process state
// One entry of the process table; allocproc() treats an entry whose
// state is UNUSED as free.
struct proc {
  uint sz;                     // Size of process memory (bytes)
  pde_t* pgdir;                // Page table
  char *kstack;                // Bottom of kernel stack for this process
                               // (obtained with kalloc in allocproc)
  enum procstate state;        // Process state
  int pid;                     // Process ID
  struct proc *parent;         // Parent process
  struct trapframe *tf;        // Trap frame for current syscall
                               // (placed at the top of kstack by allocproc)
  struct context *context;     // swtch() here to run process
  void *chan;                  // If non-zero, sleeping on chan
                               // (set by sleep(), reset to 0 after wakeup)
  int killed;                  // If non-zero, have been killed
  struct file *ofile[NOFILE];  // Open files
  struct inode *cwd;           // Current directory
  char name[16];               // Process name (debugging)
};
"stat.h" +#include "user.h" + +int main(int argc, char *argv[]) { + int i; + + if (argc < 2) { + printf(2, "Usage: rm files...\n"); + exit(); + } + + for (i = 1; i < argc; i++) { + if (unlink(argv[i]) < 0) { + printf(2, "rm: %s failed to delete\n", argv[i]); + break; + } + } + + exit(); +} diff --git a/sh.c b/sh.c new file mode 100644 index 0000000..1cf99a6 --- /dev/null +++ b/sh.c @@ -0,0 +1,482 @@ +// Shell. + +#include "types.h" +#include "user.h" +#include "fcntl.h" + +// Parsed command representation +#define EXEC 1 +#define REDIR 2 +#define PIPE 3 +#define LIST 4 +#define BACK 5 + +#define MAXARGS 10 + +struct cmd { + int type; +}; + +struct execcmd { + int type; + char *argv[MAXARGS]; + char *eargv[MAXARGS]; +}; + +struct redircmd { + int type; + struct cmd *cmd; + char *file; + char *efile; + int mode; + int fd; +}; + +struct pipecmd { + int type; + struct cmd *left; + struct cmd *right; +}; + +struct listcmd { + int type; + struct cmd *left; + struct cmd *right; +}; + +struct backcmd { + int type; + struct cmd *cmd; +}; + +int fork1(void); // Fork but panics on failure. +void panic(char*); +struct cmd *parsecmd(char*); + +// Execute cmd. Never returns. 
+void runcmd(struct cmd *cmd) { + int p[2]; + struct backcmd *bcmd; + struct execcmd *ecmd; + struct listcmd *lcmd; + struct pipecmd *pcmd; + struct redircmd *rcmd; + + if (cmd == 0) { + exit(); + } + + switch (cmd->type) { + default: + panic("runcmd"); + + case EXEC: + ecmd = (struct execcmd*)cmd; + if (ecmd->argv[0] == 0) { + exit(); + } + exec(ecmd->argv[0], ecmd->argv); + printf(2, "exec %s failed\n", ecmd->argv[0]); + break; + + case REDIR: + rcmd = (struct redircmd*)cmd; + close(rcmd->fd); + if (open(rcmd->file, rcmd->mode) < 0) { + printf(2, "open %s failed\n", rcmd->file); + exit(); + } + runcmd(rcmd->cmd); + break; + + case LIST: + lcmd = (struct listcmd*)cmd; + if (fork1() == 0) { + runcmd(lcmd->left); + } + wait(); + runcmd(lcmd->right); + break; + + case PIPE: + pcmd = (struct pipecmd*)cmd; + if (pipe(p) < 0) { + panic("pipe"); + } + if (fork1() == 0) { + close(1); + dup(p[1]); + close(p[0]); + close(p[1]); + runcmd(pcmd->left); + } + if (fork1() == 0) { + close(0); + dup(p[0]); + close(p[0]); + close(p[1]); + runcmd(pcmd->right); + } + close(p[0]); + close(p[1]); + wait(); + wait(); + break; + + case BACK: + bcmd = (struct backcmd*)cmd; + if (fork1() == 0) { + runcmd(bcmd->cmd); + } + break; + } + exit(); +} + +int getcmd(char *buf, int nbuf) { + printf(2, "$ "); + memset(buf, 0, nbuf); + gets(buf, nbuf); + if (buf[0] == 0) { // EOF + return -1; + } + return 0; +} + +int main(int argc, char* argv[]) { + static char buf[100]; + int fd; + + // Ensure that three file descriptors are open. + while ((fd = open("console", O_RDWR)) >= 0) { + if (fd >= 3) { + close(fd); + break; + } + } + + // Read and run input commands. + while (getcmd(buf, sizeof(buf)) >= 0) { + if (buf[0] == 'e' && buf[1] == 'x' && buf[2] == 'i' && buf[3] == 't') { + exit(); + } + if (buf[0] == 'c' && buf[1] == 'd' && buf[2] == ' ') { + // Chdir must be called by the parent, not the child. 
+ buf[strlen(buf) - 1] = 0; // chop \n + if (chdir(buf + 3) < 0) { + printf(2, "cannot cd %s\n", buf + 3); + } + continue; + } + if (fork1() == 0) { + runcmd(parsecmd(buf)); + } + wait(); + } + exit(); +} + +void panic(char *s) { + printf(2, "%s\n", s); + exit(); +} + +int fork1(void) { + int pid; + + pid = fork(); + if (pid == -1) { + panic("fork"); + } + return pid; +} + + +// Constructors + +struct cmd* execcmd(void) { + struct execcmd *cmd; + + cmd = malloc(sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + cmd->type = EXEC; + return (struct cmd*)cmd; +} + +struct cmd* redircmd(struct cmd *subcmd, char *file, char *efile, int mode, int fd) { + struct redircmd *cmd; + + cmd = malloc(sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + cmd->type = REDIR; + cmd->cmd = subcmd; + cmd->file = file; + cmd->efile = efile; + cmd->mode = mode; + cmd->fd = fd; + return (struct cmd*)cmd; +} + +struct cmd* pipecmd(struct cmd *left, struct cmd *right) { + struct pipecmd *cmd; + + cmd = malloc(sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + cmd->type = PIPE; + cmd->left = left; + cmd->right = right; + return (struct cmd*)cmd; +} + +struct cmd* listcmd(struct cmd *left, struct cmd *right) { + struct listcmd *cmd; + + cmd = malloc(sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + cmd->type = LIST; + cmd->left = left; + cmd->right = right; + return (struct cmd*)cmd; +} + +struct cmd* backcmd(struct cmd *subcmd) { + struct backcmd *cmd; + + cmd = malloc(sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + cmd->type = BACK; + cmd->cmd = subcmd; + return (struct cmd*)cmd; +} + +// Parsing + +char whitespace[] = " \t\r\n\v"; +char symbols[] = "<|>&;()"; + +int gettoken(char **ps, char *es, char **q, char **eq) { + char *s; + int ret; + + s = *ps; + while (s < es && strchr(whitespace, *s)) { + s++; + } + if (q) { + *q = s; + } + ret = *s; + switch (*s) { + case 0: + break; + case '|': + case '(': + case ')': + case ';': + case '&': + case '<': + s++; + break; + case '>': + s++; + if (*s == 
'>') { + ret = '+'; + s++; + } + break; + default: + ret = 'a'; + while (s < es && !strchr(whitespace, *s) && !strchr(symbols, *s)) { + s++; + } + break; + } + if (eq) { + *eq = s; + } + + while (s < es && strchr(whitespace, *s)) { + s++; + } + *ps = s; + return ret; +} + +int peek(char **ps, char *es, char *toks) { + char *s; + + s = *ps; + while (s < es && strchr(whitespace, *s)) { + s++; + } + *ps = s; + return *s && strchr(toks, *s); +} + +struct cmd *parseline(char**, char*); +struct cmd *parsepipe(char**, char*); +struct cmd *parseexec(char**, char*); +struct cmd *nulterminate(struct cmd*); + +struct cmd* parsecmd(char *s) { + char *es; + struct cmd *cmd; + + es = s + strlen(s); + cmd = parseline(&s, es); + peek(&s, es, ""); + if (s != es) { + printf(2, "leftovers: %s\n", s); + panic("syntax"); + } + nulterminate(cmd); + return cmd; +} + +struct cmd* parseline(char **ps, char *es) { + struct cmd *cmd; + + cmd = parsepipe(ps, es); + while (peek(ps, es, "&")) { + gettoken(ps, es, 0, 0); + cmd = backcmd(cmd); + } + if (peek(ps, es, ";")) { + gettoken(ps, es, 0, 0); + cmd = listcmd(cmd, parseline(ps, es)); + } + return cmd; +} + +struct cmd* parsepipe(char **ps, char *es) { + struct cmd *cmd; + + cmd = parseexec(ps, es); + if (peek(ps, es, "|")) { + gettoken(ps, es, 0, 0); + cmd = pipecmd(cmd, parsepipe(ps, es)); + } + return cmd; +} + +struct cmd* parseredirs(struct cmd *cmd, char **ps, char *es) { + int tok; + char *q, *eq; + + while (peek(ps, es, "<>")) { + tok = gettoken(ps, es, 0, 0); + if (gettoken(ps, es, &q, &eq) != 'a') { + panic("missing file for redirection"); + } + switch (tok) { + case '<': + cmd = redircmd(cmd, q, eq, O_RDONLY, 0); + break; + case '>': + cmd = redircmd(cmd, q, eq, O_WRONLY | O_CREATE, 1); + break; + case '+': // >> + cmd = redircmd(cmd, q, eq, O_WRONLY | O_CREATE, 1); + break; + } + } + return cmd; +} + +struct cmd* parseblock(char **ps, char *es) { + struct cmd *cmd; + + if (!peek(ps, es, "(")) { + panic("parseblock"); + } + 
gettoken(ps, es, 0, 0); + cmd = parseline(ps, es); + if (!peek(ps, es, ")")) { + panic("syntax - missing )"); + } + gettoken(ps, es, 0, 0); + cmd = parseredirs(cmd, ps, es); + return cmd; +} + +struct cmd* parseexec(char **ps, char *es) { + char *q, *eq; + int tok, argc; + struct execcmd *cmd; + struct cmd *ret; + + if (peek(ps, es, "(")) { + return parseblock(ps, es); + } + + ret = execcmd(); + cmd = (struct execcmd*)ret; + + argc = 0; + ret = parseredirs(ret, ps, es); + while (!peek(ps, es, "|)&;")) { + if ((tok = gettoken(ps, es, &q, &eq)) == 0) { + break; + } + if (tok != 'a') { + panic("syntax"); + } + cmd->argv[argc] = q; + cmd->eargv[argc] = eq; + argc++; + if (argc >= MAXARGS) { + panic("too many args"); + } + ret = parseredirs(ret, ps, es); + } + cmd->argv[argc] = 0; + cmd->eargv[argc] = 0; + return ret; +} + +// NUL-terminate all the counted strings. +struct cmd* nulterminate(struct cmd *cmd) { + int i; + struct backcmd *bcmd; + struct execcmd *ecmd; + struct listcmd *lcmd; + struct pipecmd *pcmd; + struct redircmd *rcmd; + + if (cmd == 0) { + return 0; + } + + switch (cmd->type) { + case EXEC: + ecmd = (struct execcmd*)cmd; + for (i = 0; ecmd->argv[i]; i++) { + *ecmd->eargv[i] = 0; + } + break; + + case REDIR: + rcmd = (struct redircmd*)cmd; + nulterminate(rcmd->cmd); + *rcmd->efile = 0; + break; + + case PIPE: + pcmd = (struct pipecmd*)cmd; + nulterminate(pcmd->left); + nulterminate(pcmd->right); + break; + + case LIST: + lcmd = (struct listcmd*)cmd; + nulterminate(lcmd->left); + nulterminate(lcmd->right); + break; + + case BACK: + bcmd = (struct backcmd*)cmd; + nulterminate(bcmd->cmd); + break; + } + return cmd; +} diff --git a/sign.pl b/sign.pl new file mode 100755 index 0000000..d793035 --- /dev/null +++ b/sign.pl @@ -0,0 +1,19 @@ +#!/usr/bin/perl + +open(SIG, $ARGV[0]) || die "open $ARGV[0]: $!"; + +$n = sysread(SIG, $buf, 1000); + +if($n > 510){ + print STDERR "boot block too large: $n bytes (max 510)\n"; + exit 1; +} + +print STDERR "boot block is 
$n bytes (max 510)\n"; + +$buf .= "\0" x (510-$n); +$buf .= "\x55\xAA"; + +open(SIG, ">$ARGV[0]") || die "open >$ARGV[0]: $!"; +print SIG $buf; +close SIG; diff --git a/sleeplock.c b/sleeplock.c new file mode 100644 index 0000000..f45e910 --- /dev/null +++ b/sleeplock.c @@ -0,0 +1,48 @@ +// Sleeping locks + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "x86.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" +#include "spinlock.h" +#include "sleeplock.h" + +void initsleeplock(struct sleeplock *lk, char *name) { + initlock(&lk->lk, "sleep lock"); + lk->name = name; + lk->locked = 0; + lk->pid = 0; +} + +void acquiresleep(struct sleeplock *lk) { + acquire(&lk->lk); + while (lk->locked) { + sleep(lk, &lk->lk); + } + lk->locked = 1; + lk->pid = myproc()->pid; + release(&lk->lk); +} + +void releasesleep(struct sleeplock *lk) { + acquire(&lk->lk); + lk->locked = 0; + lk->pid = 0; + wakeup(lk); + release(&lk->lk); +} + +int holdingsleep(struct sleeplock *lk) { + int r; + + acquire(&lk->lk); + r = lk->locked && (lk->pid == myproc()->pid); + release(&lk->lk); + return r; +} + + + diff --git a/sleeplock.h b/sleeplock.h new file mode 100644 index 0000000..832e871 --- /dev/null +++ b/sleeplock.h @@ -0,0 +1,10 @@ +// Long-term locks for processes +struct sleeplock { + uint locked; // Is the lock held? + struct spinlock lk; // spinlock protecting this sleep lock + + // For debugging: + char *name; // Name of lock. + int pid; // Process holding lock +}; + diff --git a/spinlock.c b/spinlock.c new file mode 100644 index 0000000..f0b345f --- /dev/null +++ b/spinlock.c @@ -0,0 +1,121 @@ +// Mutual exclusion spin locks. + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "x86.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" +#include "spinlock.h" + +void initlock(struct spinlock *lk, char *name) { + lk->name = name; + lk->locked = 0; + lk->cpu = 0; +} + +// Acquire the lock. 
+// Loops (spins) until the lock is acquired. +// Holding a lock for a long time may cause +// other CPUs to waste time spinning to acquire it. +void acquire(struct spinlock *lk) { + pushcli(); // disable interrupts to avoid deadlock. + if (holding(lk)) { + panic("acquire"); + } + + // The xchg is atomic. + while (xchg(&lk->locked, 1) != 0) { + ; + } + + // Tell the C compiler and the processor to not move loads or stores + // past this point, to ensure that the critical section's memory + // references happen after the lock is acquired. + __sync_synchronize(); + + // Record info about lock acquisition for debugging. + lk->cpu = mycpu(); + getcallerpcs(&lk, lk->pcs); +} + +// Release the lock. +void release(struct spinlock *lk) { + if (!holding(lk)) { + panic("release"); + } + + lk->pcs[0] = 0; + lk->cpu = 0; + + // Tell the C compiler and the processor to not move loads or stores + // past this point, to ensure that all the stores in the critical + // section are visible to other cores before the lock is released. + // Both the C compiler and the hardware may re-order loads and + // stores; __sync_synchronize() tells them both not to. + __sync_synchronize(); + + // Release the lock, equivalent to lk->locked = 0. + // This code can't use a C assignment, since it might + // not be atomic. A real OS would use C atomics here. + asm volatile ("movl $0, %0" : "+m" (lk->locked) :); + + popcli(); +} + +// Record the current call stack in pcs[] by following the %ebp chain. +void getcallerpcs(void *v, uint pcs[]) { + uint *ebp; + int i; + + ebp = (uint*)v - 2; + for (i = 0; i < 10; i++) { + if (ebp == 0 || ebp < (uint*)KERNBASE || ebp == (uint*)0xffffffff) { + break; + } + pcs[i] = ebp[1]; // saved %eip + ebp = (uint*)ebp[0]; // saved %ebp + } + for (; i < 10; i++) { + pcs[i] = 0; + } +} + +// Check whether this cpu is holding the lock. 
+int holding(struct spinlock *lock) { + int r; + pushcli(); + r = lock->locked && lock->cpu == mycpu(); + popcli(); + return r; +} + + +// Pushcli/popcli are like cli/sti except that they are matched: +// it takes two popcli to undo two pushcli. Also, if interrupts +// are off, then pushcli, popcli leaves them off. + +void pushcli(void) { + int eflags; + + eflags = readeflags(); + cli(); + if (mycpu()->ncli == 0) { + mycpu()->intena = eflags & FL_IF; + } + mycpu()->ncli += 1; +} + +void popcli(void) { + if (readeflags() & FL_IF) { + panic("popcli - interruptible"); + } + if (--mycpu()->ncli < 0) { + panic("popcli"); + } + if (mycpu()->ncli == 0 && mycpu()->intena) { + sti(); + } +} + diff --git a/spinlock.h b/spinlock.h new file mode 100644 index 0000000..d719bac --- /dev/null +++ b/spinlock.h @@ -0,0 +1,11 @@ +// Mutual exclusion lock. +struct spinlock { + uint locked; // Is the lock held? + + // For debugging: + char *name; // Name of lock. + struct cpu *cpu; // The cpu holding the lock. + uint pcs[10]; // The call stack (an array of program counters) + // that locked the lock. +}; + diff --git a/stat.h b/stat.h new file mode 100644 index 0000000..e54ba86 --- /dev/null +++ b/stat.h @@ -0,0 +1,11 @@ +#define T_DIR 1 // Directory +#define T_FILE 2 // File +#define T_DEV 3 // Device + +struct stat { + short type; // Type of file + int dev; // File system's disk device + uint ino; // Inode number + short nlink; // Number of links to file + uint size; // Size of file in bytes +}; diff --git a/stressfs.c b/stressfs.c new file mode 100644 index 0000000..87f10c1 --- /dev/null +++ b/stressfs.c @@ -0,0 +1,51 @@ +// Demonstrate that moving the "acquire" in iderw after the loop that +// appends to the idequeue results in a race. + +// For this to work, you should also add a spin within iderw's +// idequeue traversal loop. 
Adding the following demonstrated a panic +// after about 5 runs of stressfs in QEMU on a 2.1GHz CPU: +// for (i = 0; i < 40000; i++) +// asm volatile(""); + +#include "types.h" +#include "stat.h" +#include "user.h" +#include "fs.h" +#include "fcntl.h" + +int main(int argc, char *argv[]) { + int fd, i; + char path[] = "stressfs0"; + char data[512]; + + printf(1, "stressfs starting\n"); + memset(data, 'a', sizeof(data)); + + for (i = 0; i < 4; i++) { + if (fork() > 0) { + break; + } + } + + printf(1, "write %d\n", i); + + path[8] += i; + fd = open(path, O_CREATE | O_RDWR); + for (i = 0; i < 20; i++) { +// printf(fd, "%d\n", i); + write(fd, data, sizeof(data)); + } + close(fd); + + printf(1, "read\n"); + + fd = open(path, O_RDONLY); + for (i = 0; i < 20; i++) { + read(fd, data, sizeof(data)); + } + close(fd); + + wait(); + + exit(); +} diff --git a/string.c b/string.c new file mode 100644 index 0000000..ffdf897 --- /dev/null +++ b/string.c @@ -0,0 +1,103 @@ +#include "types.h" +#include "x86.h" + +void* memset(void *dst, int c, uint n) { + if ((int)dst % 4 == 0 && n % 4 == 0) { + c &= 0xFF; + stosl(dst, (c << 24) | (c << 16) | (c << 8) | c, n / 4); + } + else { + stosb(dst, c, n); + } + return dst; +} + +int memcmp(const void *v1, const void *v2, uint n) { + const uchar *s1, *s2; + + s1 = v1; + s2 = v2; + while (n-- > 0) { + if (*s1 != *s2) { + return *s1 - *s2; + } + s1++, s2++; + } + + return 0; +} + +void* memmove(void *dst, const void *src, uint n) { + const char *s; + char *d; + + s = src; + d = dst; + if (s < d && s + n > d) { + s += n; + d += n; + while (n-- > 0) { + *--d = *--s; + } + } + else { + while (n-- > 0) { + *d++ = *s++; + } + } + + return dst; +} + +// memcpy exists to placate GCC. Use memmove. 
+void* memcpy(void *dst, const void *src, uint n) { + return memmove(dst, src, n); +} + +int strncmp(const char *p, const char *q, uint n) { + while (n > 0 && *p && *p == *q) { + n--, p++, q++; + } + if (n == 0) { + return 0; + } + return (uchar) * p - (uchar) * q; +} + +char* strncpy(char *s, const char *t, int n) { + char *os; + + os = s; + while (n-- > 0 && (*s++ = *t++) != 0) { + ; + } + while (n-- > 0) { + *s++ = 0; + } + return os; +} + +// Like strncpy but guaranteed to NUL-terminate. +char* safestrcpy(char *s, const char *t, int n) { + char *os; + + os = s; + if (n <= 0) { + return os; + } + while (--n > 0 && (*s++ = *t++) != 0) { + ; + } + *s = 0; + return os; +} + +int strlen(const char *s) { + int n; + + for (n = 0; s[n]; n++) { + ; + } + return n; +} + diff --git a/swtch.S b/swtch.S new file mode 100644 index 0000000..c395b87 --- /dev/null +++ b/swtch.S @@ -0,0 +1,29 @@ +# Context switch +# +# void swtch(struct context **old, struct context *new); +# +# Save the current registers on the stack, creating +# a struct context, and save its address in *old. +# Switch stacks to new and pop previously-saved registers. + +.globl swtch +swtch: + movl 4(%esp), %eax + movl 8(%esp), %edx + + # Save old callee-saved registers + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + # Switch stacks + movl %esp, (%eax) + movl %edx, %esp + + # Load new callee-saved registers + popl %edi + popl %esi + popl %ebx + popl %ebp + ret diff --git a/syscall.c b/syscall.c new file mode 100644 index 0000000..5086c9a --- /dev/null +++ b/syscall.c @@ -0,0 +1,95 @@ +#include "types.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" +#include "x86.h" +#include "syscall.h" +#include "syscalltable.h" + +// User code makes a system call with INT T_SYSCALL. +// System call number in %eax. +// Arguments on the stack, from the user call to the C +// library system call function. 
The saved user %esp points +// to a saved program counter, and then the first argument. + +// Fetch the int at addr from the current process. +int fetchint(uint addr, int *ip) { + struct proc *curproc = myproc(); + + if (addr >= curproc->sz || addr + 4 > curproc->sz) { + return -1; + } + *ip = *(int*)(addr); + return 0; +} + +// Fetch the nul-terminated string at addr from the current process. +// Doesn't actually copy the string - just sets *pp to point at it. +// Returns length of string, not including nul. +int fetchstr(uint addr, char **pp) { + char *s, *ep; + struct proc *curproc = myproc(); + + if (addr >= curproc->sz) { + return -1; + } + *pp = (char*)addr; + ep = (char*)curproc->sz; + for (s = *pp; s < ep; s++) { + if (*s == 0) { + return s - *pp; + } + } + return -1; +} + +// Fetch the nth 32-bit system call argument. +int argint(int n, int *ip) { + return fetchint((myproc()->tf->esp) + 4 + 4 * n, ip); +} + +// Fetch the nth word-sized system call argument as a pointer +// to a block of memory of size bytes. Check that the pointer +// lies within the process address space. +int argptr(int n, char **pp, int size) { + int i; + struct proc *curproc = myproc(); + + if (argint(n, &i) < 0) { + return -1; + } + if (size < 0 || (uint)i >= curproc->sz || (uint)i + size > curproc->sz) { + return -1; + } + *pp = (char*)i; + return 0; +} + +// Fetch the nth word-sized system call argument as a string pointer. +// Check that the pointer is valid and the string is nul-terminated. +// (There is no shared writable memory, so the string can't change +// between this check and being used by the kernel.) 
+int argstr(int n, char **pp) { + int addr; + if (argint(n, &addr) < 0) { + return -1; + } + return fetchstr(addr, pp); +} + +void syscall(void) { + int num; + struct proc *curproc = myproc(); + + num = curproc->tf->eax; + if (num > 0 && num < NELEM(syscalls) && syscalls[num]) { + curproc->tf->eax = syscalls[num](); + } + else { + cprintf("%d %s: unknown sys call %d\n", + curproc->pid, curproc->name, num); + curproc->tf->eax = -1; + } +} diff --git a/sysfile.c b/sysfile.c new file mode 100644 index 0000000..faf89dc --- /dev/null +++ b/sysfile.c @@ -0,0 +1,450 @@ +// +// File-system system calls. +// Mostly argument checking, since we don't trust +// user code, and calls into file.c and fs.c. +// + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "stat.h" +#include "mmu.h" +#include "proc.h" +#include "fs.h" +#include "spinlock.h" +#include "sleeplock.h" +#include "file.h" +#include "fcntl.h" + +// Fetch the nth word-sized system call argument as a file descriptor +// and return both the descriptor and the corresponding struct file. +static int argfd(int n, int *pfd, struct file **pf) { + int fd; + struct file *f; + + if (argint(n, &fd) < 0) { + return -1; + } + if (fd < 0 || fd >= NOFILE || (f = myproc()->ofile[fd]) == 0) { + return -1; + } + if (pfd) { + *pfd = fd; + } + if (pf) { + *pf = f; + } + return 0; +} + +// Allocate a file descriptor for the given file. +// Takes over file reference from caller on success. 
+static int fdalloc(struct file *f) { + int fd; + struct proc *curproc = myproc(); + + for (fd = 0; fd < NOFILE; fd++) { + if (curproc->ofile[fd] == 0) { + curproc->ofile[fd] = f; + return fd; + } + } + return -1; +} + +int sys_dup(void) { + struct file *f; + int fd; + + if (argfd(0, 0, &f) < 0) { + return -1; + } + if ((fd = fdalloc(f)) < 0) { + return -1; + } + filedup(f); + return fd; +} + +int sys_read(void) { + struct file *f; + int n; + char *p; + + if (argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argptr(1, &p, n) < 0) { + return -1; + } + return fileread(f, p, n); +} + +int sys_write(void) { + struct file *f; + int n; + char *p; + + if (argfd(0, 0, &f) < 0 || argint(2, &n) < 0 || argptr(1, &p, n) < 0) { + return -1; + } + return filewrite(f, p, n); +} + +int sys_close(void) { + int fd; + struct file *f; + + if (argfd(0, &fd, &f) < 0) { + return -1; + } + myproc()->ofile[fd] = 0; + fileclose(f); + return 0; +} + +int sys_fstat(void) { + struct file *f; + struct stat *st; + + if (argfd(0, 0, &f) < 0 || argptr(1, (void*)&st, sizeof(*st)) < 0) { + return -1; + } + return filestat(f, st); +} + +void cleanupsyslink(struct inode * ip) { + ilock(ip); + ip->nlink--; + iupdate(ip); + iunlockput(ip); + end_op(); +} + +// Create the path new as a link to the same inode as old. +int sys_link(void) { + char name[DIRSIZ], *new, *old; + struct inode *dp, *ip; + + if (argstr(0, &old) < 0 || argstr(1, &new) < 0) { + return -1; + } + + begin_op(); + if ((ip = namei(old)) == 0) { + end_op(); + return -1; + } + + ilock(ip); + if (ip->type == T_DIR) { + iunlockput(ip); + end_op(); + return -1; + } + + ip->nlink++; + iupdate(ip); + iunlock(ip); + + if ((dp = nameiparent(new, name)) == 0) { + cleanupsyslink(ip); + return -1; + } + ilock(dp); + if (dp->dev != ip->dev || dirlink(dp, name, ip->inum) < 0) { + iunlockput(dp); + cleanupsyslink(ip); + return -1; + } + iunlockput(dp); + iput(ip); + + end_op(); + + return 0; +} + +// Is the directory dp empty except for "." and ".." ? 
+static int isdirempty(struct inode *dp) { + int off; + struct dirent de; + + for (off = 2 * sizeof(de); off < dp->size; off += sizeof(de)) { + if (readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) { + panic("isdirempty: readi"); + } + if (de.inum != 0) { + return 0; + } + } + return 1; +} + + +int sys_unlink(void) { + struct inode *ip, *dp; + struct dirent de; + char name[DIRSIZ], *path; + uint off; + + if (argstr(0, &path) < 0) { + return -1; + } + + begin_op(); + if ((dp = nameiparent(path, name)) == 0) { + end_op(); + return -1; + } + + ilock(dp); + + // Cannot unlink "." or "..". + if (namecmp(name, ".") == 0 || namecmp(name, "..") == 0) { + iunlockput(dp); + end_op(); + return -1; + } + + if ((ip = dirlookup(dp, name, &off)) == 0) { + iunlockput(dp); + end_op(); + return -1; + } + ilock(ip); + + if (ip->nlink < 1) { + panic("unlink: nlink < 1"); + } + if (ip->type == T_DIR && !isdirempty(ip)) { + iunlockput(ip); + iunlockput(dp); + end_op(); + return -1; + } + + memset(&de, 0, sizeof(de)); + if (writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) { + panic("unlink: writei"); + } + if (ip->type == T_DIR) { + dp->nlink--; + iupdate(dp); + } + iunlockput(dp); + + ip->nlink--; + iupdate(ip); + iunlockput(ip); + + end_op(); + + return 0; +} + +static struct inode* create(char *path, short type, short major, short minor) { + struct inode *ip, *dp; + char name[DIRSIZ]; + + if ((dp = nameiparent(path, name)) == 0) { + return 0; + } + ilock(dp); + + if ((ip = dirlookup(dp, name, 0)) != 0) { + iunlockput(dp); + ilock(ip); + if (type == T_FILE && ip->type == T_FILE) { + return ip; + } + iunlockput(ip); + return 0; + } + + if ((ip = ialloc(dp->dev, type)) == 0) { + panic("create: ialloc"); + } + + ilock(ip); + ip->major = major; + ip->minor = minor; + ip->nlink = 1; + iupdate(ip); + + if (type == T_DIR) { // Create . and .. entries. + dp->nlink++; // for ".." + iupdate(dp); + // No ip->nlink++ for ".": avoid cyclic ref count. 
+ if (dirlink(ip, ".", ip->inum) < 0 || dirlink(ip, "..", dp->inum) < 0) { + panic("create dots"); + } + } + + if (dirlink(dp, name, ip->inum) < 0) { + panic("create: dirlink"); + } + + iunlockput(dp); + + return ip; +} + +int sys_open(void) { + char *path; + int fd, omode; + struct file *f; + struct inode *ip; + + if (argstr(0, &path) < 0 || argint(1, &omode) < 0) { + return -1; + } + + begin_op(); + + if (omode & O_CREATE) { + ip = create(path, T_FILE, 0, 0); + if (ip == 0) { + end_op(); + return -1; + } + } + else { + if ((ip = namei(path)) == 0) { + end_op(); + return -1; + } + ilock(ip); + if (ip->type == T_DIR && omode != O_RDONLY) { + iunlockput(ip); + end_op(); + return -1; + } + } + + if ((f = filealloc()) == 0 || (fd = fdalloc(f)) < 0) { + if (f) { + fileclose(f); + } + iunlockput(ip); + end_op(); + return -1; + } + iunlock(ip); + end_op(); + + f->type = FD_INODE; + f->ip = ip; + f->off = 0; + f->readable = !(omode & O_WRONLY); + f->writable = (omode & O_WRONLY) || (omode & O_RDWR); + return fd; +} + +int sys_mkdir(void) { + char *path; + struct inode *ip; + + begin_op(); + if (argstr(0, &path) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0) { + end_op(); + return -1; + } + iunlockput(ip); + end_op(); + return 0; +} + +int sys_mknod(void) { + struct inode *ip; + char *path; + int major, minor; + + begin_op(); + if ((argstr(0, &path)) < 0 || + argint(1, &major) < 0 || + argint(2, &minor) < 0 || + (ip = create(path, T_DEV, major, minor)) == 0) { + end_op(); + return -1; + } + iunlockput(ip); + end_op(); + return 0; +} + +int sys_chdir(void) { + char *path; + struct inode *ip; + struct proc *curproc = myproc(); + + begin_op(); + if (argstr(0, &path) < 0 || (ip = namei(path)) == 0) { + end_op(); + return -1; + } + ilock(ip); + if (ip->type != T_DIR) { + iunlockput(ip); + end_op(); + return -1; + } + iunlock(ip); + iput(curproc->cwd); + end_op(); + curproc->cwd = ip; + return 0; +} + +int sys_exec(void) { + char *path, *argv[MAXARG]; + int i; + uint uargv, uarg; 
+
+    if (argstr(0, &path) < 0 || argint(1, (int*)&uargv) < 0) {
+        return -1;
+    }
+    memset(argv, 0, sizeof(argv));
+    for (i = 0;; i++) {
+        if (i >= NELEM(argv)) {
+            return -1;
+        }
+        if (fetchint(uargv + 4 * i, (int*)&uarg) < 0) {
+            return -1;
+        }
+        if (uarg == 0) {
+            argv[i] = 0;
+            break;
+        }
+        if (fetchstr(uarg, &argv[i]) < 0) {
+            return -1;
+        }
+    }
+    return exec(path, argv);
+}
+
+int sys_pipe(void) {
+    int *fd;
+    struct file *rf, *wf;
+    int fd0, fd1;
+
+    if (argptr(0, (void*)&fd, 2 * sizeof(fd[0])) < 0) {
+        return -1;
+    }
+    if (pipealloc(&rf, &wf) < 0) {
+        return -1;
+    }
+    fd0 = -1;
+    if ((fd0 = fdalloc(rf)) < 0 || (fd1 = fdalloc(wf)) < 0) {
+        if (fd0 >= 0) {
+            myproc()->ofile[fd0] = 0;
+        }
+        fileclose(rf);
+        fileclose(wf);
+        return -1;
+    }
+    fd[0] = fd0;
+    fd[1] = fd1;
+    return 0;
+}
+
+int sys_getch(void) {
+    return consoleget();
+}
\ No newline at end of file
diff --git a/sysproc.c b/sysproc.c
new file mode 100644
index 0000000..30af968
--- /dev/null
+++ b/sysproc.c
@@ -0,0 +1,88 @@
+#include "types.h"
+#include "x86.h"
+#include "defs.h"
+#include "date.h"
+#include "param.h"
+#include "memlayout.h"
+#include "mmu.h"
+#include "proc.h"
+
+int sys_fork(void) {
+    return fork();
+}
+
+int sys_exit(void) {
+    exit();
+    return 0; // not reached
+}
+
+int sys_wait(void) {
+    return wait();
+}
+
+int sys_kill(void) {
+    int pid;
+
+    if (argint(0, &pid) < 0) {
+        return -1;
+    }
+    return kill(pid);
+}
+
+int sys_getpid(void) {
+    return myproc()->pid;
+}
+
+int sys_sbrk(void) {
+    int addr;
+    int n;
+
+    if (argint(0, &n) < 0) {
+        return -1;
+    }
+    addr = myproc()->sz;
+    if (growproc(n) < 0) {
+        return -1;
+    }
+    return addr;
+}
+
+// Sleep until n clock ticks (first syscall argument) have elapsed.
+// Returns 0 when the ticks have passed, or -1 if the argument cannot
+// be fetched or the process is found killed while waiting.
+int sys_sleep(void) {
+    int n;
+    uint ticks0;
+
+    if (argint(0, &n) < 0) {
+        return -1;
+    }
+    // A negative count would be converted to a huge unsigned value by
+    // the comparison below and sleep (almost) forever; treat it as 0.
+    if (n < 0) {
+        n = 0;
+    }
+    acquire(&tickslock);
+    ticks0 = ticks;
+    // Unsigned subtraction keeps the elapsed count right if ticks wraps.
+    while (ticks - ticks0 < (uint)n) {
+        if (myproc()->killed) {
+            release(&tickslock);
+            return -1;
+        }
+        sleep(&ticks, &tickslock);
+    }
+    release(&tickslock);
+    return 0;
+}
+
+// return how many clock tick interrupts have
occurred +// since start. +int sys_uptime(void) { + uint xticks; + + acquire(&tickslock); + xticks = ticks; + release(&tickslock); + return xticks; +} diff --git a/trap.c b/trap.c new file mode 100644 index 0000000..fe81ba6 --- /dev/null +++ b/trap.c @@ -0,0 +1,111 @@ +#include "types.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" +#include "x86.h" +#include "traps.h" +#include "spinlock.h" + +// Interrupt descriptor table (shared by all CPUs). +struct gatedesc idt[256]; +extern uint vectors[]; // in vectors.S: array of 256 entry pointers +struct spinlock tickslock; +uint ticks; + +void tvinit(void) { + int i; + + for (i = 0; i < 256; i++) { + SETGATE(idt[i], 0, SEG_KCODE << 3, vectors[i], 0); + } + SETGATE(idt[T_SYSCALL], 1, SEG_KCODE << 3, vectors[T_SYSCALL], DPL_USER); + + initlock(&tickslock, "time"); +} + +void idtinit(void) { + lidt(idt, sizeof(idt)); +} + +void trap(struct trapframe *tf) { + if (tf->trapno == T_SYSCALL) { + if (myproc()->killed) { + exit(); + } + myproc()->tf = tf; + syscall(); + if (myproc()->killed) { + exit(); + } + return; + } + + switch (tf->trapno) { + case T_IRQ0 + IRQ_TIMER: + if (cpuid() == 0) { + acquire(&tickslock); + ticks++; + wakeup(&ticks); + release(&tickslock); + } + lapiceoi(); + break; + case T_IRQ0 + IRQ_IDE: + ideintr(); + lapiceoi(); + break; + case T_IRQ0 + IRQ_IDE + 1: + // Bochs generates spurious IDE1 interrupts. + break; + case T_IRQ0 + IRQ_KBD: + kbdintr(); + lapiceoi(); + break; + case T_IRQ0 + IRQ_COM1: + uartintr(); + lapiceoi(); + break; + case T_IRQ0 + 7: + case T_IRQ0 + IRQ_SPURIOUS: + cprintf("cpu%d: spurious interrupt at %x:%x\n", + cpuid(), tf->cs, tf->eip); + lapiceoi(); + break; + + + default: + if (myproc() == 0 || (tf->cs & 3) == 0) { + // In kernel, it must be our mistake. + cprintf("unexpected trap %d from cpu %d eip %x (cr2=0x%x)\n", + tf->trapno, cpuid(), tf->eip, rcr2()); + panic("trap"); + } + // In user space, assume process misbehaved. 
+ cprintf("pid %d %s: trap %d err %d on cpu %d " + "eip 0x%x addr 0x%x--kill proc\n", + myproc()->pid, myproc()->name, tf->trapno, + tf->err, cpuid(), tf->eip, rcr2()); + myproc()->killed = 1; + } + + // Force process exit if it has been killed and is in user space. + // (If it is still executing in the kernel, let it keep running + // until it gets to the regular system call return.) + if (myproc() && myproc()->killed && (tf->cs & 3) == DPL_USER) { + exit(); + } + + // Force process to give up CPU on clock tick. + // If interrupts were on while locks held, would need to check nlock. + if (myproc() && myproc()->state == RUNNING && + tf->trapno == T_IRQ0 + IRQ_TIMER) { + yield(); + } + + // Check if the process has been killed since we yielded + if (myproc() && myproc()->killed && (tf->cs & 3) == DPL_USER) { + exit(); + } +} diff --git a/trapasm.S b/trapasm.S new file mode 100644 index 0000000..d70262e --- /dev/null +++ b/trapasm.S @@ -0,0 +1,32 @@ +#include "mmu.h" + + # vectors.S sends all traps here. +.globl alltraps +alltraps: + # Build trap frame. + pushl %ds + pushl %es + pushl %fs + pushl %gs + pushal + + # Set up data segments. + movw $(SEG_KDATA<<3), %ax + movw %ax, %ds + movw %ax, %es + + # Call trap(tf), where tf=%esp + pushl %esp + call trap + addl $4, %esp + + # Return falls through to trapret... +.globl trapret +trapret: + popal + popl %gs + popl %fs + popl %es + popl %ds + addl $0x8, %esp # trapno and errcode + iret diff --git a/traps.h b/traps.h new file mode 100644 index 0000000..0bd1fd8 --- /dev/null +++ b/traps.h @@ -0,0 +1,38 @@ +// x86 trap and interrupt constants. 
+
+// Processor-defined:
+#define T_DIVIDE 0 // divide error
+#define T_DEBUG 1 // debug exception
+#define T_NMI 2 // non-maskable interrupt
+#define T_BRKPT 3 // breakpoint
+#define T_OFLOW 4 // overflow
+#define T_BOUND 5 // bounds check
+#define T_ILLOP 6 // illegal opcode
+#define T_DEVICE 7 // device not available
+#define T_DBLFLT 8 // double fault
+// #define T_COPROC 9 // reserved (not used since 486)
+#define T_TSS 10 // invalid task state segment
+#define T_SEGNP 11 // segment not present
+#define T_STACK 12 // stack exception
+#define T_GPFLT 13 // general protection fault
+#define T_PGFLT 14 // page fault
+// #define T_RES 15 // reserved
+#define T_FPERR 16 // floating point error
+#define T_ALIGN 17 // alignment check
+#define T_MCHK 18 // machine check
+#define T_SIMDERR 19 // SIMD floating point error
+
+// These are arbitrarily chosen, but with care not to overlap
+// processor defined exceptions or interrupt vectors.
+#define T_SYSCALL 64 // system call
+#define T_DEFAULT 500 // catchall
+
+#define T_IRQ0 32 // IRQ 0 corresponds to int T_IRQ0; IRQ n is trap T_IRQ0 + n
+
+#define IRQ_TIMER 0 // trap() counts a clock tick on cpu 0 for this IRQ
+#define IRQ_KBD 1 // trap() calls kbdintr()
+#define IRQ_COM1 4 // trap() calls uartintr()
+#define IRQ_IDE 14 // trap() calls ideintr()
+#define IRQ_ERROR 19
+#define IRQ_SPURIOUS 31 // trap() only logs "spurious interrupt"
+
diff --git a/types.h b/types.h
new file mode 100644
index 0000000..e4adf64
--- /dev/null
+++ b/types.h
@@ -0,0 +1,4 @@
+typedef unsigned int uint;
+typedef unsigned short ushort;
+typedef unsigned char uchar;
+typedef uint pde_t;
diff --git a/uart.c b/uart.c
new file mode 100644
index 0000000..fba91b1
--- /dev/null
+++ b/uart.c
@@ -0,0 +1,75 @@
+// Intel 8250 serial port (UART).
+
+#include "types.h"
+#include "defs.h"
+#include "param.h"
+#include "traps.h"
+#include "spinlock.h"
+#include "sleeplock.h"
+#include "fs.h"
+#include "file.h"
+#include "mmu.h"
+#include "proc.h"
+#include "x86.h"
+
+#define COM1 0x3f8
+
+static int uart; // is there a uart?
+
+void uartinit(void) {
+    char *p;
+
+    // Turn off the FIFO
+    outb(COM1 + 2, 0);
+
+    // 9600 baud, 8 data bits, 1 stop bit, parity off.
+    outb(COM1 + 3, 0x80); // Unlock divisor
+    outb(COM1 + 0, 115200 / 9600);
+    outb(COM1 + 1, 0);
+    outb(COM1 + 3, 0x03); // Lock divisor, 8 data bits.
+    outb(COM1 + 4, 0);
+    outb(COM1 + 1, 0x01); // Enable receive interrupts.
+
+    // If status is 0xFF, no serial port.
+    if (inb(COM1 + 5) == 0xFF) {
+        return;
+    }
+    uart = 1;
+
+    // Acknowledge pre-existing interrupt conditions;
+    // enable interrupts.
+    inb(COM1 + 2);
+    inb(COM1 + 0);
+    ioapicenable(IRQ_COM1, 0);
+
+    // Announce that we're here.
+    for (p = "xv6...\n"; *p; p++) {
+        uartputc(*p);
+    }
+}
+
+void uartputc(int c) {
+    int i;
+
+    if (!uart) {
+        return;
+    }
+    for (i = 0; i < 128 && !(inb(COM1 + 5) & 0x20); i++) {
+        microdelay(10);
+    }
+    outb(COM1 + 0, c);
+}
+
+static int uartgetc(void) {
+    if (!uart) {
+        return -1;
+    }
+    if (!(inb(COM1 + 5) & 0x01)) {
+        return -1;
+    }
+    return inb(COM1 + 0);
+}
+
+void uartintr(void) {
+    consoleintr(uartgetc);
+}
diff --git a/ulib.c b/ulib.c
new file mode 100644
index 0000000..6287a7b
--- /dev/null
+++ b/ulib.c
@@ -0,0 +1,119 @@
+#include "types.h"
+#include "stat.h"
+#include "fcntl.h"
+#include "user.h"
+#include "x86.h"
+
+char*strcpy(char *s, const char *t) {
+    char *os;
+
+    os = s;
+    while ((*s++ = *t++) != 0) {
+        ;
+    }
+    return os;
+}
+
+int strcmp(const char *p, const char *q) {
+    while (*p && *p == *q) {
+        p++, q++;
+    }
+    return (uchar) * p - (uchar) * q;
+}
+
+uint strlen(const char *s) {
+    int n;
+
+    for (n = 0; s[n]; n++) {
+        ;
+    }
+    return n;
+}
+
+void* memset(void *dst, int c, uint n) {
+    stosb(dst, c, n);
+    return dst;
+}
+
+char* strchr(const char *s, char c) {
+    for (; *s; s++) {
+        if (*s == c) {
+            return (char*)s;
+        }
+    }
+    return 0;
+}
+
+char* gets(char *buf, int max) {
+    int i, cc;
+    char c;
+
+    for (i = 0; i + 1 < max;) {
+        cc = read(0, &c, 1);
+        if (cc < 1) {
+            break;
+        }
+        buf[i++] = c;
+        if (c == '\n' || c == '\r') {
+            break;
+        }
+    }
+    buf[i] = '\0';
+    return buf;
+}
+
+int stat(const char *n, struct stat *st) {
+    int fd;
+    int r;
+
+    fd = open(n, O_RDONLY);
+    if (fd < 0) {
+        return -1;
+    }
+    r = fstat(fd, st);
+    close(fd);
+    return r;
+}
+
+int atoi(const char *s) {
+    int n;
+
+    n = 0;
+    while ('0' <= *s && *s <= '9') {
+        n = n * 10 + *s++ - '0';
+    }
+    return n;
+}
+
+// Copy n bytes from vsrc to vdst and return vdst.
+// The regions may overlap: if the destination starts inside the source
+// range the copy runs from the last byte down, so no source byte is
+// overwritten before it has been read.
+void* memmove(void *vdst, const void *vsrc, int n) {
+    char *dst;
+    const char *src;
+
+    dst = vdst;
+    src = vsrc;
+    if (src < dst && src + n > dst) {
+        src += n;
+        dst += n;
+        while (n-- > 0) {
+            *--dst = *--src;
+        }
+    }
+    else {
+        while (n-- > 0) {
+            *dst++ = *src++;
+        }
+    }
+    return vdst;
+}
+
+
+__attribute__((noreturn))
+int _init(int argc, char* argv[]) {
+    main(argc, argv);
+    exit();
+}
+
diff --git a/umalloc.c b/umalloc.c
new file mode 100644
index 0000000..5ee8a47
--- /dev/null
+++ b/umalloc.c
@@ -0,0 +1,95 @@
+#include "types.h"
+#include "stat.h"
+#include "user.h"
+#include "param.h"
+
+// Memory allocator by Kernighan and Ritchie,
+// The C programming Language, 2nd ed. Section 8.7.
+
+typedef long Align;
+
+union header {
+    struct {
+        union header *ptr;
+        uint size;
+    } s;
+    Align x;
+};
+
+typedef union header Header;
+
+static Header base;
+static Header *freep;
+
+void free(void *ap) {
+    Header *bp, *p;
+
+    bp = (Header*)ap - 1;
+    for (p = freep; !(bp > p && bp < p->s.ptr); p = p->s.ptr) {
+        if (p >= p->s.ptr && (bp > p || bp < p->s.ptr)) {
+            break;
+        }
+    }
+    if (bp + bp->s.size == p->s.ptr) {
+        bp->s.size += p->s.ptr->s.size;
+        bp->s.ptr = p->s.ptr->s.ptr;
+    }
+    else {
+        bp->s.ptr = p->s.ptr;
+    }
+    if (p + p->s.size == bp) {
+        p->s.size += bp->s.size;
+        p->s.ptr = bp->s.ptr;
+    }
+    else {
+        p->s.ptr = bp;
+    }
+    freep = p;
+}
+
+static Header* morecore(uint nu) {
+    char *p;
+    Header *hp;
+
+    if (nu < 4096) {
+        nu = 4096;
+    }
+    p = sbrk(nu * sizeof(Header));
+    if (p == (char*)-1) {
+        return 0;
+    }
+    hp = (Header*)p;
+    hp->s.size = nu;
+    free((void*)(hp + 1));
+    return freep;
+}
+
+void* malloc(uint nbytes) {
+    Header *p, *prevp;
+    uint nunits;
+
+    nunits = (nbytes + sizeof(Header) - 1) / sizeof(Header) + 1;
+    if
((prevp = freep) == 0) { + base.s.ptr = freep = prevp = &base; + base.s.size = 0; + } + for (p = prevp->s.ptr;; prevp = p, p = p->s.ptr) { + if (p->s.size >= nunits) { + if (p->s.size == nunits) { + prevp->s.ptr = p->s.ptr; + } + else { + p->s.size -= nunits; + p += p->s.size; + p->s.size = nunits; + } + freep = prevp; + return (void*)(p + 1); + } + if (p == freep) { + if ((p = morecore(nunits)) == 0) { + return 0; + } + } + } +} diff --git a/user.h b/user.h new file mode 100644 index 0000000..37dac1f --- /dev/null +++ b/user.h @@ -0,0 +1,43 @@ +struct stat; +struct rtcdate; + +// system calls +int fork(void); +int exit(void) __attribute__((noreturn)); +int wait(void); +int pipe(int*); +int write(int, const void*, int); +int read(int, void*, int); +int close(int); +int kill(int); +int exec(char*, char**); +int open(const char*, int); +int mknod(const char*, short, short); +int unlink(const char*); +int fstat(int fd, struct stat*); +int link(const char*, const char*); +int mkdir(const char*); +int chdir(const char*); +int dup(int); +int getpid(void); +char* sbrk(int); +int sleep(int); +int uptime(void); +int getch(void); + +// ulib.c +int stat(const char*, struct stat*); +char* strcpy(char*, const char*); +void *memmove(void*, const void*, int); +char* strchr(const char*, char c); +int strcmp(const char*, const char*); +void printf(int, const char*, ...); +char* gets(char*, int max); +uint strlen(const char*); +void* memset(void*, int, uint); +void* malloc(uint); +void free(void*); +int atoi(const char*); + +// entry-point +extern int main(int, char*[]); diff --git a/usertests.c b/usertests.c new file mode 100644 index 0000000..a8a8d92 --- /dev/null +++ b/usertests.c @@ -0,0 +1,1779 @@ +#include "param.h" +#include "types.h" +#include "stat.h" +#include "user.h" +#include "fs.h" +#include "fcntl.h" +#include "syscall.h" +#include "traps.h" +#include "memlayout.h" + +char buf[8192]; +char name[3]; +char *echoargv[] = { "echo", "ALL", "TESTS", "PASSED", 0 }; +int 
stdout = 1; + +// does chdir() call iput(p->cwd) in a transaction? +void iputtest(void) { + printf(stdout, "iput test\n"); + + if (mkdir("iputdir") < 0) { + printf(stdout, "mkdir failed\n"); + exit(); + } + if (chdir("iputdir") < 0) { + printf(stdout, "chdir iputdir failed\n"); + exit(); + } + if (unlink("../iputdir") < 0) { + printf(stdout, "unlink ../iputdir failed\n"); + exit(); + } + if (chdir("/") < 0) { + printf(stdout, "chdir / failed\n"); + exit(); + } + printf(stdout, "iput test ok\n"); +} + +// does exit() call iput(p->cwd) in a transaction? +void exitiputtest(void) { + int pid; + + printf(stdout, "exitiput test\n"); + + pid = fork(); + if (pid < 0) { + printf(stdout, "fork failed\n"); + exit(); + } + if (pid == 0) { + if (mkdir("iputdir") < 0) { + printf(stdout, "mkdir failed\n"); + exit(); + } + if (chdir("iputdir") < 0) { + printf(stdout, "child chdir failed\n"); + exit(); + } + if (unlink("../iputdir") < 0) { + printf(stdout, "unlink ../iputdir failed\n"); + exit(); + } + exit(); + } + wait(); + printf(stdout, "exitiput test ok\n"); +} + +// does the error path in open() for attempt to write a +// directory call iput() in a transaction? 
+// needs a hacked kernel that pauses just after the namei() +// call in sys_open(): +// if((ip = namei(path)) == 0) +// return -1; +// { +// int i; +// for(i = 0; i < 10000; i++) +// yield(); +// } + +void openiputtest(void) { + int pid; + + printf(stdout, "openiput test\n"); + if (mkdir("oidir") < 0) { + printf(stdout, "mkdir oidir failed\n"); + exit(); + } + pid = fork(); + if (pid < 0) { + printf(stdout, "fork failed\n"); + exit(); + } + if (pid == 0) { + int fd = open("oidir", O_RDWR); + if (fd >= 0) { + printf(stdout, "open directory for write succeeded\n"); + exit(); + } + exit(); + } + sleep(1); + if (unlink("oidir") != 0) { + printf(stdout, "unlink failed\n"); + exit(); + } + wait(); + printf(stdout, "openiput test ok\n"); +} + +// simple file system tests + +void opentest(void) { + int fd; + + printf(stdout, "open test\n"); + fd = open("echo", 0); + if (fd < 0) { + printf(stdout, "open echo failed!\n"); + exit(); + } + close(fd); + fd = open("doesnotexist", 0); + if (fd >= 0) { + printf(stdout, "open doesnotexist succeeded!\n"); + exit(); + } + printf(stdout, "open test ok\n"); +} + +void writetest(void) { + int fd; + int i; + + printf(stdout, "small file test\n"); + fd = open("small", O_CREATE | O_RDWR); + if (fd >= 0) { + printf(stdout, "creat small succeeded; ok\n"); + } + else { + printf(stdout, "error: creat small failed!\n"); + exit(); + } + for (i = 0; i < 100; i++) { + if (write(fd, "aaaaaaaaaa", 10) != 10) { + printf(stdout, "error: write aa %d new file failed\n", i); + exit(); + } + if (write(fd, "bbbbbbbbbb", 10) != 10) { + printf(stdout, "error: write bb %d new file failed\n", i); + exit(); + } + } + printf(stdout, "writes ok\n"); + close(fd); + fd = open("small", O_RDONLY); + if (fd >= 0) { + printf(stdout, "open small succeeded ok\n"); + } + else { + printf(stdout, "error: open small failed!\n"); + exit(); + } + i = read(fd, buf, 2000); + if (i == 2000) { + printf(stdout, "read succeeded ok\n"); + } + else { + printf(stdout, "read 
failed\n");
+        exit();
+    }
+    close(fd);
+
+    if (unlink("small") < 0) {
+        printf(stdout, "unlink small failed\n");
+        exit();
+    }
+    printf(stdout, "small file test ok\n");
+}
+
+void writetest1(void) {
+    int i, fd, n;
+
+    printf(stdout, "big files test\n");
+
+    fd = open("big", O_CREATE | O_RDWR);
+    if (fd < 0) {
+        printf(stdout, "error: creat big failed!\n");
+        exit();
+    }
+
+    for (i = 0; i < MAXFILE; i++) {
+        ((int*)buf)[0] = i; // tag each block with its index; checked on read-back
+        if (write(fd, buf, 512) != 512) {
+            printf(stdout, "error: write big file block %d failed\n", i); // report which block failed
+            exit();
+        }
+    }
+
+    close(fd);
+
+    fd = open("big", O_RDONLY);
+    if (fd < 0) {
+        printf(stdout, "error: open big failed!\n");
+        exit();
+    }
+
+    n = 0;
+    for (;;) {
+        i = read(fd, buf, 512);
+        if (i == 0) {
+            if (n == MAXFILE - 1) {
+                printf(stdout, "read only %d blocks from big", n);
+                exit();
+            }
+            break;
+        }
+        else if (i != 512) {
+            printf(stdout, "read failed %d\n", i);
+            exit();
+        }
+        if (((int*)buf)[0] != n) { // block n must carry the index written above
+            printf(stdout, "read content of block %d is %d\n",
+                   n, ((int*)buf)[0]);
+            exit();
+        }
+        n++;
+    }
+    close(fd);
+    if (unlink("big") < 0) {
+        printf(stdout, "unlink big failed\n");
+        exit();
+    }
+    printf(stdout, "big files ok\n");
+}
+
+void createtest(void) {
+    int i, fd;
+
+    printf(stdout, "many creates, followed by unlink test\n");
+
+    name[0] = 'a';
+    name[2] = '\0';
+    for (i = 0; i < 52; i++) {
+        name[1] = '0' + i;
+        fd = open(name, O_CREATE | O_RDWR);
+        close(fd);
+    }
+    name[0] = 'a';
+    name[2] = '\0';
+    for (i = 0; i < 52; i++) {
+        name[1] = '0' + i;
+        unlink(name);
+    }
+    printf(stdout, "many creates, followed by unlink; ok\n");
+}
+
+void dirtest(void){
+    printf(stdout, "mkdir test\n");
+
+    if (mkdir("dir0") < 0) {
+        printf(stdout, "mkdir failed\n");
+        exit();
+    }
+
+    if (chdir("dir0") < 0) {
+        printf(stdout, "chdir dir0 failed\n");
+        exit();
+    }
+
+    if (chdir("..") < 0) {
+        printf(stdout, "chdir ..
failed\n"); + exit(); + } + + if (unlink("dir0") < 0) { + printf(stdout, "unlink dir0 failed\n"); + exit(); + } + printf(stdout, "mkdir test ok\n"); +} + +void exectest(void) { + printf(stdout, "exec test\n"); + if (exec("echo", echoargv) < 0) { + printf(stdout, "exec echo failed\n"); + exit(); + } +} + +// simple fork and pipe read/write + +void pipe1(void) { + int fds[2], pid; + int seq, i, n, cc, total; + + if (pipe(fds) != 0) { + printf(1, "pipe() failed\n"); + exit(); + } + pid = fork(); + seq = 0; + if (pid == 0) { + close(fds[0]); + for (n = 0; n < 5; n++) { + for (i = 0; i < 1033; i++) { + buf[i] = seq++; + } + if (write(fds[1], buf, 1033) != 1033) { + printf(1, "pipe1 oops 1\n"); + exit(); + } + } + exit(); + } + else if (pid > 0) { + close(fds[1]); + total = 0; + cc = 1; + while ((n = read(fds[0], buf, cc)) > 0) { + for (i = 0; i < n; i++) { + if ((buf[i] & 0xff) != (seq++ & 0xff)) { + printf(1, "pipe1 oops 2\n"); + return; + } + } + total += n; + cc = cc * 2; + if (cc > sizeof(buf)) { + cc = sizeof(buf); + } + } + if (total != 5 * 1033) { + printf(1, "pipe1 oops 3 total %d\n", total); + exit(); + } + close(fds[0]); + wait(); + } + else { + printf(1, "fork() failed\n"); + exit(); + } + printf(1, "pipe1 ok\n"); +} + +// meant to be run w/ at most two CPUs +void preempt(void) { + int pid1, pid2, pid3; + int pfds[2]; + + printf(1, "preempt: "); + pid1 = fork(); + if (pid1 == 0) { + for (;;) { + ; + } + } + + pid2 = fork(); + if (pid2 == 0) { + for (;;) { + ; + } + } + + pipe(pfds); + pid3 = fork(); + if (pid3 == 0) { + close(pfds[0]); + if (write(pfds[1], "x", 1) != 1) { + printf(1, "preempt write error"); + } + close(pfds[1]); + for (;;) { + ; + } + } + + close(pfds[1]); + if (read(pfds[0], buf, sizeof(buf)) != 1) { + printf(1, "preempt read error"); + return; + } + close(pfds[0]); + printf(1, "kill... "); + kill(pid1); + kill(pid2); + kill(pid3); + printf(1, "wait... 
"); + wait(); + wait(); + wait(); + printf(1, "preempt ok\n"); +} + +// try to find any races between exit and wait +void exitwait(void) { + int i, pid; + + for (i = 0; i < 100; i++) { + pid = fork(); + if (pid < 0) { + printf(1, "fork failed\n"); + return; + } + if (pid) { + if (wait() != pid) { + printf(1, "wait wrong pid\n"); + return; + } + } + else { + exit(); + } + } + printf(1, "exitwait ok\n"); +} + +void mem(void) { + void *m1, *m2; + int pid, ppid; + + printf(1, "mem test\n"); + ppid = getpid(); + if ((pid = fork()) == 0) { + m1 = 0; + while ((m2 = malloc(10001)) != 0) { + *(char**)m2 = m1; + m1 = m2; + } + while (m1) { + m2 = *(char**)m1; + free(m1); + m1 = m2; + } + m1 = malloc(1024 * 20); + if (m1 == 0) { + printf(1, "couldn't allocate mem?!!\n"); + kill(ppid); + exit(); + } + free(m1); + printf(1, "mem ok\n"); + exit(); + } + else { + wait(); + } +} + +// More file system tests + +// two processes write to the same file descriptor +// is the offset shared? does inode locking work? +void sharedfd(void) { + int fd, pid, i, n, nc, np; + char buf[10]; + + printf(1, "sharedfd test\n"); + + unlink("sharedfd"); + fd = open("sharedfd", O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "fstests: cannot open sharedfd for writing"); + return; + } + pid = fork(); + memset(buf, pid == 0 ? 
'c' : 'p', sizeof(buf)); + for (i = 0; i < 1000; i++) { + if (write(fd, buf, sizeof(buf)) != sizeof(buf)) { + printf(1, "fstests: write sharedfd failed\n"); + break; + } + } + if (pid == 0) { + exit(); + } + else { + wait(); + } + close(fd); + fd = open("sharedfd", 0); + if (fd < 0) { + printf(1, "fstests: cannot open sharedfd for reading\n"); + return; + } + nc = np = 0; + while ((n = read(fd, buf, sizeof(buf))) > 0) { + for (i = 0; i < sizeof(buf); i++) { + if (buf[i] == 'c') { + nc++; + } + if (buf[i] == 'p') { + np++; + } + } + } + close(fd); + unlink("sharedfd"); + if (nc == 10000 && np == 10000) { + printf(1, "sharedfd ok\n"); + } + else { + printf(1, "sharedfd oops %d %d\n", nc, np); + exit(); + } +} + +// four processes write different files at the same +// time, to test block allocation. +void fourfiles(void) { + int fd, pid, i, j, n, total, pi; + char *names[] = { "f0", "f1", "f2", "f3" }; + char *fname; + + printf(1, "fourfiles test\n"); + + for (pi = 0; pi < 4; pi++) { + fname = names[pi]; + unlink(fname); + + pid = fork(); + if (pid < 0) { + printf(1, "fork failed\n"); + exit(); + } + + if (pid == 0) { + fd = open(fname, O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "create failed\n"); + exit(); + } + + memset(buf, '0' + pi, 512); + for (i = 0; i < 12; i++) { + if ((n = write(fd, buf, 500)) != 500) { + printf(1, "write failed %d\n", n); + exit(); + } + } + exit(); + } + } + + for (pi = 0; pi < 4; pi++) { + wait(); + } + + for (i = 0; i < 2; i++) { + fname = names[i]; + fd = open(fname, 0); + total = 0; + while ((n = read(fd, buf, sizeof(buf))) > 0) { + for (j = 0; j < n; j++) { + if (buf[j] != '0' + i) { + printf(1, "wrong char\n"); + exit(); + } + } + total += n; + } + close(fd); + if (total != 12 * 500) { + printf(1, "wrong length %d\n", total); + exit(); + } + unlink(fname); + } + + printf(1, "fourfiles ok\n"); +} + +// four processes create and delete different files in same directory +void createdelete(void) { + enum { N = 20 }; + int pid, i, 
fd, pi; + char name[32]; + + printf(1, "createdelete test\n"); + + for (pi = 0; pi < 4; pi++) { + pid = fork(); + if (pid < 0) { + printf(1, "fork failed\n"); + exit(); + } + + if (pid == 0) { + name[0] = 'p' + pi; + name[2] = '\0'; + for (i = 0; i < N; i++) { + name[1] = '0' + i; + fd = open(name, O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "create failed\n"); + exit(); + } + close(fd); + if (i > 0 && (i % 2) == 0) { + name[1] = '0' + (i / 2); + if (unlink(name) < 0) { + printf(1, "unlink failed\n"); + exit(); + } + } + } + exit(); + } + } + + for (pi = 0; pi < 4; pi++) { + wait(); + } + + name[0] = name[1] = name[2] = 0; + for (i = 0; i < N; i++) { + for (pi = 0; pi < 4; pi++) { + name[0] = 'p' + pi; + name[1] = '0' + i; + fd = open(name, 0); + if ((i == 0 || i >= N / 2) && fd < 0) { + printf(1, "oops createdelete %s didn't exist\n", name); + exit(); + } + else if ((i >= 1 && i < N / 2) && fd >= 0) { + printf(1, "oops createdelete %s did exist\n", name); + exit(); + } + if (fd >= 0) { + close(fd); + } + } + } + + for (i = 0; i < N; i++) { + for (pi = 0; pi < 4; pi++) { + name[0] = 'p' + i; + name[1] = '0' + i; + unlink(name); + } + } + + printf(1, "createdelete ok\n"); +} + +// can I unlink a file and still read it? 
+void unlinkread(void) { + int fd, fd1; + + printf(1, "unlinkread test\n"); + fd = open("unlinkread", O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "create unlinkread failed\n"); + exit(); + } + write(fd, "hello", 5); + close(fd); + + fd = open("unlinkread", O_RDWR); + if (fd < 0) { + printf(1, "open unlinkread failed\n"); + exit(); + } + if (unlink("unlinkread") != 0) { + printf(1, "unlink unlinkread failed\n"); + exit(); + } + + fd1 = open("unlinkread", O_CREATE | O_RDWR); + write(fd1, "yyy", 3); + close(fd1); + + if (read(fd, buf, sizeof(buf)) != 5) { + printf(1, "unlinkread read failed"); + exit(); + } + if (buf[0] != 'h') { + printf(1, "unlinkread wrong data\n"); + exit(); + } + if (write(fd, buf, 10) != 10) { + printf(1, "unlinkread write failed\n"); + exit(); + } + close(fd); + unlink("unlinkread"); + printf(1, "unlinkread ok\n"); +} + +void linktest(void) { + int fd; + + printf(1, "linktest\n"); + + unlink("lf1"); + unlink("lf2"); + + fd = open("lf1", O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "create lf1 failed\n"); + exit(); + } + if (write(fd, "hello", 5) != 5) { + printf(1, "write lf1 failed\n"); + exit(); + } + close(fd); + + if (link("lf1", "lf2") < 0) { + printf(1, "link lf1 lf2 failed\n"); + exit(); + } + unlink("lf1"); + + if (open("lf1", 0) >= 0) { + printf(1, "unlinked lf1 but it is still there!\n"); + exit(); + } + + fd = open("lf2", 0); + if (fd < 0) { + printf(1, "open lf2 failed\n"); + exit(); + } + if (read(fd, buf, sizeof(buf)) != 5) { + printf(1, "read lf2 failed\n"); + exit(); + } + close(fd); + + if (link("lf2", "lf2") >= 0) { + printf(1, "link lf2 lf2 succeeded! oops\n"); + exit(); + } + + unlink("lf2"); + if (link("lf2", "lf1") >= 0) { + printf(1, "link non-existant succeeded! oops\n"); + exit(); + } + + if (link(".", "lf1") >= 0) { + printf(1, "link . lf1 succeeded! 
oops\n"); + exit(); + } + + printf(1, "linktest ok\n"); +} + +// test concurrent create/link/unlink of the same file +void concreate(void) { + char file[3]; + int i, pid, n, fd; + char fa[40]; + struct { + ushort inum; + char name[14]; + } de; + + printf(1, "concreate test\n"); + file[0] = 'C'; + file[2] = '\0'; + for (i = 0; i < 40; i++) { + file[1] = '0' + i; + unlink(file); + pid = fork(); + if (pid && (i % 3) == 1) { + link("C0", file); + } + else if (pid == 0 && (i % 5) == 1) { + link("C0", file); + } + else { + fd = open(file, O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "concreate create %s failed\n", file); + exit(); + } + close(fd); + } + if (pid == 0) { + exit(); + } + else { + wait(); + } + } + + memset(fa, 0, sizeof(fa)); + fd = open(".", 0); + n = 0; + while (read(fd, &de, sizeof(de)) > 0) { + if (de.inum == 0) { + continue; + } + if (de.name[0] == 'C' && de.name[2] == '\0') { + i = de.name[1] - '0'; + if (i < 0 || i >= sizeof(fa)) { + printf(1, "concreate weird file %s\n", de.name); + exit(); + } + if (fa[i]) { + printf(1, "concreate duplicate file %s\n", de.name); + exit(); + } + fa[i] = 1; + n++; + } + } + close(fd); + + if (n != 40) { + printf(1, "concreate not enough files in directory listing\n"); + exit(); + } + + for (i = 0; i < 40; i++) { + file[1] = '0' + i; + pid = fork(); + if (pid < 0) { + printf(1, "fork failed\n"); + exit(); + } + if (((i % 3) == 0 && pid == 0) || + ((i % 3) == 1 && pid != 0)) { + close(open(file, 0)); + close(open(file, 0)); + close(open(file, 0)); + close(open(file, 0)); + } + else { + unlink(file); + unlink(file); + unlink(file); + unlink(file); + } + if (pid == 0) { + exit(); + } + else { + wait(); + } + } + + printf(1, "concreate ok\n"); +} + +// another concurrent link/unlink/create test, +// to look for deadlocks. +void linkunlink() { + int pid, i; + + printf(1, "linkunlink test\n"); + + unlink("x"); + pid = fork(); + if (pid < 0) { + printf(1, "fork failed\n"); + exit(); + } + + unsigned int x = (pid ? 
1 : 97); + for (i = 0; i < 100; i++) { + x = x * 1103515245 + 12345; + if ((x % 3) == 0) { + close(open("x", O_RDWR | O_CREATE)); + } + else if ((x % 3) == 1) { + link("cat", "x"); + } + else { + unlink("x"); + } + } + + if (pid) { + wait(); + } + else { + exit(); + } + + printf(1, "linkunlink ok\n"); +} + +// directory that uses indirect blocks +void bigdir(void) { + int i, fd; + char name[10]; + + printf(1, "bigdir test\n"); + unlink("bd"); + + fd = open("bd", O_CREATE); + if (fd < 0) { + printf(1, "bigdir create failed\n"); + exit(); + } + close(fd); + + for (i = 0; i < 500; i++) { + name[0] = 'x'; + name[1] = '0' + (i / 64); + name[2] = '0' + (i % 64); + name[3] = '\0'; + if (link("bd", name) != 0) { + printf(1, "bigdir link failed\n"); + exit(); + } + } + + unlink("bd"); + for (i = 0; i < 500; i++) { + name[0] = 'x'; + name[1] = '0' + (i / 64); + name[2] = '0' + (i % 64); + name[3] = '\0'; + if (unlink(name) != 0) { + printf(1, "bigdir unlink failed"); + exit(); + } + } + + printf(1, "bigdir ok\n"); +} + +void subdir(void) { + int fd, cc; + + printf(1, "subdir test\n"); + + unlink("ff"); + if (mkdir("dd") != 0) { + printf(1, "subdir mkdir dd failed\n"); + exit(); + } + + fd = open("dd/ff", O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "create dd/ff failed\n"); + exit(); + } + write(fd, "ff", 2); + close(fd); + + if (unlink("dd") >= 0) { + printf(1, "unlink dd (non-empty dir) succeeded!\n"); + exit(); + } + + if (mkdir("/dd/dd") != 0) { + printf(1, "subdir mkdir dd/dd failed\n"); + exit(); + } + + fd = open("dd/dd/ff", O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "create dd/dd/ff failed\n"); + exit(); + } + write(fd, "FF", 2); + close(fd); + + fd = open("dd/dd/../ff", 0); + if (fd < 0) { + printf(1, "open dd/dd/../ff failed\n"); + exit(); + } + cc = read(fd, buf, sizeof(buf)); + if (cc != 2 || buf[0] != 'f') { + printf(1, "dd/dd/../ff wrong content\n"); + exit(); + } + close(fd); + + if (link("dd/dd/ff", "dd/dd/ffff") != 0) { + printf(1, "link dd/dd/ff 
dd/dd/ffff failed\n"); + exit(); + } + + if (unlink("dd/dd/ff") != 0) { + printf(1, "unlink dd/dd/ff failed\n"); + exit(); + } + if (open("dd/dd/ff", O_RDONLY) >= 0) { + printf(1, "open (unlinked) dd/dd/ff succeeded\n"); + exit(); + } + + if (chdir("dd") != 0) { + printf(1, "chdir dd failed\n"); + exit(); + } + if (chdir("dd/../../dd") != 0) { + printf(1, "chdir dd/../../dd failed\n"); + exit(); + } + if (chdir("dd/../../../dd") != 0) { + printf(1, "chdir dd/../../dd failed\n"); + exit(); + } + if (chdir("./..") != 0) { + printf(1, "chdir ./.. failed\n"); + exit(); + } + + fd = open("dd/dd/ffff", 0); + if (fd < 0) { + printf(1, "open dd/dd/ffff failed\n"); + exit(); + } + if (read(fd, buf, sizeof(buf)) != 2) { + printf(1, "read dd/dd/ffff wrong len\n"); + exit(); + } + close(fd); + + if (open("dd/dd/ff", O_RDONLY) >= 0) { + printf(1, "open (unlinked) dd/dd/ff succeeded!\n"); + exit(); + } + + if (open("dd/ff/ff", O_CREATE | O_RDWR) >= 0) { + printf(1, "create dd/ff/ff succeeded!\n"); + exit(); + } + if (open("dd/xx/ff", O_CREATE | O_RDWR) >= 0) { + printf(1, "create dd/xx/ff succeeded!\n"); + exit(); + } + if (open("dd", O_CREATE) >= 0) { + printf(1, "create dd succeeded!\n"); + exit(); + } + if (open("dd", O_RDWR) >= 0) { + printf(1, "open dd rdwr succeeded!\n"); + exit(); + } + if (open("dd", O_WRONLY) >= 0) { + printf(1, "open dd wronly succeeded!\n"); + exit(); + } + if (link("dd/ff/ff", "dd/dd/xx") == 0) { + printf(1, "link dd/ff/ff dd/dd/xx succeeded!\n"); + exit(); + } + if (link("dd/xx/ff", "dd/dd/xx") == 0) { + printf(1, "link dd/xx/ff dd/dd/xx succeeded!\n"); + exit(); + } + if (link("dd/ff", "dd/dd/ffff") == 0) { + printf(1, "link dd/ff dd/dd/ffff succeeded!\n"); + exit(); + } + if (mkdir("dd/ff/ff") == 0) { + printf(1, "mkdir dd/ff/ff succeeded!\n"); + exit(); + } + if (mkdir("dd/xx/ff") == 0) { + printf(1, "mkdir dd/xx/ff succeeded!\n"); + exit(); + } + if (mkdir("dd/dd/ffff") == 0) { + printf(1, "mkdir dd/dd/ffff succeeded!\n"); + exit(); + } + if 
(unlink("dd/xx/ff") == 0) { + printf(1, "unlink dd/xx/ff succeeded!\n"); + exit(); + } + if (unlink("dd/ff/ff") == 0) { + printf(1, "unlink dd/ff/ff succeeded!\n"); + exit(); + } + if (chdir("dd/ff") == 0) { + printf(1, "chdir dd/ff succeeded!\n"); + exit(); + } + if (chdir("dd/xx") == 0) { + printf(1, "chdir dd/xx succeeded!\n"); + exit(); + } + + if (unlink("dd/dd/ffff") != 0) { + printf(1, "unlink dd/dd/ff failed\n"); + exit(); + } + if (unlink("dd/ff") != 0) { + printf(1, "unlink dd/ff failed\n"); + exit(); + } + if (unlink("dd") == 0) { + printf(1, "unlink non-empty dd succeeded!\n"); + exit(); + } + if (unlink("dd/dd") < 0) { + printf(1, "unlink dd/dd failed\n"); + exit(); + } + if (unlink("dd") < 0) { + printf(1, "unlink dd failed\n"); + exit(); + } + + printf(1, "subdir ok\n"); +} + +// test writes that are larger than the log. +void bigwrite(void) { + int fd, sz; + + printf(1, "bigwrite test\n"); + + unlink("bigwrite"); + for (sz = 499; sz < 12 * 512; sz += 471) { + fd = open("bigwrite", O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "cannot create bigwrite\n"); + exit(); + } + int i; + for (i = 0; i < 2; i++) { + int cc = write(fd, buf, sz); + if (cc != sz) { + printf(1, "write(%d) ret %d\n", sz, cc); + exit(); + } + } + close(fd); + unlink("bigwrite"); + } + + printf(1, "bigwrite ok\n"); +} + +void bigfile(void) { + int fd, i, total, cc; + + printf(1, "bigfile test\n"); + + unlink("bigfile"); + fd = open("bigfile", O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "cannot create bigfile"); + exit(); + } + for (i = 0; i < 20; i++) { + memset(buf, i, 600); + if (write(fd, buf, 600) != 600) { + printf(1, "write bigfile failed\n"); + exit(); + } + } + close(fd); + + fd = open("bigfile", 0); + if (fd < 0) { + printf(1, "cannot open bigfile\n"); + exit(); + } + total = 0; + for (i = 0;; i++) { + cc = read(fd, buf, 300); + if (cc < 0) { + printf(1, "read bigfile failed\n"); + exit(); + } + if (cc == 0) { + break; + } + if (cc != 300) { + printf(1, "short read 
bigfile\n"); + exit(); + } + if (buf[0] != i / 2 || buf[299] != i / 2) { + printf(1, "read bigfile wrong data\n"); + exit(); + } + total += cc; + } + close(fd); + if (total != 20 * 600) { + printf(1, "read bigfile wrong total\n"); + exit(); + } + unlink("bigfile"); + + printf(1, "bigfile test ok\n"); +} + +void fourteen(void) { + int fd; + + // DIRSIZ is 14. + printf(1, "fourteen test\n"); + + if (mkdir("12345678901234") != 0) { + printf(1, "mkdir 12345678901234 failed\n"); + exit(); + } + if (mkdir("12345678901234/123456789012345") != 0) { + printf(1, "mkdir 12345678901234/123456789012345 failed\n"); + exit(); + } + fd = open("123456789012345/123456789012345/123456789012345", O_CREATE); + if (fd < 0) { + printf(1, "create 123456789012345/123456789012345/123456789012345 failed\n"); + exit(); + } + close(fd); + fd = open("12345678901234/12345678901234/12345678901234", 0); + if (fd < 0) { + printf(1, "open 12345678901234/12345678901234/12345678901234 failed\n"); + exit(); + } + close(fd); + + if (mkdir("12345678901234/12345678901234") == 0) { + printf(1, "mkdir 12345678901234/12345678901234 succeeded!\n"); + exit(); + } + if (mkdir("123456789012345/12345678901234") == 0) { + printf(1, "mkdir 12345678901234/123456789012345 succeeded!\n"); + exit(); + } + + printf(1, "fourteen ok\n"); +} + +void rmdot(void) { + printf(1, "rmdot test\n"); + if (mkdir("dots") != 0) { + printf(1, "mkdir dots failed\n"); + exit(); + } + if (chdir("dots") != 0) { + printf(1, "chdir dots failed\n"); + exit(); + } + if (unlink(".") == 0) { + printf(1, "rm . worked!\n"); + exit(); + } + if (unlink("..") == 0) { + printf(1, "rm .. worked!\n"); + exit(); + } + if (chdir("/") != 0) { + printf(1, "chdir / failed\n"); + exit(); + } + if (unlink("dots/.") == 0) { + printf(1, "unlink dots/. worked!\n"); + exit(); + } + if (unlink("dots/..") == 0) { + printf(1, "unlink dots/.. 
worked!\n"); + exit(); + } + if (unlink("dots") != 0) { + printf(1, "unlink dots failed!\n"); + exit(); + } + printf(1, "rmdot ok\n"); +} + +void dirfile(void) { + int fd; + + printf(1, "dir vs file\n"); + + fd = open("dirfile", O_CREATE); + if (fd < 0) { + printf(1, "create dirfile failed\n"); + exit(); + } + close(fd); + if (chdir("dirfile") == 0) { + printf(1, "chdir dirfile succeeded!\n"); + exit(); + } + fd = open("dirfile/xx", 0); + if (fd >= 0) { + printf(1, "create dirfile/xx succeeded!\n"); + exit(); + } + fd = open("dirfile/xx", O_CREATE); + if (fd >= 0) { + printf(1, "create dirfile/xx succeeded!\n"); + exit(); + } + if (mkdir("dirfile/xx") == 0) { + printf(1, "mkdir dirfile/xx succeeded!\n"); + exit(); + } + if (unlink("dirfile/xx") == 0) { + printf(1, "unlink dirfile/xx succeeded!\n"); + exit(); + } + if (link("README", "dirfile/xx") == 0) { + printf(1, "link to dirfile/xx succeeded!\n"); + exit(); + } + if (unlink("dirfile") != 0) { + printf(1, "unlink dirfile failed!\n"); + exit(); + } + + fd = open(".", O_RDWR); + if (fd >= 0) { + printf(1, "open . for writing succeeded!\n"); + exit(); + } + fd = open(".", 0); + if (write(fd, "x", 1) > 0) { + printf(1, "write . succeeded!\n"); + exit(); + } + close(fd); + + printf(1, "dir vs file OK\n"); +} + +// test that iput() is called at the end of _namei() +void iref(void) { + int i, fd; + + printf(1, "empty file name\n"); + + // the 50 is NINODE + for (i = 0; i < 50 + 1; i++) { + if (mkdir("irefd") != 0) { + printf(1, "mkdir irefd failed\n"); + exit(); + } + if (chdir("irefd") != 0) { + printf(1, "chdir irefd failed\n"); + exit(); + } + + mkdir(""); + link("README", ""); + fd = open("", O_CREATE); + if (fd >= 0) { + close(fd); + } + fd = open("xx", O_CREATE); + if (fd >= 0) { + close(fd); + } + unlink("xx"); + } + + chdir("/"); + printf(1, "empty file name OK\n"); +} + +// test that fork fails gracefully +// the forktest binary also does this, but it runs out of proc entries first. 
+// inside the bigger usertests binary, we run out of memory first. +void forktest(void) { + int n, pid; + + printf(1, "fork test\n"); + + for (n = 0; n < 1000; n++) { + pid = fork(); + if (pid < 0) { + break; + } + if (pid == 0) { + exit(); + } + } + + if (n == 1000) { + printf(1, "fork claimed to work 1000 times!\n"); + exit(); + } + + for (; n > 0; n--) { + if (wait() < 0) { + printf(1, "wait stopped early\n"); + exit(); + } + } + + if (wait() != -1) { + printf(1, "wait got too many\n"); + exit(); + } + + printf(1, "fork test OK\n"); +} + +void sbrktest(void) { + int fds[2], pid, pids[10], ppid; + char *a, *b, *c, *lastaddr, *oldbrk, *p, scratch; + uint amt; + + printf(stdout, "sbrk test\n"); + oldbrk = sbrk(0); + + // can one sbrk() less than a page? + a = sbrk(0); + int i; + for (i = 0; i < 5000; i++) { + b = sbrk(1); + if (b != a) { + printf(stdout, "sbrk test failed %d %x %x\n", i, a, b); + exit(); + } + *b = 1; + a = b + 1; + } + pid = fork(); + if (pid < 0) { + printf(stdout, "sbrk test fork failed\n"); + exit(); + } + c = sbrk(1); + c = sbrk(1); + if (c != a + 1) { + printf(stdout, "sbrk test failed post-fork\n"); + exit(); + } + if (pid == 0) { + exit(); + } + wait(); + + // can one grow address space to something big? +#define BIG (100 * 1024 * 1024) + a = sbrk(0); + amt = (BIG) -(uint)a; + p = sbrk(amt); + if (p != a) { + printf(stdout, "sbrk test failed to grow big address space; enough phys mem?\n"); + exit(); + } + lastaddr = (char*) (BIG - 1); +#pragma GCC diagnostic ignored "-Wstringop-overflow" + *lastaddr = 99; + + // can one de-allocate? + a = sbrk(0); + c = sbrk(-4096); + if (c == (char*)0xffffffff) { + printf(stdout, "sbrk could not deallocate\n"); + exit(); + } + c = sbrk(0); + if (c != a - 4096) { + printf(stdout, "sbrk deallocation produced wrong address, a %x c %x\n", a, c); + exit(); + } + + // can one re-allocate that page? 
+ a = sbrk(0); + c = sbrk(4096); + if (c != a || sbrk(0) != a + 4096) { + printf(stdout, "sbrk re-allocation failed, a %x c %x\n", a, c); + exit(); + } + if (*lastaddr == 99) { + // should be zero + printf(stdout, "sbrk de-allocation didn't really deallocate\n"); + exit(); + } + + a = sbrk(0); + c = sbrk(-(sbrk(0) - oldbrk)); + if (c != a) { + printf(stdout, "sbrk downsize failed, a %x c %x\n", a, c); + exit(); + } + + // can we read the kernel's memory? + for (a = (char*)(KERNBASE); a < (char*) (KERNBASE + 2000000); a += 50000) { + ppid = getpid(); + pid = fork(); + if (pid < 0) { + printf(stdout, "fork failed\n"); + exit(); + } + if (pid == 0) { + printf(stdout, "oops could read %x = %x\n", a, *a); + kill(ppid); + exit(); + } + wait(); + } + + // if we run the system out of memory, does it clean up the last + // failed allocation? + if (pipe(fds) != 0) { + printf(1, "pipe() failed\n"); + exit(); + } + for (i = 0; i < sizeof(pids) / sizeof(pids[0]); i++) { + if ((pids[i] = fork()) == 0) { + // allocate a lot of memory + sbrk(BIG - (uint)sbrk(0)); + write(fds[1], "x", 1); + // sit around until killed + for (;;) { sleep(1000); + } + } + if (pids[i] != -1) { + read(fds[0], &scratch, 1); + } + } + // if those failed allocations freed up the pages they did allocate, + // we'll be able to allocate here + c = sbrk(4096); + for (i = 0; i < sizeof(pids) / sizeof(pids[0]); i++) { + if (pids[i] == -1) { + continue; + } + kill(pids[i]); + wait(); + } + if (c == (char*)0xffffffff) { + printf(stdout, "failed sbrk leaked memory\n"); + exit(); + } + + if (sbrk(0) > oldbrk) { + sbrk(-(sbrk(0) - oldbrk)); + } + + printf(stdout, "sbrk test OK\n"); +} + +void validateint(int *p) { + int res; + asm ("mov %%esp, %%ebx\n\t" + "mov %3, %%esp\n\t" + "int %2\n\t" + "mov %%ebx, %%esp" : + "=a" (res) : + "a" (SYS_sleep), "n" (T_SYSCALL), "c" (p) : + "ebx"); +} + +void validatetest(void) { + int hi, pid; + uint p; + + printf(stdout, "validate test\n"); + hi = 1100 * 1024; + + for (p = 0; p <= 
(uint)hi; p += 4096) { + if ((pid = fork()) == 0) { + // try to crash the kernel by passing in a badly placed integer + validateint((int*)p); + exit(); + } + sleep(0); + sleep(0); + kill(pid); + wait(); + + // try to crash the kernel by passing in a bad string pointer + if (link("nosuchfile", (char*)p) != -1) { + printf(stdout, "link should not succeed\n"); + exit(); + } + } + + printf(stdout, "validate ok\n"); +} + +// does unintialized data start out zero? +char uninit[10000]; +void bsstest(void) { + int i; + + printf(stdout, "bss test\n"); + for (i = 0; i < sizeof(uninit); i++) { + if (uninit[i] != '\0') { + printf(stdout, "bss test failed\n"); + exit(); + } + } + printf(stdout, "bss test ok\n"); +} + +// does exec return an error if the arguments +// are larger than a page? or does it write +// below the stack and wreck the instructions/data? +void bigargtest(void) { + int pid, fd; + + unlink("bigarg-ok"); + pid = fork(); + if (pid == 0) { + static char *args[MAXARG]; + int i; + for (i = 0; i < MAXARG - 1; i++) { + args[i] = "bigargs test: failed\n "; + } + args[MAXARG - 1] = 0; + printf(stdout, "bigarg test\n"); + exec("echo", args); + printf(stdout, "bigarg test ok\n"); + fd = open("bigarg-ok", O_CREATE); + close(fd); + exit(); + } + else if (pid < 0) { + printf(stdout, "bigargtest: fork failed\n"); + exit(); + } + wait(); + fd = open("bigarg-ok", 0); + if (fd < 0) { + printf(stdout, "bigarg test failed!\n"); + exit(); + } + close(fd); + unlink("bigarg-ok"); +} + +// what happens when the file system runs out of blocks? +// answer: balloc panics, so this test is not useful. 
+void fsfull() { + int nfiles; + int fsblocks = 0; + + printf(1, "fsfull test\n"); + + for (nfiles = 0;; nfiles++) { + char name[64]; + name[0] = 'f'; + name[1] = '0' + nfiles / 1000; + name[2] = '0' + (nfiles % 1000) / 100; + name[3] = '0' + (nfiles % 100) / 10; + name[4] = '0' + (nfiles % 10); + name[5] = '\0'; + printf(1, "writing %s\n", name); + int fd = open(name, O_CREATE | O_RDWR); + if (fd < 0) { + printf(1, "open %s failed\n", name); + break; + } + int total = 0; + while (1) { + int cc = write(fd, buf, 512); + if (cc < 512) { + break; + } + total += cc; + fsblocks++; + } + printf(1, "wrote %d bytes\n", total); + close(fd); + if (total == 0) { + break; + } + } + + while (nfiles >= 0) { + char name[64]; + name[0] = 'f'; + name[1] = '0' + nfiles / 1000; + name[2] = '0' + (nfiles % 1000) / 100; + name[3] = '0' + (nfiles % 100) / 10; + name[4] = '0' + (nfiles % 10); + name[5] = '\0'; + unlink(name); + nfiles--; + } + + printf(1, "fsfull test finished\n"); +} + +void uio() { + #define RTC_ADDR 0x70 + #define RTC_DATA 0x71 + + ushort port = 0; + uchar val = 0; + int pid; + + printf(1, "uio test\n"); + pid = fork(); + if (pid == 0) { + port = RTC_ADDR; + val = 0x09; /* year */ + /* http://wiki.osdev.org/Inline_Assembly/Examples */ + asm volatile ("outb %0,%1" : : "a" (val), "d" (port)); + port = RTC_DATA; + asm volatile ("inb %1,%0" : "=a" (val) : "d" (port)); + printf(1, "uio: uio succeeded; test FAILED\n"); + exit(); + } + else if (pid < 0) { + printf(1, "fork failed\n"); + exit(); + } + wait(); + printf(1, "uio test done\n"); +} + +void argptest(){ + int fd; + fd = open("init", O_RDONLY); + if (fd < 0) { + printf(2, "open failed\n"); + exit(); + } + read(fd, sbrk(0) - 1, -1); + close(fd); + printf(1, "arg test passed\n"); +} + +unsigned long randstate = 1; +unsigned int rand() { + randstate = randstate * 1664525 + 1013904223; + return randstate; +} + +int main(int argc, char *argv[]) { + printf(1, "usertests starting\n"); + + if (open("usertests.ran", 0) >= 0) 
{ + printf(1, "already ran user tests -- rebuild fs.img\n"); + exit(); + } + close(open("usertests.ran", O_CREATE)); + + argptest(); + createdelete(); + linkunlink(); + concreate(); + fourfiles(); + sharedfd(); + + bigargtest(); + bigwrite(); + bigargtest(); + bsstest(); + sbrktest(); + validatetest(); + + opentest(); + writetest(); + writetest1(); + createtest(); + + openiputtest(); + exitiputtest(); + iputtest(); + + mem(); + pipe1(); + preempt(); + exitwait(); + + rmdot(); + fourteen(); + bigfile(); + subdir(); + linktest(); + unlinkread(); + dirfile(); + iref(); + forktest(); + bigdir(); // slow + + uio(); + + exectest(); + + exit(); +} diff --git a/vectors.pl b/vectors.pl new file mode 100755 index 0000000..57b49dd --- /dev/null +++ b/vectors.pl @@ -0,0 +1,47 @@ +#!/usr/bin/perl -w + +# Generate vectors.S, the trap/interrupt entry points. +# There has to be one entry point per interrupt number +# since otherwise there's no way for trap() to discover +# the interrupt number. + +print "# generated by vectors.pl - do not edit\n"; +print "# handlers\n"; +print ".globl alltraps\n"; +for(my $i = 0; $i < 256; $i++){ + print ".globl vector$i\n"; + print "vector$i:\n"; + if(!($i == 8 || ($i >= 10 && $i <= 14) || $i == 17)){ + print " pushl \$0\n"; + } + print " pushl \$$i\n"; + print " jmp alltraps\n"; +} + +print "\n# vector table\n"; +print ".data\n"; +print ".globl vectors\n"; +print "vectors:\n"; +for(my $i = 0; $i < 256; $i++){ + print " .long vector$i\n"; +} + +# sample output: +# # handlers +# .globl alltraps +# .globl vector0 +# vector0: +# pushl $0 +# pushl $0 +# jmp alltraps +# ... +# +# # vector table +# .data +# .globl vectors +# vectors: +# .long vector0 +# .long vector1 +# .long vector2 +# ... 
+ diff --git a/vm.c b/vm.c new file mode 100644 index 0000000..5ac4945 --- /dev/null +++ b/vm.c @@ -0,0 +1,392 @@ +#include "param.h" +#include "types.h" +#include "defs.h" +#include "x86.h" +#include "memlayout.h" +#include "mmu.h" +#include "proc.h" +#include "elf.h" + +extern char data[]; // defined by kernel.ld +pde_t *kpgdir; // for use in scheduler() + +// Set up CPU's kernel segment descriptors. +// Run once on entry on each CPU. +void seginit(void) { + struct cpu *c; + + // Map "logical" addresses to virtual addresses using identity map. + // Cannot share a CODE descriptor for both kernel and user + // because it would have to have DPL_USR, but the CPU forbids + // an interrupt from CPL=0 to DPL=3. + c = &cpus[cpuid()]; + c->gdt[SEG_KCODE] = SEG(STA_X | STA_R, 0, 0xffffffff, 0); + c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0); + c->gdt[SEG_UCODE] = SEG(STA_X | STA_R, 0, 0xffffffff, DPL_USER); + c->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER); + lgdt(c->gdt, sizeof(c->gdt)); +} + +// Return the address of the PTE in page table pgdir +// that corresponds to virtual address va. If alloc!=0, +// create any required page table pages. +static pte_t * walkpgdir(pde_t *pgdir, const void *va, int alloc) { + pde_t *pde; + pte_t *pgtab; + + pde = &pgdir[PDX(va)]; + if (*pde & PTE_P) { + pgtab = (pte_t*)P2V(PTE_ADDR(*pde)); + } + else { + if (!alloc || (pgtab = (pte_t*)kalloc()) == 0) { + return 0; + } + // Make sure all those PTE_P bits are zero. + memset(pgtab, 0, PGSIZE); + // The permissions here are overly generous, but they can + // be further restricted by the permissions in the page table + // entries, if necessary. + *pde = V2P(pgtab) | PTE_P | PTE_W | PTE_U; + } + return &pgtab[PTX(va)]; +} + +// Create PTEs for virtual addresses starting at va that refer to +// physical addresses starting at pa. va and size might not +// be page-aligned. 
+static int mappages(pde_t *pgdir, void *va, uint size, uint pa, int perm) { + char *a, *last; + pte_t *pte; + + a = (char*)PGROUNDDOWN((uint)va); + last = (char*)PGROUNDDOWN(((uint)va) + size - 1); + for (;;) { + if ((pte = walkpgdir(pgdir, a, 1)) == 0) { + return -1; + } + if (*pte & PTE_P) { + panic("remap"); + } + *pte = pa | perm | PTE_P; + if (a == last) { + break; + } + a += PGSIZE; + pa += PGSIZE; + } + return 0; +} + +// There is one page table per process, plus one that's used when +// a CPU is not running any process (kpgdir). The kernel uses the +// current process's page table during system calls and interrupts; +// page protection bits prevent user code from using the kernel's +// mappings. +// +// setupkvm() and exec() set up every page table like this: +// +// 0..KERNBASE: user memory (text+data+stack+heap), mapped to +// phys memory allocated by the kernel +// KERNBASE..KERNBASE+EXTMEM: mapped to 0..EXTMEM (for I/O space) +// KERNBASE+EXTMEM..data: mapped to EXTMEM..V2P(data) +// for the kernel's instructions and r/o data +// data..KERNBASE+PHYSTOP: mapped to V2P(data)..PHYSTOP, +// rw data + free physical memory +// 0xfe000000..0: mapped direct (devices such as ioapic) +// +// The kernel allocates physical memory for its heap and for user memory +// between V2P(end) and the end of physical memory (PHYSTOP) +// (directly addressable from end..P2V(PHYSTOP)). + +// This table defines the kernel's mappings, which are present in +// every process's page table. +static struct kmap { + void *virt; + uint phys_start; + uint phys_end; + int perm; +} kmap[] = { + { (void*)KERNBASE, 0, EXTMEM, PTE_W}, // I/O space + { (void*)KERNLINK, V2P(KERNLINK), V2P(data), 0}, // kern text+rodata + { (void*)data, V2P(data), PHYSTOP, PTE_W}, // kern data+memory + { (void*)DEVSPACE, DEVSPACE, 0, PTE_W}, // more devices +}; + +// Set up kernel part of a page table. 
+pde_t*setupkvm(void) { + pde_t *pgdir; + struct kmap *k; + + if ((pgdir = (pde_t*)kalloc()) == 0) { + return 0; + } + memset(pgdir, 0, PGSIZE); + if (P2V(PHYSTOP) > (void*)DEVSPACE) { + panic("PHYSTOP too high"); + } + for (k = kmap; k < &kmap[NELEM(kmap)]; k++) { + if (mappages(pgdir, k->virt, k->phys_end - k->phys_start, + (uint)k->phys_start, k->perm) < 0) { + freevm(pgdir); + return 0; + } + } + return pgdir; +} + +// Allocate one page table for the machine for the kernel address +// space for scheduler processes. +void kvmalloc(void) { + kpgdir = setupkvm(); + switchkvm(); +} + +// Switch h/w page table register to the kernel-only page table, +// for when no process is running. +void switchkvm(void) { + lcr3(V2P(kpgdir)); // switch to the kernel page table +} + +// Switch TSS and h/w page table to correspond to process p. +void switchuvm(struct proc *p) { + if (p == 0) { + panic("switchuvm: no process"); + } + if (p->kstack == 0) { + panic("switchuvm: no kstack"); + } + if (p->pgdir == 0) { + panic("switchuvm: no pgdir"); + } + + pushcli(); + mycpu()->gdt[SEG_TSS] = SEG16(STS_T32A, &mycpu()->ts, + sizeof(mycpu()->ts) - 1, 0); + mycpu()->gdt[SEG_TSS].s = 0; + mycpu()->ts.ss0 = SEG_KDATA << 3; + mycpu()->ts.esp0 = (uint)p->kstack + KSTACKSIZE; + // setting IOPL=0 in eflags *and* iomb beyond the tss segment limit + // forbids I/O instructions (e.g., inb and outb) from user space + mycpu()->ts.iomb = (ushort) 0xFFFF; + ltr(SEG_TSS << 3); + lcr3(V2P(p->pgdir)); // switch to process's address space + popcli(); +} + +// Load the initcode into address 0 of pgdir. +// sz must be less than a page. +void inituvm(pde_t *pgdir, char *init, uint sz) { + char *mem; + + if (sz >= PGSIZE) { + panic("inituvm: more than a page"); + } + mem = kalloc(); + memset(mem, 0, PGSIZE); + mappages(pgdir, 0, PGSIZE, V2P(mem), PTE_W | PTE_U); + memmove(mem, init, sz); +} + +// Load a program segment into pgdir. 
addr must be page-aligned +// and the pages from addr to addr+sz must already be mapped. +int loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz) { + uint i, pa, n; + pte_t *pte; + + if ((uint) addr % PGSIZE != 0) { + panic("loaduvm: addr must be page aligned"); + } + for (i = 0; i < sz; i += PGSIZE) { + if ((pte = walkpgdir(pgdir, addr + i, 0)) == 0) { + panic("loaduvm: address should exist"); + } + pa = PTE_ADDR(*pte); + if (sz - i < PGSIZE) { + n = sz - i; + } + else { + n = PGSIZE; + } + if (readi(ip, P2V(pa), offset + i, n) != n) { + return -1; + } + } + return 0; +} + +// Allocate page tables and physical memory to grow process from oldsz to +// newsz, which need not be page aligned. Returns new size or 0 on error. +int allocuvm(pde_t *pgdir, uint oldsz, uint newsz) { + char *mem; + uint a; + + if (newsz >= KERNBASE) { + return 0; + } + if (newsz < oldsz) { + return oldsz; + } + + a = PGROUNDUP(oldsz); + for (; a < newsz; a += PGSIZE) { + mem = kalloc(); + if (mem == 0) { + cprintf("allocuvm out of memory\n"); + deallocuvm(pgdir, newsz, oldsz); + return 0; + } + memset(mem, 0, PGSIZE); + if (mappages(pgdir, (char*)a, PGSIZE, V2P(mem), PTE_W | PTE_U) < 0) { + cprintf("allocuvm out of memory (2)\n"); + deallocuvm(pgdir, newsz, oldsz); + kfree(mem); + return 0; + } + } + return newsz; +} + +// Deallocate user pages to bring the process size from oldsz to +// newsz. oldsz and newsz need not be page-aligned, nor does newsz +// need to be less than oldsz. oldsz can be larger than the actual +// process size. Returns the new process size. 
+int deallocuvm(pde_t *pgdir, uint oldsz, uint newsz) { + pte_t *pte; + uint a, pa; + + if (newsz >= oldsz) { + return oldsz; + } + + a = PGROUNDUP(newsz); + for (; a < oldsz; a += PGSIZE) { + pte = walkpgdir(pgdir, (char*)a, 0); + if (!pte) { + a = PGADDR(PDX(a) + 1, 0, 0) - PGSIZE; + } + else if ((*pte & PTE_P) != 0) { + pa = PTE_ADDR(*pte); + if (pa == 0) { + panic("kfree"); + } + char *v = P2V(pa); + kfree(v); + *pte = 0; + } + } + return newsz; +} + +// Free a page table and all the physical memory pages +// in the user part. +void freevm(pde_t *pgdir) { + uint i; + + if (pgdir == 0) { + panic("freevm: no pgdir"); + } + deallocuvm(pgdir, KERNBASE, 0); + for (i = 0; i < NPDENTRIES; i++) { + if (pgdir[i] & PTE_P) { + char * v = P2V(PTE_ADDR(pgdir[i])); + kfree(v); + } + } + kfree((char*)pgdir); +} + +// Clear PTE_U on a page. Used to create an inaccessible +// page beneath the user stack. +void clearpteu(pde_t *pgdir, char *uva) { + pte_t *pte; + + pte = walkpgdir(pgdir, uva, 0); + if (pte == 0) { + panic("clearpteu"); + } + *pte &= ~PTE_U; +} + +// Given a parent process's page table, create a copy +// of it for a child. +pde_t* copyuvm(pde_t *pgdir, uint sz) { + pde_t *d; + pte_t *pte; + uint pa, i, flags; + char *mem; + + if ((d = setupkvm()) == 0) { + return 0; + } + for (i = 0; i < sz; i += PGSIZE) { + if ((pte = walkpgdir(pgdir, (void *) i, 0)) == 0) { + panic("copyuvm: pte should exist"); + } + if (!(*pte & PTE_P)) { + panic("copyuvm: page not present"); + } + pa = PTE_ADDR(*pte); + flags = PTE_FLAGS(*pte); + if ((mem = kalloc()) == 0) { + freevm(d); + return 0; + } + memmove(mem, (char*)P2V(pa), PGSIZE); + if (mappages(d, (void*)i, PGSIZE, V2P(mem), flags) < 0) { + kfree(mem); + freevm(d); + return 0; + } + } + return d; +} + + +// Map user virtual address to kernel address. 
+char*uva2ka(pde_t *pgdir, char *uva) { + pte_t *pte; + + pte = walkpgdir(pgdir, uva, 0); + if ((*pte & PTE_P) == 0) { + return 0; + } + if ((*pte & PTE_U) == 0) { + return 0; + } + return (char*)P2V(PTE_ADDR(*pte)); +} + +// Copy len bytes from p to user address va in page table pgdir. +// Most useful when pgdir is not the current page table. +// uva2ka ensures this only works for PTE_U pages. +int copyout(pde_t *pgdir, uint va, void *p, uint len) { + char *buf, *pa0; + uint n, va0; + + buf = (char*)p; + while (len > 0) { + va0 = (uint)PGROUNDDOWN(va); + pa0 = uva2ka(pgdir, (char*)va0); + if (pa0 == 0) { + return -1; + } + n = PGSIZE - (va - va0); + if (n > len) { + n = len; + } + memmove(pa0 + (va - va0), buf, n); + len -= n; + buf += n; + va = va0 + PGSIZE; + } + return 0; +} + + + + + + + + diff --git a/wc.c b/wc.c new file mode 100644 index 0000000..0869cf8 --- /dev/null +++ b/wc.c @@ -0,0 +1,52 @@ +#include "types.h" +#include "stat.h" +#include "user.h" + +char buf[512]; + +void wc(int fd, char *name) { + int i, n; + int l, w, c, inword; + + l = w = c = 0; + inword = 0; + while ((n = read(fd, buf, sizeof(buf))) > 0) { + for (i = 0; i < n; i++) { + c++; + if (buf[i] == '\n') { + l++; + } + if (strchr(" \r\t\n\v", buf[i])) { + inword = 0; + } + else if (!inword) { + w++; + inword = 1; + } + } + } + if (n < 0) { + printf(1, "wc: read error\n"); + exit(); + } + printf(1, "%d %d %d %s\n", l, w, c, name); +} + +int main(int argc, char *argv[]) { + int fd, i; + + if (argc <= 1) { + wc(0, ""); + exit(); + } + + for (i = 1; i < argc; i++) { + if ((fd = open(argv[i], 0)) < 0) { + printf(1, "wc: cannot open %s\n", argv[i]); + exit(); + } + wc(fd, argv[i]); + close(fd); + } + exit(); +} diff --git a/x86.h b/x86.h new file mode 100644 index 0000000..118f725 --- /dev/null +++ b/x86.h @@ -0,0 +1,148 @@ +// Routines to let C code use special x86 instructions. 
+ +static inline uchar inb(ushort port) { + uchar data; + + asm volatile ("in %1,%0" : "=a" (data) : "d" (port)); + return data; +} + +static inline void insl(int port, void *addr, int cnt) { + asm volatile ("cld; rep insl" : + "=D" (addr), "=c" (cnt) : + "d" (port), "0" (addr), "1" (cnt) : + "memory", "cc"); +} + +static inline void outb(ushort port, uchar data) { + asm volatile ("out %0,%1" : : "a" (data), "d" (port)); +} + +static inline void outw(ushort port, ushort data) { + asm volatile ("out %0,%1" : : "a" (data), "d" (port)); +} + +static inline void outsl(int port, const void *addr, int cnt) { + asm volatile ("cld; rep outsl" : + "=S" (addr), "=c" (cnt) : + "d" (port), "0" (addr), "1" (cnt) : + "cc"); +} + +static inline void stosb(void *addr, int data, int cnt) { + asm volatile ("cld; rep stosb" : + "=D" (addr), "=c" (cnt) : + "0" (addr), "1" (cnt), "a" (data) : + "memory", "cc"); +} + +static inline void stosl(void *addr, int data, int cnt) { + asm volatile ("cld; rep stosl" : + "=D" (addr), "=c" (cnt) : + "0" (addr), "1" (cnt), "a" (data) : + "memory", "cc"); +} + +struct segdesc; + +static inline void lgdt(struct segdesc *p, int size) { + volatile ushort pd[3]; + + pd[0] = size - 1; + pd[1] = (uint)p; + pd[2] = (uint)p >> 16; + + asm volatile ("lgdt (%0)" : : "r" (pd)); +} + +struct gatedesc; + +static inline void lidt(struct gatedesc *p, int size) { + volatile ushort pd[3]; + + pd[0] = size - 1; + pd[1] = (uint)p; + pd[2] = (uint)p >> 16; + + asm volatile ("lidt (%0)" : : "r" (pd)); +} + +static inline void ltr(ushort sel) { + asm volatile ("ltr %0" : : "r" (sel)); +} + +static inline uint readeflags(void) { + uint eflags; + asm volatile ("pushfl; popl %0" : "=r" (eflags)); + return eflags; +} + +static inline void loadgs(ushort v) { + asm volatile ("movw %0, %%gs" : : "r" (v)); +} + +static inline void cli(void) { + asm volatile ("cli"); +} + +static inline void sti(void) { + asm volatile ("sti"); +} + +static inline uint xchg(volatile uint *addr, 
uint newval) { + uint result; + + // The + in "+m" denotes a read-modify-write operand. + asm volatile ("lock; xchgl %0, %1" : + "+m" (*addr), "=a" (result) : + "1" (newval) : + "cc"); + return result; +} + +static inline uint rcr2(void) { + uint val; + asm volatile ("movl %%cr2,%0" : "=r" (val)); + return val; +} + +static inline void lcr3(uint val) { + asm volatile ("movl %0,%%cr3" : : "r" (val)); +} + +// Layout of the trap frame built on the stack by the +// hardware and by trapasm.S, and passed to trap(). +struct trapframe { + // registers as pushed by pusha + uint edi; + uint esi; + uint ebp; + uint oesp; // useless & ignored + uint ebx; + uint edx; + uint ecx; + uint eax; + + // rest of trap frame + ushort gs; + ushort padding1; + ushort fs; + ushort padding2; + ushort es; + ushort padding3; + ushort ds; + ushort padding4; + uint trapno; + + // below here defined by x86 hardware + uint err; + uint eip; + ushort cs; + ushort padding5; + uint eflags; + + // below here only when crossing rings, such as from user to kernel + uint esp; + ushort ss; + ushort padding6; +}; diff --git a/zombie.c b/zombie.c new file mode 100644 index 0000000..a340cf3 --- /dev/null +++ b/zombie.c @@ -0,0 +1,13 @@ +// Create a zombie process that +// must be reparented at exit. + +#include "types.h" +#include "stat.h" +#include "user.h" + +int main(int argc, char* argv[]) { + if (fork() > 0) { + sleep(5); // Let child exit before parent. + } + exit(); +}