# By convention, the _start symbol specifies the ELF entry point. # Since we haven't set up virtual memory yet, our entry point is # the physical address of 'entry'. .globl _start _start = V2P_WO(entry)
# Entering xv6 on boot processor, with paging off. .globl entry entry: # Turn on page size extension for 4Mbyte pages movl %cr4, %eax orl $(CR4_PSE), %eax movl %eax, %cr4 # Set page directory movl $(V2P_WO(entrypgdir)), %eax movl %eax, %cr3 # Turn on paging. movl %cr0, %eax orl $(CR0_PG|CR0_WP), %eax movl %eax, %cr0
# Set up the stack pointer. movl $(stack + KSTACKSIZE), %esp
# Jump to main(), and switch to executing at # high addresses. The indirect call is needed because # the assembler produces a PC-relative instruction # for a direct jump. mov $main, %eax jmp *%eax
.comm stack, KSTACKSIZE
由entry创建的页表有足够的映射来允许内核的C代码开始运行。
1 2 3 4 5 6 7 8 9 10
// Allocate one page table for the machine for the kernel address // space for scheduler processes. void kvmalloc(void) { kpgdir = setupkvm(); switchkvm(); }
#define EXTMEM 0x100000 // Start of extended memory #define PHYSTOP 0xE000000 // Top physical memory #define DEVSPACE 0xFE000000 // Other devices are at high addresses
// Key addresses for address space layout (see kmap in vm.c for layout) #define KERNBASE 0x80000000 // First kernel virtual address #define KERNLINK (KERNBASE+EXTMEM) // Address where kernel is linked
// Create PTEs for virtual addresses starting at va that refer to // physical addresses starting at pa. va and size might not // be page-aligned. staticint mappages(pde_t *pgdir, void *va, uint size, uint pa, int perm) { char *a, *last; pte_t *pte;
a = (char*)PGROUNDDOWN((uint)va); last = (char*)PGROUNDDOWN(((uint)va) + size - 1); for(;;){ if((pte = walkpgdir(pgdir, a, 1)) == 0) return-1; if(*pte & PTE_P) panic("remap"); *pte = pa | perm | PTE_P; if(a == last) break; a += PGSIZE; pa += PGSIZE; } return0; }
// Return the address of the PTE in page table pgdir // that corresponds to virtual address va. If alloc!=0, // create any required page table pages. staticpte_t * walkpgdir(pde_t *pgdir, constvoid *va, int alloc) { pde_t *pde; pte_t *pgtab;
pde = &pgdir[PDX(va)]; if(*pde & PTE_P){ pgtab = (pte_t*)P2V(PTE_ADDR(*pde)); } else { if(!alloc || (pgtab = (pte_t*)kalloc()) == 0) return0; // Make sure all those PTE_P bits are zero. memset(pgtab, 0, PGSIZE); // The permissions here are overly generous, but they can // be further restricted by the permissions in the page table // entries, if necessary. *pde = V2P(pgtab) | PTE_P | PTE_W | PTE_U; } return &pgtab[PTX(va)]; }
// Bootstrap processor starts running C code here. // Allocate a real stack and switch to it, first // doing some setup required for memory allocator to work. int main(void) { kinit1(end, P2V(4*1024*1024)); // phys page allocator kvmalloc(); // kernel page table mpinit(); // detect other processors lapicinit(); // interrupt controller seginit(); // segment descriptors picinit(); // disable pic ioapicinit(); // another interrupt controller consoleinit(); // console hardware uartinit(); // serial port pinit(); // process table tvinit(); // trap vectors binit(); // buffer cache fileinit(); // file table ideinit(); // disk startothers(); // start other processors kinit2(P2V(4*1024*1024), P2V(PHYSTOP)); // must come after startothers() userinit(); // first user process mpmain(); // finish this processor's setup }
函数 main 调用 kinit1 和 kinit2 来初始化分配器。 进行两次调用的原因是,对于 main 的大部分调用来说,不能使用锁或超过 4 MB 的内存。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
// Initialization happens in two phases. // 1. main() calls kinit1() while still using entrypgdir to place just // the pages mapped by entrypgdir on free list. // 2. main() calls kinit2() with the rest of the physical pages // after installing a full page table that maps them on all cores. void kinit1(void *vstart, void *vend) { initlock(&kmem.lock, "kmem"); kmem.use_lock = 0; freerange(vstart, vend); }
//PAGEBREAK: 21 // Free the page of physical memory pointed at by v, // which normally should have been returned by a // call to kalloc(). (The exception is when // initializing the allocator; see kinit above.) void kfree(char *v) { structrun *r;
if((uint)v % PGSIZE || v < end || V2P(v) >= PHYSTOP) panic("kfree");
// Fill with junk to catch dangling refs. memset(v, 1, PGSIZE);
// Allocate one 4096-byte page of physical memory. // Returns a pointer that the kernel can use. // Returns 0 if the memory cannot be allocated. char* kalloc(void) { structrun *r;
// Allocate page tables and physical memory to grow process from oldsz to // newsz, which need not be page aligned. Returns new size or 0 on error. int allocuvm(pde_t *pgdir, uint oldsz, uint newsz) { char *mem; uint a;
// Deallocate user pages to bring the process size from oldsz to // newsz. oldsz and newsz need not be page-aligned, nor does newsz // need to be less than oldsz. oldsz can be larger than the actual // process size. Returns the new process size. int deallocuvm(pde_t *pgdir, uint oldsz, uint newsz) { pte_t *pte; uint a, pa;
if(newsz >= oldsz) return oldsz;
a = PGROUNDUP(newsz); for(; a < oldsz; a += PGSIZE){ pte = walkpgdir(pgdir, (char*)a, 0); if(!pte) a = PGADDR(PDX(a) + 1, 0, 0) - PGSIZE; elseif((*pte & PTE_P) != 0){ pa = PTE_ADDR(*pte); if(pa == 0) panic("kfree"); char *v = P2V(pa); kfree(v); *pte = 0; } } return newsz; }
// Switch TSS and h/w page table to correspond to process p. void switchuvm(struct proc *p) { if(p == 0) panic("switchuvm: no process"); if(p->kstack == 0) panic("switchuvm: no kstack"); if(p->pgdir == 0) panic("switchuvm: no pgdir");
pushcli(); mycpu()->gdt[SEG_TSS] = SEG16(STS_T32A, &mycpu()->ts, sizeof(mycpu()->ts)-1, 0); mycpu()->gdt[SEG_TSS].s = 0; mycpu()->ts.ss0 = SEG_KDATA << 3; mycpu()->ts.esp0 = (uint)p->kstack + KSTACKSIZE; // setting IOPL=0 in eflags *and* iomb beyond the tss segment limit // forbids I/O instructions (e.g., inb and outb) from user space mycpu()->ts.iomb = (ushort) 0xFFFF; ltr(SEG_TSS << 3); lcr3(V2P(p->pgdir)); // switch to process's address space popcli(); }
x86 硬件将页表条目缓存在 Translation Look-aside Buffer(TLB) 中,当 xv6 更改页表时,它必须使缓存的条目无效。如果它没有使缓存的条目无效,那么在稍后的某个时刻,TLB 可能会使用旧的映射,指向同时已分配给另一个进程的物理页,因此,进程可能能在其他进程的内存上修改。