KVM: paravirtual CR3 cache for VMX guests (patch against kvm-60)

This patch keeps up to KVM_CR3_CACHE_SIZE shadow page-table roots per vcpu
instead of one.  vcpu->arch.mmu.root_hpa and pae_root become arrays indexed
by vcpu->arch.cr3_cache_idx, and guest_cr3_gpa[] records the guest cr3 that
each PAE root was built for.

A guest hands the host a page-aligned struct kvm_cr3_cache by writing its
guest-virtual address to KVM_MSR_SET_CR3_CACHE.  vmx_set_cr3() then stores
each (guest_cr3, host_cr3) pair in that structure and writes host_cr3 into
the matching VMCS CR3_TARGET_VALUE field.  After every VM exit
kvm_cr3_cache_sync() compares GUEST_CR3 with the cached roots to work out
which guest cr3 is active.

Shadow pages that are zapped while their root_count is non-zero are flagged
role.invalid and skipped by the shadow page hash lookup.

diff -up kvm-60/kernel/include/asm-x86/kvm_host.h.cr3-cache kvm-60/kernel/include/asm-x86/kvm_host.h
--- kvm-60/kernel/include/asm-x86/kvm_host.h.cr3-cache	2008-01-23 10:04:14.000000000 +0100
+++ kvm-60/kernel/include/asm-x86/kvm_host.h	2008-01-29 16:04:20.000000000 +0100
@@ -140,6 +140,7 @@ union kvm_mmu_page_role {
 		unsigned pad_for_nice_hex_output : 6;
 		unsigned metaphysical : 1;
 		unsigned access : 3;
+		unsigned invalid : 1;
 	};
 };
 
@@ -180,11 +181,11 @@ struct kvm_mmu {
 	gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
 	void (*prefetch_page)(struct kvm_vcpu *vcpu,
 			      struct kvm_mmu_page *page);
-	hpa_t root_hpa;
+	hpa_t root_hpa[KVM_CR3_CACHE_SIZE];
 	int root_level;
 	int shadow_root_level;
 
-	u64 *pae_root;
+	u64 *pae_root[KVM_CR3_CACHE_SIZE];
 };
 
 struct kvm_vcpu_arch {
@@ -198,6 +199,10 @@ struct kvm_vcpu_arch {
 	unsigned long cr0;
 	unsigned long cr2;
 	unsigned long cr3;
+	struct kvm_cr3_cache *cr3_cache;
+	unsigned int cr3_cache_idx;
+	unsigned int cr3_cache_limit;
+	gpa_t guest_cr3_gpa[KVM_CR3_CACHE_SIZE];
 	unsigned long cr4;
 	unsigned long cr8;
 	u64 pdptrs[4]; /* pae */
@@ -320,6 +325,7 @@ struct kvm_vcpu_stat {
 	u32 fpu_reload;
 	u32 insn_emulation;
 	u32 insn_emulation_fail;
+	u32 cr3_cache_synced;
 };
 
 struct descriptor_table {
diff -up kvm-60/kernel/include/asm-x86/kvm_para.h.cr3-cache kvm-60/kernel/include/asm-x86/kvm_para.h
--- kvm-60/kernel/include/asm-x86/kvm_para.h.cr3-cache	2008-01-23 10:04:14.000000000 +0100
+++ kvm-60/kernel/include/asm-x86/kvm_para.h	2008-01-29 16:04:20.000000000 +0100
@@ -10,7 +10,22 @@
  * paravirtualization, the appropriate feature bit should be checked.
  */
 #define KVM_CPUID_FEATURES	0x40000001
+#define KVM_FEATURE_CR3_CACHE	1
+#define KVM_MSR_SET_CR3_CACHE	0x87655678
+
+#define KVM_CR3_CACHE_SIZE	4
+
+struct kvm_cr3_cache_entry {
+	u64 guest_cr3;
+	u64 host_cr3;
+};
+
+struct kvm_cr3_cache {
+	struct kvm_cr3_cache_entry entry[KVM_CR3_CACHE_SIZE];
+	u32 max_idx;
+};
+
 
 #ifdef __KERNEL__
 #include <asm/processor.h>
 
diff -up kvm-60/kernel/vmx.c.cr3-cache kvm-60/kernel/vmx.c
--- kvm-60/kernel/vmx.c.cr3-cache	2008-01-23 10:04:14.000000000 +0100
+++ kvm-60/kernel/vmx.c	2008-01-29 16:04:20.000000000 +0100
@@ -752,6 +752,30 @@ static int vmx_get_msr(struct kvm_vcpu *
 	return 0;
 }
 
+int vmx_cr3_cache_msr(struct kvm_vcpu *vcpu, u64 data)
+{
+	struct page *page;
+	hva_t cr3_cache_hva;
+
+	if (data != PAGE_ALIGN(data)) {
+		printk("data must be aligned!\n");
+		return -EINVAL;
+	}
+
+	down_read(&current->mm->mmap_sem);
+	/*XXX: release on unload */
+	page = gva_to_page(vcpu, data);
+	up_read(&current->mm->mmap_sem);
+	if (!page)	/* gva_to_page() can return NULL; do not dereference it */
+		return -EINVAL;
+	cr3_cache_hva = (hva_t)__va(page_to_phys(page));
+	vcpu->arch.cr3_cache = (void *)cr3_cache_hva;
+	vcpu->arch.cr3_cache->max_idx = vcpu->arch.cr3_cache_limit;
+
+	printk(KERN_ERR "using CR3 cache\n");
+	return 0;
+}
+
 /*
  * Writes msr value into into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
@@ -791,6 +815,9 @@ static int vmx_set_msr(struct kvm_vcpu *
 	case MSR_IA32_TIME_STAMP_COUNTER:
 		guest_write_tsc(data);
 		break;
+	case KVM_MSR_SET_CR3_CACHE:
+		ret = vmx_cr3_cache_msr(vcpu, data);
+		break;
 	default:
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
@@ -1277,11 +1304,25 @@ static void vmx_set_cr0(struct kvm_vcpu
 		vmx_fpu_activate(vcpu);
 }
 
-static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
+
+static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3_hpa)
 {
-	vmcs_writel(GUEST_CR3, cr3);
+	struct kvm_cr3_cache *cache;
+	int idx;
+
+	vmcs_writel(GUEST_CR3, cr3_hpa);
 	if (vcpu->arch.cr0 & X86_CR0_PE)
 		vmx_fpu_deactivate(vcpu);
+
+	if (!vcpu->arch.cr3_cache)
+		return;
+
+	idx = vcpu->arch.cr3_cache_idx;
+	cache = vcpu->arch.cr3_cache;
+
+	cache->entry[idx].host_cr3 = cr3_hpa;
+	cache->entry[idx].guest_cr3 = vcpu->arch.cr3;
+	vmcs_writel(CR3_TARGET_VALUE0 + idx*2, cr3_hpa);
 }
 
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -1494,6 +1535,39 @@ out:
 	up_write(&current->mm->mmap_sem);
 	return r;
 }
+/*
+ * Set up the cr3 validity hardware cache.
+ */
+static void vmcs_setup_cr3_cache(struct kvm_vcpu *vcpu)
+{
+	unsigned int cr3_target_values, i;
+	u64 msr_val;
+
+	rdmsrl(MSR_IA32_VMX_MISC, msr_val);
+
+	printk("MSR_IA32_VMX_MISC: %016Lx\n", msr_val);
+
+	/*
+	 * 9 bits of "CR3 target values":
+	 */
+	cr3_target_values = (msr_val >> 16) & ((1 << 9) - 1);
+	printk(" cr3 target values: %d\n", cr3_target_values);
+	if (cr3_target_values > KVM_CR3_CACHE_SIZE) {
+		printk("KVM: limiting cr3 cache size from %d to %d\n",
+			cr3_target_values, KVM_CR3_CACHE_SIZE);
+		cr3_target_values = KVM_CR3_CACHE_SIZE;
+	}
+
+	vcpu->arch.cr3_cache_idx = 0;
+	vcpu->arch.cr3_cache_limit = cr3_target_values;
+	/*
+	 * Initialize. TODO: set this to guest physical memory.
+	 */
+	for (i = 0; i < cr3_target_values; i++)
+		vmcs_writel(CR3_TARGET_VALUE0 + i*2, -1UL);
+
+	vmcs_write32(CR3_TARGET_COUNT, cr3_target_values);
+}
 
 /*
  * Sets up the vmcs for emulated real mode.
@@ -1538,7 +1612,7 @@ static int vmx_vcpu_setup(struct vcpu_vm
 
 	vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf);
 	vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf);
-	vmcs_write32(CR3_TARGET_COUNT, 0);           /* 22.2.1 */
+	vmcs_setup_cr3_cache(&vmx->vcpu);
 
 	vmcs_writel(HOST_CR0, read_cr0());  /* 22.2.3 */
 	vmcs_writel(HOST_CR4, read_cr4());  /* 22.2.3, 22.2.5 */
@@ -2336,6 +2410,55 @@ static void fixup_rmode_irq(struct vcpu_
 		| vmx->rmode.irq.vector;
 }
 
+static void kvm_cr3_cache_sync(struct kvm_vcpu *vcpu)
+{
+	void *guest_cr3_hva;
+	hpa_t guest_cr3_hpa;
+	struct kvm_cr3_cache *cache;
+	int j;
+	int idx = vcpu->arch.cr3_cache_idx;
+
+	if (!vcpu->arch.cr3_cache)
+		return;
+
+	guest_cr3_hpa = vmcs_readl(GUEST_CR3);
+	/*
+	 * Are they in sync already?
+	 */
+	if (guest_cr3_hpa == vcpu->arch.mmu.root_hpa[idx])
+		return;
+
+	cache = vcpu->arch.cr3_cache;
+#ifdef CONFIG_X86_64
+	if (vcpu->arch.mmu.shadow_root_level == 4) {
+		for (j = 0; j < vcpu->arch.cr3_cache_limit; j++) {
+			hpa_t root = cache->entry[j].host_cr3;
+			if (root != guest_cr3_hpa)
+				continue;
+			vcpu->arch.cr3 = cache->entry[j].guest_cr3;
+			vcpu->arch.cr3_cache_idx = j;
+			vcpu->arch.mmu.root_hpa[j] = cache->entry[j].host_cr3;
+			++vcpu->stat.cr3_cache_synced;
+			return;
+		}
+		WARN_ON(j == vcpu->arch.cr3_cache_limit); /* no entry matched */
+	}
+#endif
+
+	guest_cr3_hva = __va(guest_cr3_hpa);
+	for (j = 0; j < vcpu->arch.cr3_cache_limit; j++) {
+		u64 *root = vcpu->arch.mmu.pae_root[j];
+		WARN_ON(!root);
+		if (root != guest_cr3_hva)
+			continue;
+		vcpu->arch.cr3 = vcpu->arch.guest_cr3_gpa[j];
+		vcpu->arch.cr3_cache_idx = j;
+		vcpu->arch.mmu.root_hpa[j] = __pa(vcpu->arch.mmu.pae_root[j]);
+		break;
+	}
+	WARN_ON(j == vcpu->arch.cr3_cache_limit); /* no entry matched */
+}
+
 static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2346,6 +2469,8 @@ static void vmx_vcpu_run(struct kvm_vcpu
 	 */
 	vmcs_writel(HOST_CR0, read_cr0());
 
+	WARN_ON(vmcs_readl(GUEST_CR3) != vcpu->arch.mmu.root_hpa[vcpu->arch.cr3_cache_idx]);
+
 	asm(
 		/* Store host registers */
 #ifdef CONFIG_X86_64
@@ -2460,6 +2585,12 @@ static void vmx_vcpu_run(struct kvm_vcpu
 		, "ebx", "edi", "rsi"
 #endif
 	      );
+	/*
+	 * Figure out whether vcpu->cr3 needs updating because
+	 * the guest made use of the cr3 cache.
+	 */
+	kvm_cr3_cache_sync(vcpu);
+	WARN_ON(vmcs_readl(GUEST_CR3) != vcpu->arch.mmu.root_hpa[vcpu->arch.cr3_cache_idx]);
 
 	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 	if (vmx->rmode.irq.pending)
diff -up kvm-60/kernel/paging_tmpl.h.cr3-cache kvm-60/kernel/paging_tmpl.h
--- kvm-60/kernel/paging_tmpl.h.cr3-cache	2008-01-23 10:04:14.000000000 +0100
+++ kvm-60/kernel/paging_tmpl.h	2008-01-29 16:04:20.000000000 +0100
@@ -140,7 +140,7 @@ walk:
 	}
 #endif
 	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
-	       (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
+	       (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
 
 	pt_access = ACC_ALL;
 
@@ -280,10 +280,10 @@ static u64 *FNAME(fetch)(struct kvm_vcpu
 	if (!is_present_pte(walker->ptes[walker->level - 1]))
 		return NULL;
 
-	shadow_addr = vcpu->arch.mmu.root_hpa;
+	shadow_addr = vcpu->arch.mmu.root_hpa[vcpu->arch.cr3_cache_idx];
 	level = vcpu->arch.mmu.shadow_root_level;
 	if (level == PT32E_ROOT_LEVEL) {
-		shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
+		shadow_addr = vcpu->arch.mmu.pae_root[vcpu->arch.cr3_cache_idx][(addr >> 30) & 3];
 		shadow_addr &= PT64_BASE_ADDR_MASK;
 		--level;
 	}
diff -up kvm-60/kernel/mmu.c.cr3-cache kvm-60/kernel/mmu.c
--- kvm-60/kernel/mmu.c.cr3-cache	2008-01-23 10:04:14.000000000 +0100
+++ kvm-60/kernel/mmu.c	2008-01-29 16:04:20.000000000 +0100
@@ -257,6 +257,16 @@ static int mmu_topup_memory_cache(struct
 	}
 	return 0;
 }
+static void kvm_cr3_cache_clear(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cr3_cache *cache;
+
+	if (!vcpu->arch.cr3_cache)
+		return;
+	cache = vcpu->arch.cr3_cache;
+	memset(cache->entry, 0, sizeof(cache->entry));
+}
+
 
 static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
 {
@@ -667,7 +677,8 @@ static struct kvm_mmu_page *kvm_mmu_look
 	index = kvm_page_table_hashfn(gfn);
 	bucket = &kvm->arch.mmu_page_hash[index];
 	hlist_for_each_entry(sp, node, bucket, hash_link)
-		if (sp->gfn == gfn && !sp->role.metaphysical) {
+		if (sp->gfn == gfn && !sp->role.metaphysical &&
+		    !sp->role.invalid) {
 			pgprintk("%s: found role %x\n",
 				 __FUNCTION__, sp->role.word);
 			return sp;
@@ -795,8 +806,10 @@ static void kvm_mmu_zap_page(struct kvm
 	if (!sp->root_count) {
 		hlist_del(&sp->hash_link);
 		kvm_mmu_free_page(kvm, sp);
-	} else
+	} else {
 		list_move(&sp->link, &kvm->arch.active_mmu_pages);
+		sp->role.invalid = 1;
+	}
 	kvm_mmu_reset_last_pte_updated(kvm);
 }
 
@@ -882,6 +895,7 @@ struct page *gva_to_page(struct kvm_vcpu
 		return NULL;
 	return gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
 }
+EXPORT_SYMBOL(gva_to_page);
 
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			 unsigned pt_access, unsigned pte_access,
@@ -975,7 +989,7 @@ static int __nonpaging_map(struct kvm_vc
 			   gfn_t gfn, struct page *page)
 {
 	int level = PT32E_ROOT_LEVEL;
-	hpa_t table_addr = vcpu->arch.mmu.root_hpa;
+	hpa_t table_addr = vcpu->arch.mmu.root_hpa[vcpu->arch.cr3_cache_idx];
 	int pt_write = 0;
 
 	for (; ; level--) {
@@ -1045,60 +1059,83 @@ static void nonpaging_prefetch_page(stru
 
 static void mmu_free_roots(struct kvm_vcpu *vcpu)
 {
-	int i;
+	int i, j = 0;
 	struct kvm_mmu_page *sp;
 
-	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
-		return;
+	/*
+	 * Skip to the next cr3 filter entry and free it (if it's occupied).
+	 */
+	vcpu->arch.cr3_cache_idx++;
+	if (unlikely(vcpu->arch.cr3_cache_idx >= vcpu->arch.cr3_cache_limit))
+		vcpu->arch.cr3_cache_idx = 0;
+	j = vcpu->arch.cr3_cache_idx;
+	/*
+	 * Clear the guest-visible entry.
+	 */
+	if (vcpu->arch.cr3_cache) {
+		vcpu->arch.cr3_cache->entry[j].guest_cr3 = 0;
+		vcpu->arch.cr3_cache->entry[j].host_cr3 = 0;
+	}
+
 	spin_lock(&vcpu->kvm->mmu_lock);
 #ifdef CONFIG_X86_64
 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
-		hpa_t root = vcpu->arch.mmu.root_hpa;
+		hpa_t root = vcpu->arch.mmu.root_hpa[j];
+
+		if (!VALID_PAGE(root)) {
+			spin_unlock(&vcpu->kvm->mmu_lock);
+			return;
+		}
 
 		sp = page_header(root);
 		--sp->root_count;
-		vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+		vcpu->arch.mmu.root_hpa[j] = INVALID_PAGE;
 		spin_unlock(&vcpu->kvm->mmu_lock);
 		return;
 	}
 #endif
-	for (i = 0; i < 4; ++i) {
-		hpa_t root = vcpu->arch.mmu.pae_root[i];
-
-		if (root) {
-			root &= PT64_BASE_ADDR_MASK;
-			sp = page_header(root);
-			--sp->root_count;
+	ASSERT(vcpu->arch.mmu.pae_root[j]);
+	if (VALID_PAGE(vcpu->arch.mmu.pae_root[j][0])) {
+		vcpu->arch.guest_cr3_gpa[j] = INVALID_PAGE;
+		for (i = 0; i < 4; ++i) {
+			hpa_t root = vcpu->arch.mmu.pae_root[j][i];
+
+			if (root) {
+				root &= PT64_BASE_ADDR_MASK;
+				sp = page_header(root);
+				--sp->root_count;
+			}
+			vcpu->arch.mmu.pae_root[j][i] = INVALID_PAGE;
 		}
-		vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
 	}
 	spin_unlock(&vcpu->kvm->mmu_lock);
-	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+	vcpu->arch.mmu.root_hpa[j] = INVALID_PAGE;
 }
 
 static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 {
-	int i;
+	int i, j;
 	gfn_t root_gfn;
 	struct kvm_mmu_page *sp;
 
 	root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
+	j = vcpu->arch.cr3_cache_idx;
 
 #ifdef CONFIG_X86_64
 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
-		hpa_t root = vcpu->arch.mmu.root_hpa;
+		hpa_t root = vcpu->arch.mmu.root_hpa[j];
 
 		ASSERT(!VALID_PAGE(root));
 		sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
 				      PT64_ROOT_LEVEL, 0, ACC_ALL, NULL, NULL);
 		root = __pa(sp->spt);
 		++sp->root_count;
-		vcpu->arch.mmu.root_hpa = root;
+		vcpu->arch.mmu.root_hpa[j] = root;
 		return;
 	}
 #endif
 	for (i = 0; i < 4; ++i) {
-		hpa_t root = vcpu->arch.mmu.pae_root[i];
+		hpa_t root = vcpu->arch.mmu.pae_root[j][i];
 
 		ASSERT(!VALID_PAGE(root));
 		if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
@@ -1114,9 +1151,14 @@ static void mmu_alloc_roots(struct kvm_v
 				      ACC_ALL, NULL, NULL);
 		root = __pa(sp->spt);
 		++sp->root_count;
-		vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
+		vcpu->arch.mmu.pae_root[j][i] = root | PT_PRESENT_MASK;
 	}
-	vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
+	vcpu->arch.mmu.root_hpa[j] = __pa(vcpu->arch.mmu.pae_root[j]);
+	/*
+	 * Store the guest-side address too, we need it if a guest
+	 * exits the VM, to rediscover what cr3 it changed to:
+	 */
+	vcpu->arch.guest_cr3_gpa[j] = vcpu->arch.cr3;
 }
 
 static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
@@ -1136,7 +1178,7 @@ static int nonpaging_page_fault(struct k
 		return r;
 
 	ASSERT(vcpu);
-	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
+	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa[vcpu->arch.cr3_cache_idx]));
 
 	gfn = gva >> PAGE_SHIFT;
 
@@ -1146,12 +1188,19 @@ static int nonpaging_page_fault(struct k
 
 static void nonpaging_free(struct kvm_vcpu *vcpu)
 {
-	mmu_free_roots(vcpu);
+	int j;
+
+	/*
+	 * This will cycle through all existing roots and free them.
+	 */
+	for (j = 0; j < KVM_CR3_CACHE_SIZE; j++)
+		mmu_free_roots(vcpu);
 }
 
 static int nonpaging_init_context(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int i;
 
 	context->new_cr3 = nonpaging_new_cr3;
 	context->page_fault = nonpaging_page_fault;
@@ -1160,7 +1209,8 @@ static int nonpaging_init_context(struct
 	context->prefetch_page = nonpaging_prefetch_page;
 	context->root_level = 0;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
-	context->root_hpa = INVALID_PAGE;
+	for (i = 0; i < KVM_CR3_CACHE_SIZE; i++)
+		context->root_hpa[i] = INVALID_PAGE;
 	return 0;
 }
 
@@ -1199,6 +1249,7 @@ static void paging_free(struct kvm_vcpu
 static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int i;
 
 	ASSERT(is_pae(vcpu));
 	context->new_cr3 = paging_new_cr3;
@@ -1208,7 +1259,8 @@ static int paging64_init_context_common(
 	context->free = paging_free;
 	context->root_level = level;
 	context->shadow_root_level = level;
-	context->root_hpa = INVALID_PAGE;
+	for (i = 0; i < KVM_CR3_CACHE_SIZE; i++)
+		context->root_hpa[i] = INVALID_PAGE;
 	return 0;
 }
 
@@ -1220,6 +1272,7 @@ static int paging64_init_context(struct
 static int paging32_init_context(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int i;
 
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
@@ -1228,7 +1281,8 @@ static int paging32_init_context(struct
 	context->prefetch_page = paging32_prefetch_page;
 	context->root_level = PT32_ROOT_LEVEL;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
-	context->root_hpa = INVALID_PAGE;
+	for (i = 0; i < KVM_CR3_CACHE_SIZE; i++)
+		context->root_hpa[i] = INVALID_PAGE;
 	return 0;
 }
 
@@ -1240,7 +1294,7 @@ static int paging32E_init_context(struct
 static int init_kvm_mmu(struct kvm_vcpu *vcpu)
 {
 	ASSERT(vcpu);
-	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa[vcpu->arch.cr3_cache_idx]));
 
 	if (!is_paging(vcpu))
 		return nonpaging_init_context(vcpu);
@@ -1254,11 +1308,14 @@ static int init_kvm_mmu(struct kvm_vcpu
 
 static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
 {
+	int j;
 	ASSERT(vcpu);
-	if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) {
-		vcpu->arch.mmu.free(vcpu);
-		vcpu->arch.mmu.root_hpa = INVALID_PAGE;
-	}
+
+	for(j = 0; j < KVM_CR3_CACHE_SIZE; j++)
+		if (VALID_PAGE(vcpu->arch.mmu.root_hpa[j])) {
+			vcpu->arch.mmu.free(vcpu);
+			vcpu->arch.mmu.root_hpa[j] = INVALID_PAGE;
+		}
 }
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
@@ -1271,6 +1328,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_reset_context)
 int kvm_mmu_load(struct kvm_vcpu *vcpu)
 {
 	int r;
+	int j = vcpu->arch.cr3_cache_idx;
 
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
@@ -1279,8 +1337,8 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
 	kvm_mmu_free_some_pages(vcpu);
 	mmu_alloc_roots(vcpu);
 	spin_unlock(&vcpu->kvm->mmu_lock);
-	kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
-	kvm_mmu_flush_tlb(vcpu);
+	/* setting CR3 will flush the TLB */
+	kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa[j]);
 out:
 	return r;
 }
@@ -1288,7 +1346,9 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
 
 void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 {
-	mmu_free_roots(vcpu);
+	int j;
+	for (j = 0; j < KVM_CR3_CACHE_SIZE; j++)
+		mmu_free_roots(vcpu);
 }
 
 static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
@@ -1449,6 +1509,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *
 			 */
 			pgprintk("misaligned: gpa %llx bytes %d role %x\n",
 				 gpa, bytes, sp->role.word);
+			kvm_cr3_cache_clear(vcpu);
 			kvm_mmu_zap_page(vcpu->kvm, sp);
 			++vcpu->kvm->stat.mmu_flooded;
 			continue;
@@ -1567,19 +1628,24 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
 static void free_mmu_pages(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu_page *sp;
+	int j;
 
 	while (!list_empty(&vcpu->kvm->arch.active_mmu_pages)) {
 		sp = container_of(vcpu->kvm->arch.active_mmu_pages.next,
 				  struct kvm_mmu_page, link);
 		kvm_mmu_zap_page(vcpu->kvm, sp);
 	}
-	free_page((unsigned long)vcpu->arch.mmu.pae_root);
+	for (j = 0; j < KVM_CR3_CACHE_SIZE; j++) {
+		ASSERT(vcpu->arch.mmu.pae_root[j]);
+		free_page((unsigned long)vcpu->arch.mmu.pae_root[j]);
+		vcpu->arch.mmu.pae_root[j] = NULL;
+	}
 }
 
 static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 {
 	struct page *page;
-	int i;
+	int i, j;
 
 	ASSERT(vcpu);
 
@@ -1589,17 +1655,23 @@ static int alloc_mmu_pages(struct kvm_vc
 	else
 		vcpu->kvm->arch.n_free_mmu_pages =
 					vcpu->kvm->arch.n_alloc_mmu_pages;
-	/*
-	 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
-	 * Therefore we need to allocate shadow page tables in the first
-	 * 4GB of memory, which happens to fit the DMA32 zone.
-	 */
-	page = alloc_page(GFP_KERNEL | __GFP_DMA32);
-	if (!page)
-		goto error_1;
-	vcpu->arch.mmu.pae_root = page_address(page);
-	for (i = 0; i < 4; ++i)
-		vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
+
+	for (j = 0; j < KVM_CR3_CACHE_SIZE; j++) {
+		/*
+		 * When emulating 32-bit mode, cr3 is only 32 bits even on
+		 * x86_64. Therefore we need to allocate shadow page tables
+		 * in the first 4GB of memory, which happens to fit the DMA32
+		 * zone.
+		 */
+		page = alloc_page(GFP_KERNEL | __GFP_DMA32);
+		if (!page)
+			goto error_1;
+
+		ASSERT(!vcpu->arch.mmu.pae_root[j]);
+		vcpu->arch.mmu.pae_root[j] = page_address(page);
+		for (i = 0; i < 4; ++i)
+			vcpu->arch.mmu.pae_root[j][i] = INVALID_PAGE;
+	}
 
 	return 0;
 
@@ -1611,7 +1683,7 @@ error_1:
 int kvm_mmu_create(struct kvm_vcpu *vcpu)
 {
 	ASSERT(vcpu);
-	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa[vcpu->arch.cr3_cache_idx]));
 
 	return alloc_mmu_pages(vcpu);
 }
@@ -1619,7 +1691,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu
 int kvm_mmu_setup(struct kvm_vcpu *vcpu)
 {
 	ASSERT(vcpu);
-	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa[vcpu->arch.cr3_cache_idx]));
 
 	return init_kvm_mmu(vcpu);
 }
@@ -1779,15 +1851,16 @@ static void audit_mappings(struct kvm_vc
 {
-	unsigned i;
+	unsigned i, j;
 
-	if (vcpu->arch.mmu.root_level == 4)
-		audit_mappings_page(vcpu, vcpu->arch.mmu.root_hpa, 0, 4);
-	else
+	if (vcpu->arch.mmu.root_level == 4) {
+		audit_mappings_page(vcpu, vcpu->arch.mmu.root_hpa[vcpu->arch.cr3_cache_idx], 0, 4);
+		return;
+	}
+	for (j = 0; j < KVM_CR3_CACHE_SIZE; j++) {
 		for (i = 0; i < 4; ++i)
-			if (vcpu->arch.mmu.pae_root[i] & PT_PRESENT_MASK)
+			if (vcpu->arch.mmu.pae_root[j][i] & PT_PRESENT_MASK)
 				audit_mappings_page(vcpu,
-						    vcpu->arch.mmu.pae_root[i],
-						    i << 30,
-						    2);
+					vcpu->arch.mmu.pae_root[j][i], i << 30, 2);
+	}
 }
 
 static int count_rmaps(struct kvm_vcpu *vcpu)
diff -up kvm-60/kernel/x86.c.cr3-cache kvm-60/kernel/x86.c
--- kvm-60/kernel/x86.c.cr3-cache	2008-01-23 10:04:14.000000000 +0100
+++ kvm-60/kernel/x86.c	2008-01-29 16:04:39.000000000 +0100
@@ -67,6 +67,7 @@ struct kvm_stats_debugfs_item debugfs_en
 	{ "fpu_reload", VCPU_STAT(fpu_reload) },
 	{ "insn_emulation", VCPU_STAT(insn_emulation) },
 	{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
+	{ "cr3_cache_synced", VCPU_STAT(cr3_cache_synced) },
 	{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
 	{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
 	{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@@ -3123,12 +3124,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *
 {
 	struct page *page;
 	struct kvm *kvm;
-	int r;
+	int r, i;
 
 	BUG_ON(vcpu->kvm == NULL);
 	kvm = vcpu->kvm;
 
-	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+	for (i = 0; i < KVM_CR3_CACHE_SIZE; i++)
+		vcpu->arch.mmu.root_hpa[i] = INVALID_PAGE;
 	if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0)
 		vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
 	else
diff -up kvm-60/kernel/mmu.h.cr3-cache kvm-60/kernel/mmu.h
--- kvm-60/kernel/mmu.h.cr3-cache	2008-01-23 10:04:14.000000000 +0100
+++ kvm-60/kernel/mmu.h	2008-01-29 16:04:20.000000000 +0100
@@ -11,7 +11,8 @@ static inline void kvm_mmu_free_some_pag
 
 static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
 {
-	if (likely(vcpu->arch.mmu.root_hpa != INVALID_PAGE))
+	int idx = vcpu->arch.cr3_cache_idx;
+	if (likely(vcpu->arch.mmu.root_hpa[idx] != INVALID_PAGE))
 		return 0;
 
 	return kvm_mmu_load(vcpu);