diff options
author | Neel Natu <neel@FreeBSD.org> | 2015-01-18 03:08:30 +0000 |
---|---|---|
committer | Neel Natu <neel@FreeBSD.org> | 2015-01-18 03:08:30 +0000 |
commit | d087a39935a31447dadf24d8e057fcaf1b45b9a9 (patch) | |
tree | 059a656c08c5c81e9f08be1740cebcdf26e3df13 /sys/amd64 | |
parent | 1bd468a4e9f24b938580c09cbb3b24010f07bb29 (diff) | |
download | src-d087a39935a31447dadf24d8e057fcaf1b45b9a9.tar.gz src-d087a39935a31447dadf24d8e057fcaf1b45b9a9.zip |
Simplify instruction restart logic in bhyve.
Keep track of the next instruction to be executed by the vcpu as 'nextrip'.
As a result the VM_RUN ioctl no longer takes the %rip where a vcpu should
start execution.
Also, instruction restart happens implicitly via 'vm_inject_exception()' or
explicitly via 'vm_restart_instruction()'. The APIs behave identically in
both kernel and userspace contexts. The main beneficiary is the instruction
emulation code that executes in both contexts.
bhyve(8) VM exit handlers now treat 'vmexit->rip' and 'vmexit->inst_length'
as readonly:
- Restarting an instruction is now done by calling 'vm_restart_instruction()'
  as opposed to setting 'vmexit->inst_length' to 0 (e.g. emulate_inout())
- Resuming vcpu at an arbitrary %rip is now done by setting VM_REG_GUEST_RIP
  as opposed to changing 'vmexit->rip' (e.g. vmexit_task_switch())
Differential Revision: https://reviews.freebsd.org/D1526
Reviewed by: grehan
MFC after: 2 weeks
Notes:
svn path=/head/; revision=277310
Diffstat (limited to 'sys/amd64')
-rw-r--r-- | sys/amd64/include/vmm_dev.h | 4 |
-rw-r--r-- | sys/amd64/vmm/intel/vmx.c | 1 |
-rw-r--r-- | sys/amd64/vmm/vmm.c | 73 |
-rw-r--r-- | sys/amd64/vmm/vmm_dev.c | 4 |
4 files changed, 65 insertions, 17 deletions
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h index f3354e341428..9d031a9525e5 100644 --- a/sys/amd64/include/vmm_dev.h +++ b/sys/amd64/include/vmm_dev.h @@ -54,7 +54,6 @@ struct vm_seg_desc { /* data or code segment */ struct vm_run { int cpuid; - uint64_t rip; /* start running here */ struct vm_exit vm_exit; }; @@ -238,6 +237,7 @@ enum { IOCNUM_LAPIC_MSI = 36, IOCNUM_LAPIC_LOCAL_IRQ = 37, IOCNUM_IOAPIC_PINCOUNT = 38, + IOCNUM_RESTART_INSTRUCTION = 39, /* PCI pass-thru */ IOCNUM_BIND_PPTDEV = 40, @@ -360,4 +360,6 @@ enum { _IOW('v', IOCNUM_RTC_SETTIME, struct vm_rtc_time) #define VM_RTC_GETTIME \ _IOR('v', IOCNUM_RTC_GETTIME, struct vm_rtc_time) +#define VM_RESTART_INSTRUCTION \ + _IOW('v', IOCNUM_RESTART_INSTRUCTION, int) #endif diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c index a10a591d94d5..c83a8b29d724 100644 --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -2412,6 +2412,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) if (vm_mem_allocated(vmx->vm, gpa) || apic_access_fault(vmx, vcpu, gpa)) { vmexit->exitcode = VM_EXITCODE_PAGING; + vmexit->inst_length = 0; vmexit->u.paging.gpa = gpa; vmexit->u.paging.fault_type = ept_fault_type(qual); vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NESTED_FAULT, 1); diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index ff32b33e5b90..615a6397aec5 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -109,6 +109,7 @@ struct vcpu { uint64_t guest_xcr0; /* (i) guest %xcr0 register */ void *stats; /* (a,i) statistics */ struct vm_exit exitinfo; /* (x) exit reason and collateral */ + uint64_t nextrip; /* (x) next instruction to execute */ }; #define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) @@ -850,16 +851,26 @@ vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) } int -vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val) +vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val) { + struct vcpu 
*vcpu; + int error; - if (vcpu < 0 || vcpu >= VM_MAXCPU) + if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); if (reg >= VM_REG_LAST) return (EINVAL); - return (VMSETREG(vm->cookie, vcpu, reg, val)); + error = VMSETREG(vm->cookie, vcpuid, reg, val); + if (error || reg != VM_REG_GUEST_RIP) + return (error); + + /* Set 'nextrip' to match the value of %rip */ + VCPU_CTR1(vm, vcpuid, "Setting nextrip to %#lx", val); + vcpu = &vm->vcpu[vcpuid]; + vcpu->nextrip = val; + return (0); } static boolean_t @@ -1199,6 +1210,9 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu) vcpu = &vm->vcpu[vcpuid]; vme = &vcpu->exitinfo; + KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", + __func__, vme->inst_length)); + ftype = vme->u.paging.fault_type; KASSERT(ftype == VM_PROT_READ || ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE, @@ -1224,9 +1238,6 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu) if (rv != KERN_SUCCESS) return (EFAULT); done: - /* restart execution at the faulting instruction */ - vm_restart_instruction(vm, vcpuid); - return (0); } @@ -1281,10 +1292,13 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) return (EFAULT); /* - * If the instruction length is not specified the update it now. + * If the instruction length was not specified then update it now + * along with 'nextrip'. 
*/ - if (vme->inst_length == 0) + if (vme->inst_length == 0) { vme->inst_length = vie->num_processed; + vcpu->nextrip += vie->num_processed; + } /* return to userland unless this is an in-kernel emulated device */ if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { @@ -1433,7 +1447,7 @@ vm_run(struct vm *vm, struct vm_run *vmrun) int error, vcpuid; struct vcpu *vcpu; struct pcb *pcb; - uint64_t tscval, rip; + uint64_t tscval; struct vm_exit *vme; bool retu, intr_disabled; pmap_t pmap; @@ -1455,7 +1469,6 @@ vm_run(struct vm *vm, struct vm_run *vmrun) pmap = vmspace_pmap(vm->vmspace); vcpu = &vm->vcpu[vcpuid]; vme = &vcpu->exitinfo; - rip = vmrun->rip; restart: critical_enter(); @@ -1470,7 +1483,7 @@ restart: restore_guest_fpustate(vcpu); vcpu_require_state(vm, vcpuid, VCPU_RUNNING); - error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr); + error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, rptr, sptr); vcpu_require_state(vm, vcpuid, VCPU_FROZEN); save_guest_fpustate(vcpu); @@ -1481,6 +1494,7 @@ restart: if (error == 0) { retu = false; + vcpu->nextrip = vme->rip + vme->inst_length; switch (vme->exitcode) { case VM_EXITCODE_SUSPENDED: error = vm_handle_suspend(vm, vcpuid, &retu); @@ -1517,10 +1531,8 @@ restart: } } - if (error == 0 && retu == false) { - rip = vme->rip + vme->inst_length; + if (error == 0 && retu == false) goto restart; - } /* copy the exit information */ bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit)); @@ -1530,14 +1542,43 @@ restart: int vm_restart_instruction(void *arg, int vcpuid) { + struct vm *vm; struct vcpu *vcpu; - struct vm *vm = arg; + enum vcpu_state state; + uint64_t rip; + int error; + vm = arg; if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); vcpu = &vm->vcpu[vcpuid]; - vcpu->exitinfo.inst_length = 0; + state = vcpu_get_state(vm, vcpuid, NULL); + if (state == VCPU_RUNNING) { + /* + * When a vcpu is "running" the next instruction is determined + * by adding 'rip' and 'inst_length' in the vcpu's 
'exitinfo'. + * Thus setting 'inst_length' to zero will cause the current + * instruction to be restarted. + */ + vcpu->exitinfo.inst_length = 0; + VCPU_CTR1(vm, vcpuid, "restarting instruction at %#lx by " + "setting inst_length to zero", vcpu->exitinfo.rip); + } else if (state == VCPU_FROZEN) { + /* + * When a vcpu is "frozen" it is outside the critical section + * around VMRUN() and 'nextrip' points to the next instruction. + * Thus instruction restart is achieved by setting 'nextrip' + * to the vcpu's %rip. + */ + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RIP, &rip); + KASSERT(!error, ("%s: error %d getting rip", __func__, error)); + VCPU_CTR2(vm, vcpuid, "restarting instruction by updating " + "nextrip from %#lx to %#lx", vcpu->nextrip, rip); + vcpu->nextrip = rip; + } else { + panic("%s: invalid state %d", __func__, state); + } return (0); } diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c index 9b39f145c71c..0293d19141ff 100644 --- a/sys/amd64/vmm/vmm_dev.c +++ b/sys/amd64/vmm/vmm_dev.c @@ -205,6 +205,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, case VM_ACTIVATE_CPU: case VM_SET_INTINFO: case VM_GET_INTINFO: + case VM_RESTART_INSTRUCTION: /* * XXX fragile, handle with care * Assumes that the first field of the ioctl data is the vcpu. @@ -506,6 +507,9 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, rtctime = (struct vm_rtc_time *)data; rtctime->secs = vrtc_get_time(sc->vm); break; + case VM_RESTART_INSTRUCTION: + error = vm_restart_instruction(sc->vm, vcpu); + break; default: error = ENOTTY; break; |