From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <linux-kernel-owner@vger.kernel.org>
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1754097AbbINOya (ORCPT <rfc822;w@1wt.eu>);
	Mon, 14 Sep 2015 10:54:30 -0400
Received: from thoth.sbs.de ([192.35.17.2]:48872 "EHLO thoth.sbs.de"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1751708AbbINOy3 (ORCPT <rfc822;linux-kernel@vger.kernel.org>);
	Mon, 14 Sep 2015 10:54:29 -0400
Subject: Re: [PATCH] KVM: nVMX: nested VPID emulation
To: Wanpeng Li <wanpeng.li@hotmail.com>, Paolo Bonzini <pbonzini@redhat.com>
References: <BLU436-SMTP172159ECF89EA5C21830024805D0@phx.gbl>
Cc: kvm@vger.kernel.org, linux-kernel@vger.kernel.org
From: Jan Kiszka <jan.kiszka@siemens.com>
Message-ID: <55F6DF9D.5070508@siemens.com>
Date: Mon, 14 Sep 2015 16:54:21 +0200
User-Agent: Mozilla/5.0 (X11; U; Linux i686 (x86_64); de; rv:1.8.1.12)
 Gecko/20080226 SUSE/2.0.0.12-1.1 Thunderbird/2.0.0.12 Mnenhy/0.7.5.666
MIME-Version: 1.0
In-Reply-To: <BLU436-SMTP172159ECF89EA5C21830024805D0@phx.gbl>
Content-Type: text/plain; charset=iso-8859-15
Content-Transfer-Encoding: 7bit
Sender: linux-kernel-owner@vger.kernel.org
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org

On 2015-09-14 14:52, Wanpeng Li wrote:
> VPID is used to tag address space and avoid a TLB flush. Currently L0 use 
> the same VPID to run L1 and all its guests. KVM flushes VPID when switching 
> between L1 and L2. 
> 
> This patch advertises VPID to the L1 hypervisor, then address space of L1 and 
> L2 can be separately treated and avoid TLB flush when swithing between L1 and 
> L2. This patch gets ~3x performance improvement for lmbench 8p/64k ctxsw.
> 
> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
> ---
>  arch/x86/kvm/vmx.c | 39 ++++++++++++++++++++++++++++++++-------
>  1 file changed, 32 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index da1590e..06bc31e 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -1157,6 +1157,11 @@ static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
>  	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
>  }
>  
> +static inline bool nested_cpu_has_vpid(struct vmcs12 *vmcs12)
> +{
> +	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VPID);
> +}
> +
>  static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12)
>  {
>  	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT);
> @@ -2471,6 +2476,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
>  		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
>  		SECONDARY_EXEC_RDTSCP |
>  		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
> +		SECONDARY_EXEC_ENABLE_VPID |
>  		SECONDARY_EXEC_APIC_REGISTER_VIRT |
>  		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
>  		SECONDARY_EXEC_WBINVD_EXITING |
> @@ -4160,7 +4166,7 @@ static void allocate_vpid(struct vcpu_vmx *vmx)
>  	int vpid;
>  
>  	vmx->vpid = 0;
> -	if (!enable_vpid)
> +	if (!enable_vpid || is_guest_mode(&vmx->vcpu))
>  		return;
>  	spin_lock(&vmx_vpid_lock);
>  	vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
> @@ -6738,6 +6744,14 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
>  	}
>  	vmcs12 = kmap(page);
>  	vmcs12->launch_state = 0;
> +	if (enable_vpid) {
> +		if (nested_cpu_has_vpid(vmcs12)) {
> +			spin_lock(&vmx_vpid_lock);
> +			if (vmcs12->virtual_processor_id != 0)
> +				__clear_bit(vmcs12->virtual_processor_id, vmx_vpid_bitmap);
> +			spin_unlock(&vmx_vpid_lock);

Maybe enhance free_vpid (and also allocate_vpid) to work generically and
let the caller decide where to take the vpid from or where to store it?

> +		}
> +	}
>  	kunmap(page);
>  	nested_release_page(page);
>  
> @@ -9189,6 +9203,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  	u32 exec_control;
> +	int vpid;
>  
>  	vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
>  	vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
> @@ -9438,13 +9453,21 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	else
>  		vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
>  
> +
>  	if (enable_vpid) {
> -		/*
> -		 * Trivially support vpid by letting L2s share their parent
> -		 * L1's vpid. TODO: move to a more elaborate solution, giving
> -		 * each L2 its own vpid and exposing the vpid feature to L1.
> -		 */
> -		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
> +		if (nested_cpu_has_vpid(vmcs12)) {
> +			if (vmcs12->virtual_processor_id == 0) {
> +				spin_lock(&vmx_vpid_lock);
> +				vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
> +				if (vpid < VMX_NR_VPIDS)
> +					__set_bit(vpid, vmx_vpid_bitmap);
> +				spin_unlock(&vmx_vpid_lock);
> +				vmcs_write16(VIRTUAL_PROCESSOR_ID, vpid);

It's a bit non-obvious that vpid == VMX_NR_VPIDS (no free vpids) will
lead to vpid == 0 when writing VIRTUAL_PROCESSOR_ID. You should leave at
least a comment. Or generalize allocate_vpid as that one is already
clearer in this regard.

> +			} else
> +				vmcs_write16(VIRTUAL_PROCESSOR_ID, vmcs12->virtual_processor_id);
> +		} else
> +			vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
> +
>  		vmx_flush_tlb(vcpu);
>  	}
>  
> @@ -9973,6 +9996,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
>  		vmcs12_save_pending_event(vcpu, vmcs12);
>  	}
>  
> +	if (nested_cpu_has_vpid(vmcs12))
> +		vmcs12->virtual_processor_id = vmcs_read16(VIRTUAL_PROCESSOR_ID);
>  	/*
>  	 * Drop what we picked up for L2 via vmx_complete_interrupts. It is
>  	 * preserved above and would only end up incorrectly in L1.
> 

Last but not least: the guest can now easily exhaust the host's pool of
vpid by simply spawning plenty of VCPUs for L2, no? Is this acceptable
or should there be some limit?

Jan

-- 
Siemens AG, Corporate Technology, CT RTC ITP SES-DE
Corporate Competence Center Embedded Linux