[PATCH] Fix vmce addr/misc wrmsr bug

classic Classic list List threaded Threaded
3 messages Options
Reply | Threaded
Open this post in threaded view
|

[PATCH] Fix vmce addr/misc wrmsr bug

Liu, Jinsong
Fix vmce addr/misc wrmsr bug

This patch fix a bug related to wrmsr vmce bank addr/misc
registers, since they are not read-only.

Intel SDM recommanded os mce driver clear bank status/addr/misc.
So guest mce driver may clear addr and misc registers.
Under such case, old vmce wrmsr logic would generate
a #GP fault at guest mce context, and make guest crash.

Signed-off-by: Liu, Jinsong <[hidden email]>

diff -r 755f440b3b78 xen/arch/x86/cpu/mcheck/vmce.c
--- a/xen/arch/x86/cpu/mcheck/vmce.c Wed Apr 18 18:33:36 2012 +0800
+++ b/xen/arch/x86/cpu/mcheck/vmce.c Sun May 06 03:05:20 2012 +0800
@@ -209,6 +209,14 @@
     struct domain_mca_msrs *vmce = dom_vmce(v->domain);
     struct bank_entry *entry = NULL;
 
+    /* Give the first entry of the list, it corresponds to current
+     * vMCE# injection. When vMCE# is finished processing by the
+     * the guest, this node will be deleted.
+     * Only error bank is written. Non-error banks simply return.
+     */
+    if ( !list_empty(&vmce->impact_header) )
+        entry = list_entry(vmce->impact_header.next, struct bank_entry, list);
+
     switch ( msr & (MSR_IA32_MC0_CTL | 3) )
     {
     case MSR_IA32_MC0_CTL:
@@ -216,32 +224,28 @@
             vmce->mci_ctl[bank] = val;
         break;
     case MSR_IA32_MC0_STATUS:
-        /* Give the first entry of the list, it corresponds to current
-         * vMCE# injection. When vMCE# is finished processing by the
-         * the guest, this node will be deleted.
-         * Only error bank is written. Non-error banks simply return.
-         */
-        if ( !list_empty(&vmce->impact_header) )
+        if ( entry && (entry->bank == bank) )
         {
-            entry = list_entry(vmce->impact_header.next,
-                               struct bank_entry, list);
-            if ( entry->bank == bank )
-                entry->mci_status = val;
+            entry->mci_status = val;
             mce_printk(MCE_VERBOSE,
-                       "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n",
-                       bank, val);
+                       "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n", bank, val);
         }
-        else
-            mce_printk(MCE_VERBOSE,
-                       "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, val);
         break;
     case MSR_IA32_MC0_ADDR:
-        mce_printk(MCE_QUIET, "MCE: MC%u_ADDR is read-only\n", bank);
-        ret = -1;
+        if ( entry && (entry->bank == bank) )
+        {
+            entry->mci_addr = val;
+            mce_printk(MCE_VERBOSE,
+                       "MCE: wr MC%u_ADDR %"PRIx64" in vMCE#\n", bank, val);
+        }
         break;
     case MSR_IA32_MC0_MISC:
-        mce_printk(MCE_QUIET, "MCE: MC%u_MISC is read-only\n", bank);
-        ret = -1;
+        if ( entry && (entry->bank == bank) )
+        {
+            entry->mci_misc = val;
+            mce_printk(MCE_VERBOSE,
+                       "MCE: wr MC%u_MISC %"PRIx64" in vMCE#\n", bank, val);
+        }
         break;
     default:
         switch ( boot_cpu_data.x86_vendor )
_______________________________________________
Xen-devel mailing list
[hidden email]
http://lists.xen.org/xen-devel

vmce-bug-fix.patch (4K) Download Attachment
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Fix vmce addr/misc wrmsr bug

Jan Beulich-2
>>> On 06.05.12 at 14:13, "Liu, Jinsong" <[hidden email]> wrote:
> Fix vmce addr/misc wrmsr bug
>
> This patch fix a bug related to wrmsr vmce bank addr/misc
> registers, since they are not read-only.
>
> Intel SDM recommanded os mce driver clear bank status/addr/misc.
> So guest mce driver may clear addr and misc registers.
> Under such case, old vmce wrmsr logic would generate
> a #GP fault at guest mce context, and make guest crash.
>
> Signed-off-by: Liu, Jinsong <[hidden email]>
>
> diff -r 755f440b3b78 xen/arch/x86/cpu/mcheck/vmce.c
> --- a/xen/arch/x86/cpu/mcheck/vmce.c Wed Apr 18 18:33:36 2012 +0800
> +++ b/xen/arch/x86/cpu/mcheck/vmce.c Sun May 06 03:05:20 2012 +0800
> @@ -209,6 +209,14 @@
>      struct domain_mca_msrs *vmce = dom_vmce(v->domain);
>      struct bank_entry *entry = NULL;
>  
> +    /* Give the first entry of the list, it corresponds to current
> +     * vMCE# injection. When vMCE# is finished processing by the
> +     * the guest, this node will be deleted.
> +     * Only error bank is written. Non-error banks simply return.
> +     */
> +    if ( !list_empty(&vmce->impact_header) )
> +        entry = list_entry(vmce->impact_header.next, struct bank_entry, list);
> +
>      switch ( msr & (MSR_IA32_MC0_CTL | 3) )
>      {
>      case MSR_IA32_MC0_CTL:
> @@ -216,32 +224,28 @@
>              vmce->mci_ctl[bank] = val;
>          break;
>      case MSR_IA32_MC0_STATUS:
> -        /* Give the first entry of the list, it corresponds to current
> -         * vMCE# injection. When vMCE# is finished processing by the
> -         * the guest, this node will be deleted.
> -         * Only error bank is written. Non-error banks simply return.
> -         */
> -        if ( !list_empty(&vmce->impact_header) )
> +        if ( entry && (entry->bank == bank) )
>          {
> -            entry = list_entry(vmce->impact_header.next,
> -                               struct bank_entry, list);
> -            if ( entry->bank == bank )
> -                entry->mci_status = val;
> +            entry->mci_status = val;
>              mce_printk(MCE_VERBOSE,
> -                       "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n",
> -                       bank, val);
> +                       "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n", bank, val);
>          }
> -        else
> -            mce_printk(MCE_VERBOSE,
> -                       "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, val);

Why do you delete this printk()? It'll be impossible to track down
ignored guest writes, should those cause a problem in the guest.

>          break;
>      case MSR_IA32_MC0_ADDR:
> -        mce_printk(MCE_QUIET, "MCE: MC%u_ADDR is read-only\n", bank);
> -        ret = -1;
> +        if ( entry && (entry->bank == bank) )
> +        {
> +            entry->mci_addr = val;
> +            mce_printk(MCE_VERBOSE,
> +                       "MCE: wr MC%u_ADDR %"PRIx64" in vMCE#\n", bank, val);
> +        }

The patch description talks only about clearing the register, yet you
silently accept all writes here. Would real hardware accept writes of
other than zero?

Further, just as above, ignore writes won't be possible to track down.

>          break;
>      case MSR_IA32_MC0_MISC:
> -        mce_printk(MCE_QUIET, "MCE: MC%u_MISC is read-only\n", bank);
> -        ret = -1;
> +        if ( entry && (entry->bank == bank) )
> +        {
> +            entry->mci_misc = val;
> +            mce_printk(MCE_VERBOSE,
> +                       "MCE: wr MC%u_MISC %"PRIx64" in vMCE#\n", bank, val);
> +        }

Same here in both respects.

Jan

>          break;
>      default:
>          switch ( boot_cpu_data.x86_vendor )




_______________________________________________
Xen-devel mailing list
[hidden email]
http://lists.xen.org/xen-devel
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Fix vmce addr/misc wrmsr bug

Liu, Jinsong
Jan Beulich wrote:

>>>> On 06.05.12 at 14:13, "Liu, Jinsong" <[hidden email]>
>>>> wrote: Fix vmce addr/misc wrmsr bug
>>
>> This patch fix a bug related to wrmsr vmce bank addr/misc
>> registers, since they are not read-only.
>>
>> Intel SDM recommanded os mce driver clear bank status/addr/misc.
>> So guest mce driver may clear addr and misc registers.
>> Under such case, old vmce wrmsr logic would generate
>> a #GP fault at guest mce context, and make guest crash.
>>
>> Signed-off-by: Liu, Jinsong <[hidden email]>
>>
>> diff -r 755f440b3b78 xen/arch/x86/cpu/mcheck/vmce.c
>> --- a/xen/arch/x86/cpu/mcheck/vmce.c Wed Apr 18 18:33:36 2012 +0800
>> +++ b/xen/arch/x86/cpu/mcheck/vmce.c Sun May 06 03:05:20 2012 +0800
>>      @@ -209,6 +209,14 @@ struct domain_mca_msrs *vmce =
>>      dom_vmce(v->domain); struct bank_entry *entry = NULL;
>>
>> +    /* Give the first entry of the list, it corresponds to current
>> +     * vMCE# injection. When vMCE# is finished processing by the
>> +     * the guest, this node will be deleted.
>> +     * Only error bank is written. Non-error banks simply return. +
>> */ +    if ( !list_empty(&vmce->impact_header) )
>> +        entry = list_entry(vmce->impact_header.next, struct
>>      bank_entry, list); + switch ( msr & (MSR_IA32_MC0_CTL | 3) )
>>      {
>>      case MSR_IA32_MC0_CTL:
>> @@ -216,32 +224,28 @@
>>              vmce->mci_ctl[bank] = val;
>>          break;
>>      case MSR_IA32_MC0_STATUS:
>> -        /* Give the first entry of the list, it corresponds to
>> current
>> -         * vMCE# injection. When vMCE# is finished processing by the
>> -         * the guest, this node will be deleted.
>> -         * Only error bank is written. Non-error banks simply
>> return.
>> -         */
>> -        if ( !list_empty(&vmce->impact_header) )
>> +        if ( entry && (entry->bank == bank) )
>>          {
>> -            entry = list_entry(vmce->impact_header.next,
>> -                               struct bank_entry, list);
>> -            if ( entry->bank == bank )
>> -                entry->mci_status = val;
>> +            entry->mci_status = val;
>>              mce_printk(MCE_VERBOSE,
>> -                       "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n",
>> -                       bank, val);
>> +                       "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n",
>> bank, val);          }
>> -        else
>> -            mce_printk(MCE_VERBOSE,
>> -                       "MCE: wr MC%u_STATUS %"PRIx64"\n", bank,
>> val);
>
> Why do you delete this printk()? It'll be impossible to track down
> ignored guest writes, should those cause a problem in the guest.

OK, will add printk.

>
>>          break;
>>      case MSR_IA32_MC0_ADDR:
>> -        mce_printk(MCE_QUIET, "MCE: MC%u_ADDR is read-only\n",
>> bank);
>> -        ret = -1;
>> +        if ( entry && (entry->bank == bank) )
>> +        {
>> +            entry->mci_addr = val;
>> +            mce_printk(MCE_VERBOSE,
>> +                       "MCE: wr MC%u_ADDR %"PRIx64" in vMCE#\n",
>> bank, val); +        }
>
> The patch description talks only about clearing the register, yet you
> silently accept all writes here. Would real hardware accept writes of
> other than zero?

Yes, except write all 1's would cause GP. Will add code to handle writing all 1's case.

>
> Further, just as above, ignore writes won't be possible to track down.
>
>>          break;
>>      case MSR_IA32_MC0_MISC:
>> -        mce_printk(MCE_QUIET, "MCE: MC%u_MISC is read-only\n",
>> bank);
>> -        ret = -1;
>> +        if ( entry && (entry->bank == bank) )
>> +        {
>> +            entry->mci_misc = val;
>> +            mce_printk(MCE_VERBOSE,
>> +                       "MCE: wr MC%u_MISC %"PRIx64" in vMCE#\n",
>> bank, val); +        }
>
> Same here in both respects.

Same as above.

Thanks,
Jinsong

_______________________________________________
Xen-devel mailing list
[hidden email]
http://lists.xen.org/xen-devel