aboutsummaryrefslogtreecommitdiffstats
path: root/main/xen/xsa297-4.11-7.patch
blob: 940191d8e20f2d92e92d9c572cb815e29f6b00aa (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: x86/spec-ctrl: Introduce options to control VERW flushing

The Microarchitectural Data Sampling vulnerability is split into categories
with subtly different properties:

 MLPDS - Microarchitectural Load Port Data Sampling
 MSBDS - Microarchitectural Store Buffer Data Sampling
 MFBDS - Microarchitectural Fill Buffer Data Sampling
 MDSUM - Microarchitectural Data Sampling Uncacheable Memory

MDSUM is a special case of the other three, and isn't distinguished further.

These issues pertain to three microarchitectural buffers.  The Load Ports, the
Store Buffers and the Fill Buffers.  Each of these structures is flushed by
the new enhanced VERW functionality, but the conditions under which flushing
is necessary vary.

For this concise overview of the issues and default logic, the abbreviations
SB (Store Buffer), FB (Fill Buffer), LP (Load Port) and HT (Hyperthreading) are
used for brevity:

 * Vulnerable hardware is divided into two categories - parts which suffer
   from SB only, and parts with any other combination of vulnerabilities.

 * SB only has an HT interaction when the thread goes idle, due to the static
   partitioning of resources.  LP and FB have HT interactions at all points,
   due to the competitive sharing of resources.  All issues potentially leak
   data across the return-to-guest transition.

 * The microcode which implements VERW flushing also extends MSR_FLUSH_CMD, so
   we don't need to do both on the HVM return-to-guest path.  However, some
   parts are not vulnerable to L1TF (therefore have no MSR_FLUSH_CMD), but are
   vulnerable to MDS, so do require VERW on the HVM path.

Note that we deliberately support mds=1 even without MD_CLEAR in case the
microcode has been updated but the feature bit not exposed.

This is part of XSA-297, CVE-2018-12126, CVE-2018-12127, CVE-2018-12130, CVE-2019-11091.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>

diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index 8260dfb..8108bbf 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1800,7 +1800,7 @@ is being interpreted as a custom timeout in milliseconds. Zero or boolean
 false disable the quirk workaround, which is also the default.
 
 ### spec-ctrl (x86)
-> `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb}=<bool>,
+> `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb,md-clear}=<bool>,
 >              bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,eager-fpu,
 >              l1d-flush}=<bool> ]`
 
@@ -1824,9 +1824,10 @@ in place for guests to use.
 
 Use of a positive boolean value for either of these options is invalid.
 
-The booleans `pv=`, `hvm=`, `msr-sc=` and `rsb=` offer fine grained control
-over the alternative blocks used by Xen.  These impact Xen's ability to
-protect itself, and Xen's ability to virtualise support for guests to use.
+The booleans `pv=`, `hvm=`, `msr-sc=`, `rsb=` and `md-clear=` offer fine
+grained control over the alternative blocks used by Xen.  These impact Xen's
+ability to protect itself, and Xen's ability to virtualise support for guests
+to use.
 
 * `pv=` and `hvm=` offer control over all suboptions for PV and HVM guests
   respectively.
@@ -1835,6 +1836,11 @@ protect itself, and Xen's ability to virtualise support for guests to use.
   guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc.
 * `rsb=` offers control over whether to overwrite the Return Stack Buffer /
   Return Address Stack on entry to Xen.
+* `md-clear=` offers control over whether to use VERW to flush
+  microarchitectural buffers on idle and exit from Xen.  *Note: For
+  compatibility with development versions of this fix, `mds=` is also accepted
+  on Xen 4.12 and earlier as an alias.  Consult vendor documentation in
+  preference to here.*
 
 If Xen was compiled with INDIRECT\_THUNK support, `bti-thunk=` can be used to
 select which of the thunks gets patched into the `__x86_indirect_thunk_%reg`
diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
index fdd90a8..10fcd77 100644
--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -34,6 +34,8 @@ static bool __initdata opt_msr_sc_pv = true;
 static bool __initdata opt_msr_sc_hvm = true;
 static bool __initdata opt_rsb_pv = true;
 static bool __initdata opt_rsb_hvm = true;
+static int8_t __initdata opt_md_clear_pv = -1;
+static int8_t __initdata opt_md_clear_hvm = -1;
 
 /* Cmdline controls for Xen's speculative settings. */
 static enum ind_thunk {
@@ -58,6 +60,9 @@ paddr_t __read_mostly l1tf_addr_mask, __read_mostly l1tf_safe_maddr;
 static bool __initdata cpu_has_bug_l1tf;
 static unsigned int __initdata l1d_maxphysaddr;
 
+static bool __initdata cpu_has_bug_msbds_only; /* => minimal HT impact. */
+static bool __initdata cpu_has_bug_mds; /* Any other M{LP,SB,FB}DS combination. */
+
 static int __init parse_bti(const char *s)
 {
     const char *ss;
@@ -150,6 +155,8 @@ static int __init parse_spec_ctrl(const char *s)
         disable_common:
             opt_rsb_pv = false;
             opt_rsb_hvm = false;
+            opt_md_clear_pv = 0;
+            opt_md_clear_hvm = 0;
 
             opt_thunk = THUNK_JMP;
             opt_ibrs = 0;
@@ -172,11 +179,13 @@ static int __init parse_spec_ctrl(const char *s)
         {
             opt_msr_sc_pv = val;
             opt_rsb_pv = val;
+            opt_md_clear_pv = val;
         }
         else if ( (val = parse_boolean("hvm", s, ss)) >= 0 )
         {
             opt_msr_sc_hvm = val;
             opt_rsb_hvm = val;
+            opt_md_clear_hvm = val;
         }
         else if ( (val = parse_boolean("msr-sc", s, ss)) >= 0 )
         {
@@ -188,6 +197,12 @@ static int __init parse_spec_ctrl(const char *s)
             opt_rsb_pv = val;
             opt_rsb_hvm = val;
         }
+        else if ( (val = parse_boolean("md-clear", s, ss)) >= 0 ||
+                  (val = parse_boolean("mds", s, ss)) >= 0 )
+        {
+            opt_md_clear_pv = val;
+            opt_md_clear_hvm = val;
+        }
 
         /* Xen's speculative sidechannel mitigation settings. */
         else if ( !strncmp(s, "bti-thunk=", 10) )
@@ -373,7 +388,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
                "\n");
 
     /* Settings for Xen's protection, irrespective of guests. */
-    printk("  Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s, Other:%s%s\n",
+    printk("  Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s, Other:%s%s%s\n",
            thunk == THUNK_NONE      ? "N/A" :
            thunk == THUNK_RETPOLINE ? "RETPOLINE" :
            thunk == THUNK_LFENCE    ? "LFENCE" :
@@ -383,7 +398,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
            !boot_cpu_has(X86_FEATURE_SSBD)           ? "" :
            (default_xen_spec_ctrl & SPEC_CTRL_SSBD)  ? " SSBD+" : " SSBD-",
            opt_ibpb                                  ? " IBPB"  : "",
-           opt_l1d_flush                             ? " L1D_FLUSH" : "");
+           opt_l1d_flush                             ? " L1D_FLUSH" : "",
+           opt_md_clear_pv || opt_md_clear_hvm       ? " VERW"  : "");
 
     /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */
     if ( cpu_has_bug_l1tf || opt_pv_l1tf_hwdom || opt_pv_l1tf_domu )
@@ -786,6 +802,107 @@ static __init void l1tf_calculations(uint64_t caps)
                                             : (3ul << (paddr_bits - 2))));
 }
 
+/* Classify the boot CPU as MDS-vulnerable, MSBDS-only, or unaffected. */
+static __init void mds_calculations(uint64_t caps)
+{
+    /* MDS is only known to affect Intel Family 6 processors at this time. */
+    if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+         boot_cpu_data.x86 != 6 )
+        return;
+
+    /* Any processor advertising MDS_NO should not be vulnerable to MDS. */
+    if ( caps & ARCH_CAPS_MDS_NO )
+        return;
+
+    switch ( boot_cpu_data.x86_model )
+    {
+        /*
+         * Core processors since at least Nehalem are vulnerable.
+         */
+    case 0x1f: /* Auburndale / Havendale */
+    case 0x1e: /* Nehalem */
+    case 0x1a: /* Nehalem EP */
+    case 0x2e: /* Nehalem EX */
+    case 0x25: /* Westmere */
+    case 0x2c: /* Westmere EP */
+    case 0x2f: /* Westmere EX */
+    case 0x2a: /* SandyBridge */
+    case 0x2d: /* SandyBridge EP/EX */
+    case 0x3a: /* IvyBridge */
+    case 0x3e: /* IvyBridge EP/EX */
+    case 0x3c: /* Haswell */
+    case 0x3f: /* Haswell EX/EP */
+    case 0x45: /* Haswell D */
+    case 0x46: /* Haswell H */
+    case 0x3d: /* Broadwell */
+    case 0x47: /* Broadwell H */
+    case 0x4f: /* Broadwell EP/EX */
+    case 0x56: /* Broadwell D */
+    case 0x4e: /* Skylake M */
+    case 0x5e: /* Skylake D */
+        cpu_has_bug_mds = true;
+        break;
+
+        /*
+         * Some Core processors have per-stepping vulnerability.
+         */
+    case 0x55: /* Skylake-X / Cascade Lake */
+        if ( boot_cpu_data.x86_mask <= 5 )
+            cpu_has_bug_mds = true;
+        break;
+
+    case 0x8e: /* Kaby / Coffee / Whiskey Lake M */
+        if ( boot_cpu_data.x86_mask <= 0xb )
+            cpu_has_bug_mds = true;
+        break;
+
+    case 0x9e: /* Kaby / Coffee / Whiskey Lake D */
+        if ( boot_cpu_data.x86_mask <= 0xc )
+            cpu_has_bug_mds = true;
+        break;
+
+        /*
+         * Very old and very new Atom processors are not vulnerable.
+         */
+    case 0x1c: /* Pineview */
+    case 0x26: /* Lincroft */
+    case 0x27: /* Penwell */
+    case 0x35: /* Cloverview */
+    case 0x36: /* Cedarview */
+    case 0x7a: /* Goldmont */
+        break;
+
+        /*
+         * Middling Atom processors are vulnerable to just the Store Buffer
+         * aspect.
+         */
+    case 0x37: /* Baytrail / Valleyview (Silvermont) */
+    case 0x4a: /* Merrifield */
+    case 0x4c: /* Cherrytrail / Braswell */
+    case 0x4d: /* Avoton / Rangeley (Silvermont) */
+    case 0x5a: /* Moorefield */
+    case 0x5d:
+    case 0x65:
+    case 0x6e:
+    case 0x75:
+        /*
+         * Knights processors (which are based on the Silvermont/Airmont
+         * microarchitecture) are similarly only affected by the Store Buffer
+         * aspect.
+         */
+    case 0x57: /* Knights Landing */
+    case 0x85: /* Knights Mill */
+        cpu_has_bug_msbds_only = true;
+        break;
+
+    default: /* Unknown model: err on the side of caution. */
+        printk("Unrecognised CPU model %#x - assuming vulnerable to MDS\n",
+               boot_cpu_data.x86_model);
+        cpu_has_bug_mds = true;
+        break;
+    }
+}
+
 void __init init_speculation_mitigations(void)
 {
     enum ind_thunk thunk = THUNK_DEFAULT;
@@ -978,6 +1095,47 @@ void __init init_speculation_mitigations(void)
             "enabled.  Please assess your configuration and choose an\n"
             "explicit 'smt=<bool>' setting.  See XSA-273.\n");
 
+    mds_calculations(caps);
+
+    /*
+     * By default, enable PV and HVM mitigations on MDS-vulnerable hardware.
+     * This will only be a token effort for MLPDS/MFBDS when HT is enabled,
+     * but it is somewhat better than nothing.
+     */
+    if ( opt_md_clear_pv == -1 )
+        opt_md_clear_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
+                           boot_cpu_has(X86_FEATURE_MD_CLEAR));
+    if ( opt_md_clear_hvm == -1 )
+        opt_md_clear_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
+                            boot_cpu_has(X86_FEATURE_MD_CLEAR));
+
+    /*
+     * Enable MDS defences as applicable.  The PV blocks need using all the
+     * time, and the Idle blocks need using if either PV or HVM defences are
+     * used.
+     *
+     * HVM is more complicated.  The MD_CLEAR microcode extends L1D_FLUSH with
+     * equivalent semantics to avoid needing to perform both flushes on the
+     * HVM path.  The HVM blocks don't need activating if our hypervisor told
+     * us it was handling L1D_FLUSH, or we are using L1D_FLUSH ourselves.
+     */
+    if ( opt_md_clear_pv )
+        setup_force_cpu_cap(X86_FEATURE_SC_VERW_PV);
+    if ( opt_md_clear_pv || opt_md_clear_hvm )
+        setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE);
+    if ( opt_md_clear_hvm && !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush )
+        setup_force_cpu_cap(X86_FEATURE_SC_VERW_HVM);
+
+    /*
+     * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT
+     * active and no explicit SMT choice.
+     */
+    if ( opt_smt == -1 && cpu_has_bug_mds && hw_smt_enabled )
+        warning_add(
+            "Booted on MLPDS/MFBDS-vulnerable hardware with SMT/Hyperthreading\n"
+            "enabled.  Mitigations will not be fully effective.  Please\n"
+            "choose an explicit smt=<bool> setting.  See XSA-297.\n");
+
     print_details(thunk, caps);
 
     /*