From 6bad09c708d906922fb59d7e2c06d5de9a633ca3 Mon Sep 17 00:00:00 2001 From: George Dunlap Date: Thu, 10 Oct 2019 17:57:49 +0100 Subject: [PATCH 03/11] x86/mm: Separate out partial_pte tristate into individual flags At the moment, partial_pte is a tri-state that contains two distinct bits of information: 1. If zero, the pte at index [nr_validated_ptes] is un-validated. If non-zero, the pte was last seen with PGT_partial set. 2. If positive, the pte at index [nr_validated_ptes] does not hold a general reference count. If negative, it does. To make future patches more clear, separate out this functionality into two distinct, named bits: PTF_partial_set (for #1) and PTF_partial_general_ref (for #2). Additionally, a number of functions which need this information also take other flags to control behavior (such as `preemptible` and `defer`). These are hard to read in the caller (since you only see 'true' or 'false'), and ugly when many are added together. In preparation for adding yet another flag in a future patch, collapse all of these into a single `flag` variable. NB that this does mean checking for what was previously the '-1' condition a bit more ugly in the put_page_from_lNe functions (since you have to check for both partial_set and general ref); but this clause will go away in a future patch. Also note that the original comment had an off-by-one error: partial_flags (like partial_pte before it) concerns plNe[nr_validated_ptes], not plNe[nr_validated_ptes+1]. No functional change intended. This is part of XSA-299. Reported-by: George Dunlap Signed-off-by: George Dunlap Reviewed-by: Jan Beulich --- xen/arch/x86/mm.c | 165 ++++++++++++++++++++++++--------------- xen/include/asm-x86/mm.h | 41 +++++++--- 2 files changed, 128 insertions(+), 78 deletions(-) diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index 0cbca48a02..84ee48ec3f 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -651,20 +651,34 @@ static int alloc_segdesc_page(struct page_info *page) static int __get_page_type(struct page_info *page, unsigned long type, int preemptible); +/* + * The following flags are used to specify behavior of various get and + * put commands. The first two are also stored in page->partial_flags + * to indicate the state of the page pointed to by + * page->pte[page->nr_validated_entries]. See the comment in mm.h for + * more information. + */ +#define PTF_partial_set (1 << 0) +#define PTF_partial_general_ref (1 << 1) +#define PTF_preemptible (1 << 2) +#define PTF_defer (1 << 3) + static int get_page_and_type_from_mfn( mfn_t mfn, unsigned long type, struct domain *d, - int partial, int preemptible) + unsigned int flags) { struct page_info *page = mfn_to_page(mfn); int rc; + bool preemptible = flags & PTF_preemptible, + partial_ref = flags & PTF_partial_general_ref; - if ( likely(partial >= 0) && + if ( likely(!partial_ref) && unlikely(!get_page_from_mfn(mfn, d)) ) return -EINVAL; rc = __get_page_type(page, type, preemptible); - if ( unlikely(rc) && partial >= 0 && + if ( unlikely(rc) && !partial_ref && (!preemptible || page != current->arch.old_guest_table) ) put_page(page); @@ -1146,7 +1160,7 @@ get_page_from_l1e( define_get_linear_pagetable(l2); static int get_page_from_l2e( - l2_pgentry_t l2e, unsigned long pfn, struct domain *d, int partial) + l2_pgentry_t l2e, unsigned long pfn, struct domain *d, unsigned int flags) { unsigned long mfn = l2e_get_pfn(l2e); int rc; @@ -1163,8 +1177,9 @@ get_page_from_l2e( if ( !(l2e_get_flags(l2e) & _PAGE_PSE) ) { - rc = get_page_and_type_from_mfn(_mfn(mfn), PGT_l1_page_table, d, - partial, false); + ASSERT(!(flags & PTF_preemptible)); + + rc = get_page_and_type_from_mfn(_mfn(mfn), PGT_l1_page_table, d, flags); if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) ) rc = 0; return rc; @@ -1183,7 +1198,7 @@ get_page_from_l2e( define_get_linear_pagetable(l3); static int get_page_from_l3e( - l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial) + l3_pgentry_t l3e, unsigned long pfn, struct domain *d, unsigned int flags) { int rc; @@ -1198,7 +1213,7 @@ get_page_from_l3e( } rc = get_page_and_type_from_mfn( - l3e_get_mfn(l3e), PGT_l2_page_table, d, partial, 1); + l3e_get_mfn(l3e), PGT_l2_page_table, d, flags | PTF_preemptible); if ( unlikely(rc == -EINVAL) && !is_pv_32bit_domain(d) && get_l3_linear_pagetable(l3e, pfn, d) ) @@ -1216,7 +1231,7 @@ get_page_from_l3e( define_get_linear_pagetable(l4); static int get_page_from_l4e( - l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int partial) + l4_pgentry_t l4e, unsigned long pfn, struct domain *d, unsigned int flags) { int rc; @@ -1231,7 +1246,7 @@ get_page_from_l4e( } rc = get_page_and_type_from_mfn( - l4e_get_mfn(l4e), PGT_l3_page_table, d, partial, 1); + l4e_get_mfn(l4e), PGT_l3_page_table, d, flags | PTF_preemptible); if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) ) rc = 0; @@ -1306,7 +1321,7 @@ void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner) * Note also that this automatically deals correctly with linear p.t.'s. */ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn, - int partial, bool defer) + unsigned int flags) { int rc = 0; @@ -1326,12 +1341,13 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn, struct page_info *pg = l2e_get_page(l2e); struct page_info *ptpg = mfn_to_page(_mfn(pfn)); - if ( unlikely(partial > 0) ) + if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) == + PTF_partial_set ) { - ASSERT(!defer); + ASSERT(!(flags & PTF_defer)); rc = _put_page_type(pg, true, ptpg); } - else if ( defer ) + else if ( flags & PTF_defer ) { current->arch.old_guest_ptpg = ptpg; current->arch.old_guest_table = pg; @@ -1348,7 +1364,7 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn, } static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, - int partial, bool defer) + unsigned int flags) { struct page_info *pg; int rc; @@ -1371,13 +1387,14 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, pg = l3e_get_page(l3e); - if ( unlikely(partial > 0) ) + if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) == + PTF_partial_set ) { - ASSERT(!defer); + ASSERT(!(flags & PTF_defer)); return _put_page_type(pg, true, mfn_to_page(_mfn(pfn))); } - if ( defer ) + if ( flags & PTF_defer ) { current->arch.old_guest_ptpg = mfn_to_page(_mfn(pfn)); current->arch.old_guest_table = pg; @@ -1392,7 +1409,7 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, } static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, - int partial, bool defer) + unsigned int flags) { int rc = 1; @@ -1401,13 +1418,14 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, { struct page_info *pg = l4e_get_page(l4e); - if ( unlikely(partial > 0) ) + if ( (flags & (PTF_partial_set | PTF_partial_general_ref)) == + PTF_partial_set ) { - ASSERT(!defer); + ASSERT(!(flags & PTF_defer)); return _put_page_type(pg, true, mfn_to_page(_mfn(pfn))); } - if ( defer ) + if ( flags & PTF_defer ) { current->arch.old_guest_ptpg = mfn_to_page(_mfn(pfn)); current->arch.old_guest_table = pg; @@ -1514,12 +1532,13 @@ static int alloc_l2_table(struct page_info *page, unsigned long type) unsigned long pfn = mfn_x(page_to_mfn(page)); l2_pgentry_t *pl2e; unsigned int i; - int rc = 0, partial = page->partial_pte; + int rc = 0; + unsigned int partial_flags = page->partial_flags; pl2e = map_domain_page(_mfn(pfn)); for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; - i++, partial = 0 ) + i++, partial_flags = 0 ) { if ( i > page->nr_validated_ptes && hypercall_preempt_check() ) { @@ -1529,18 +1548,19 @@ static int alloc_l2_table(struct page_info *page, unsigned long type) } if ( !is_guest_l2_slot(d, type, i) || - (rc = get_page_from_l2e(pl2e[i], pfn, d, partial)) > 0 ) + (rc = get_page_from_l2e(pl2e[i], pfn, d, partial_flags)) > 0 ) continue; if ( rc == -ERESTART ) { page->nr_validated_ptes = i; - page->partial_pte = partial ?: 1; + /* Set 'set', retain 'general ref' */ + page->partial_flags = partial_flags | PTF_partial_set; } else if ( rc == -EINTR && i ) { page->nr_validated_ptes = i; - page->partial_pte = 0; + page->partial_flags = 0; rc = -ERESTART; } else if ( rc < 0 && rc != -EINTR ) @@ -1549,7 +1569,7 @@ static int alloc_l2_table(struct page_info *page, unsigned long type) if ( i ) { page->nr_validated_ptes = i; - page->partial_pte = 0; + page->partial_flags = 0; current->arch.old_guest_ptpg = NULL; current->arch.old_guest_table = page; } @@ -1573,7 +1593,8 @@ static int alloc_l3_table(struct page_info *page) unsigned long pfn = mfn_x(page_to_mfn(page)); l3_pgentry_t *pl3e; unsigned int i; - int rc = 0, partial = page->partial_pte; + int rc = 0; + unsigned int partial_flags = page->partial_flags; pl3e = map_domain_page(_mfn(pfn)); @@ -1588,7 +1609,7 @@ static int alloc_l3_table(struct page_info *page) memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e)); for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES; - i++, partial = 0 ) + i++, partial_flags = 0 ) { if ( i > page->nr_validated_ptes && hypercall_preempt_check() ) { @@ -1605,20 +1626,22 @@ static int alloc_l3_table(struct page_info *page) else rc = get_page_and_type_from_mfn( l3e_get_mfn(pl3e[i]), - PGT_l2_page_table | PGT_pae_xen_l2, d, partial, 1); + PGT_l2_page_table | PGT_pae_xen_l2, d, + partial_flags | PTF_preemptible); } - else if ( (rc = get_page_from_l3e(pl3e[i], pfn, d, partial)) > 0 ) + else if ( (rc = get_page_from_l3e(pl3e[i], pfn, d, partial_flags)) > 0 ) continue; if ( rc == -ERESTART ) { page->nr_validated_ptes = i; - page->partial_pte = partial ?: 1; + /* Set 'set', leave 'general ref' set if this entry was set */ + page->partial_flags = partial_flags | PTF_partial_set; } else if ( rc == -EINTR && i ) { page->nr_validated_ptes = i; - page->partial_pte = 0; + page->partial_flags = 0; rc = -ERESTART; } if ( rc < 0 ) @@ -1635,7 +1658,7 @@ static int alloc_l3_table(struct page_info *page) if ( i ) { page->nr_validated_ptes = i; - page->partial_pte = 0; + page->partial_flags = 0; current->arch.old_guest_ptpg = NULL; current->arch.old_guest_table = page; } @@ -1767,19 +1790,21 @@ static int alloc_l4_table(struct page_info *page) unsigned long pfn = mfn_x(page_to_mfn(page)); l4_pgentry_t *pl4e = map_domain_page(_mfn(pfn)); unsigned int i; - int rc = 0, partial = page->partial_pte; + int rc = 0; + unsigned int partial_flags = page->partial_flags; for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES; - i++, partial = 0 ) + i++, partial_flags = 0 ) { if ( !is_guest_l4_slot(d, i) || - (rc = get_page_from_l4e(pl4e[i], pfn, d, partial)) > 0 ) + (rc = get_page_from_l4e(pl4e[i], pfn, d, partial_flags)) > 0 ) continue; if ( rc == -ERESTART ) { page->nr_validated_ptes = i; - page->partial_pte = partial ?: 1; + /* Set 'set', leave 'general ref' set if this entry was set */ + page->partial_flags = partial_flags | PTF_partial_set; } else if ( rc < 0 ) { @@ -1789,7 +1814,7 @@ static int alloc_l4_table(struct page_info *page) if ( i ) { page->nr_validated_ptes = i; - page->partial_pte = 0; + page->partial_flags = 0; if ( rc == -EINTR ) rc = -ERESTART; else @@ -1842,19 +1867,20 @@ static int free_l2_table(struct page_info *page) struct domain *d = page_get_owner(page); unsigned long pfn = mfn_x(page_to_mfn(page)); l2_pgentry_t *pl2e; - int rc = 0, partial = page->partial_pte; - unsigned int i = page->nr_validated_ptes - !partial; + int rc = 0; + unsigned int partial_flags = page->partial_flags, + i = page->nr_validated_ptes - !(partial_flags & PTF_partial_set); pl2e = map_domain_page(_mfn(pfn)); for ( ; ; ) { if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) ) - rc = put_page_from_l2e(pl2e[i], pfn, partial, false); + rc = put_page_from_l2e(pl2e[i], pfn, partial_flags); if ( rc < 0 ) break; - partial = 0; + partial_flags = 0; if ( !i-- ) break; @@ -1876,12 +1902,14 @@ static int free_l2_table(struct page_info *page) else if ( rc == -ERESTART ) { page->nr_validated_ptes = i; - page->partial_pte = partial ?: -1; + page->partial_flags = (partial_flags & PTF_partial_set) ? + partial_flags : + (PTF_partial_set | PTF_partial_general_ref); } else if ( rc == -EINTR && i < L2_PAGETABLE_ENTRIES - 1 ) { page->nr_validated_ptes = i + 1; - page->partial_pte = 0; + page->partial_flags = 0; rc = -ERESTART; } @@ -1893,18 +1921,19 @@ static int free_l3_table(struct page_info *page) struct domain *d = page_get_owner(page); unsigned long pfn = mfn_x(page_to_mfn(page)); l3_pgentry_t *pl3e; - int rc = 0, partial = page->partial_pte; - unsigned int i = page->nr_validated_ptes - !partial; + int rc = 0; + unsigned int partial_flags = page->partial_flags, + i = page->nr_validated_ptes - !(partial_flags & PTF_partial_set); pl3e = map_domain_page(_mfn(pfn)); for ( ; ; ) { - rc = put_page_from_l3e(pl3e[i], pfn, partial, 0); + rc = put_page_from_l3e(pl3e[i], pfn, partial_flags); if ( rc < 0 ) break; - partial = 0; + partial_flags = 0; if ( rc == 0 ) pl3e[i] = unadjust_guest_l3e(pl3e[i], d); @@ -1923,12 +1952,14 @@ static int free_l3_table(struct page_info *page) if ( rc == -ERESTART ) { page->nr_validated_ptes = i; - page->partial_pte = partial ?: -1; + page->partial_flags = (partial_flags & PTF_partial_set) ? + partial_flags : + (PTF_partial_set | PTF_partial_general_ref); } else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 ) { page->nr_validated_ptes = i + 1; - page->partial_pte = 0; + page->partial_flags = 0; rc = -ERESTART; } return rc > 0 ? 0 : rc; @@ -1939,26 +1970,29 @@ static int free_l4_table(struct page_info *page) struct domain *d = page_get_owner(page); unsigned long pfn = mfn_x(page_to_mfn(page)); l4_pgentry_t *pl4e = map_domain_page(_mfn(pfn)); - int rc = 0, partial = page->partial_pte; - unsigned int i = page->nr_validated_ptes - !partial; + int rc = 0; + unsigned partial_flags = page->partial_flags, + i = page->nr_validated_ptes - !(partial_flags & PTF_partial_set); do { if ( is_guest_l4_slot(d, i) ) - rc = put_page_from_l4e(pl4e[i], pfn, partial, 0); + rc = put_page_from_l4e(pl4e[i], pfn, partial_flags); if ( rc < 0 ) break; - partial = 0; + partial_flags = 0; } while ( i-- ); if ( rc == -ERESTART ) { page->nr_validated_ptes = i; - page->partial_pte = partial ?: -1; + page->partial_flags = (partial_flags & PTF_partial_set) ? + partial_flags : + (PTF_partial_set | PTF_partial_general_ref); } else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 ) { page->nr_validated_ptes = i + 1; - page->partial_pte = 0; + page->partial_flags = 0; rc = -ERESTART; } @@ -2180,7 +2214,7 @@ static int mod_l2_entry(l2_pgentry_t *pl2e, return -EBUSY; } - put_page_from_l2e(ol2e, pfn, 0, true); + put_page_from_l2e(ol2e, pfn, PTF_defer); return rc; } @@ -2248,7 +2282,7 @@ static int mod_l3_entry(l3_pgentry_t *pl3e, if ( !create_pae_xen_mappings(d, pl3e) ) BUG(); - put_page_from_l3e(ol3e, pfn, 0, 1); + put_page_from_l3e(ol3e, pfn, PTF_defer); return rc; } @@ -2311,7 +2345,7 @@ static int mod_l4_entry(l4_pgentry_t *pl4e, return -EFAULT; } - put_page_from_l4e(ol4e, pfn, 0, 1); + put_page_from_l4e(ol4e, pfn, PTF_defer); return rc; } @@ -2577,7 +2611,7 @@ int free_page_type(struct page_info *page, unsigned long type, if ( !(type & PGT_partial) ) { page->nr_validated_ptes = 1U << PAGETABLE_ORDER; - page->partial_pte = 0; + page->partial_flags = 0; } switch ( type & PGT_type_mask ) @@ -2862,7 +2896,7 @@ static int __get_page_type(struct page_info *page, unsigned long type, if ( !(x & PGT_partial) ) { page->nr_validated_ptes = 0; - page->partial_pte = 0; + page->partial_flags = 0; } page->linear_pt_count = 0; rc = alloc_page_type(page, type, preemptible); @@ -3037,7 +3071,8 @@ int new_guest_cr3(mfn_t mfn) rc = paging_mode_refcounts(d) ? (get_page_from_mfn(mfn, d) ? 0 : -EINVAL) - : get_page_and_type_from_mfn(mfn, PGT_root_page_table, d, 0, 1); + : get_page_and_type_from_mfn(mfn, PGT_root_page_table, d, + PTF_preemptible); switch ( rc ) { case 0: @@ -3420,7 +3455,7 @@ long do_mmuext_op( if ( op.arg1.mfn != 0 ) { rc = get_page_and_type_from_mfn( - _mfn(op.arg1.mfn), PGT_root_page_table, currd, 0, 1); + _mfn(op.arg1.mfn), PGT_root_page_table, currd, PTF_preemptible); if ( unlikely(rc) ) { diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index 1030b8b5e6..a531fe3115 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -157,19 +157,34 @@ struct page_info * setting the flag must not drop that reference, whereas the instance * clearing it will have to. * - * If @partial_pte is positive then PTE at @nr_validated_ptes+1 has - * been partially validated. This implies that the general reference - * to the page (acquired from get_page_from_lNe()) would be dropped - * (again due to the apparent failure) and hence must be re-acquired - * when resuming the validation, but must not be dropped when picking - * up the page for invalidation. + * If partial_flags & PTF_partial_set is set, then the page at + * at @nr_validated_ptes had PGT_partial set as a result of an + * operation on the current page. (That page may or may not + * still have PGT_partial set.) * - * If @partial_pte is negative then PTE at @nr_validated_ptes+1 has - * been partially invalidated. This is basically the opposite case of - * above, i.e. the general reference to the page was not dropped in - * put_page_from_lNe() (due to the apparent failure), and hence it - * must be dropped when the put operation is resumed (and completes), - * but it must not be acquired if picking up the page for validation. + * If PTF_partial_general_ref is set, then the PTE at + * @nr_validated_ptef holds a general reference count for the + * page. + * + * This happens: + * - During de-validation, if de-validation of the page was + * interrupted + * - During validation, if an invalid entry is encountered and + * validation is preemptible + * - During validation, if PTF_partial_general_ref was set on + * this entry to begin with (perhaps because we're picking + * up from a partial de-validation). + * + * When resuming validation, if PTF_partial_general_ref is clear, + * then a general reference must be re-acquired; if it is set, no + * reference should be acquired. + * + * When resuming de-validation, if PTF_partial_general_ref is + * clear, no reference should be dropped; if it is set, a + * reference should be dropped. + * + * NB that PTF_partial_set and PTF_partial_general_ref are + * defined in mm.c, the only place where they are used. * * The 3rd field, @linear_pt_count, indicates * - by a positive value, how many same-level page table entries a page @@ -180,7 +195,7 @@ struct page_info struct { u16 nr_validated_ptes:PAGETABLE_ORDER + 1; u16 :16 - PAGETABLE_ORDER - 1 - 2; - s16 partial_pte:2; + u16 partial_flags:2; s16 linear_pt_count; }; -- 2.23.0