[-next,v3] mm: hwpoison: support recovery from ksm_might_need_to_copy()

Message ID 20221213030557.143432-1-wangkefeng.wang@huawei.com
State New
Series [-next,v3] mm: hwpoison: support recovery from ksm_might_need_to_copy()

Commit Message

Kefeng Wang Dec. 13, 2022, 3:05 a.m. UTC
When the kernel copies a page in ksm_might_need_to_copy() but runs
into an uncorrectable memory error, it will crash, since the poisoned
page is consumed by the kernel. This is similar to copy-on-write
poison recovery: when an error is detected during the page copy,
return VM_FAULT_HWPOISON in do_swap_page(), and install a hwpoison
entry in unuse_pte() during swapoff, which helps us avoid a system
crash. Note that memory failure on a KSM page is still skipped, but
memory_failure_queue() is called anyway, to stay consistent with the
general memory-failure process.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 mm/ksm.c      |  8 ++++++--
 mm/memory.c   |  3 +++
 mm/swapfile.c | 19 +++++++++++++------
 3 files changed, 22 insertions(+), 8 deletions(-)
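
For context, the hwpoison entry that unuse_pte() installs here is the
kernel's existing software-defined swap entry for poisoned pages, not
something introduced by this patch. A minimal sketch of how such an
entry is encoded, simplified from include/linux/swapops.h of this era
(details vary by kernel version and configuration):

	/*
	 * Sketch, simplified from include/linux/swapops.h: a hwpoison
	 * entry is a non-present software swap entry whose type is
	 * SWP_HWPOISON and whose offset records the pfn of the poisoned
	 * page.  A PTE carrying this entry makes any later fault report
	 * VM_FAULT_HWPOISON instead of attempting a swapin.
	 */
	static inline swp_entry_t make_hwpoison_entry(struct page *page)
	{
		BUG_ON(!PageLocked(page));
		return swp_entry(SWP_HWPOISON, page_to_pfn(page));
	}

	static inline int is_hwpoison_entry(swp_entry_t entry)
	{
		return swp_type(entry) == SWP_HWPOISON;
	}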
  

Comments

kernel test robot Dec. 13, 2022, 11:33 a.m. UTC | #1
Hi Kefeng,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on next-20221208]
[cannot apply to akpm-mm/mm-everything linus/master v6.1 v6.1-rc8 v6.1-rc7 v6.1]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting the patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Kefeng-Wang/mm-hwposion-support-recovery-from-ksm_might_need_to_copy/20221213-105100
patch link:    https://lore.kernel.org/r/20221213030557.143432-1-wangkefeng.wang%40huawei.com
patch subject: [PATCH -next v3] mm: hwposion: support recovery from ksm_might_need_to_copy()
config: i386-randconfig-a002
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/0015a0b43762219cbe6edac8bb9e7e385978152b
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Kefeng-Wang/mm-hwposion-support-recovery-from-ksm_might_need_to_copy/20221213-105100
        git checkout 0015a0b43762219cbe6edac8bb9e7e385978152b
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add the following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> mm/swapfile.c:1777:6: warning: variable 'new_pte' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
           if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) {
               ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/compiler.h:78:22: note: expanded from macro 'unlikely'
   # define unlikely(x)    __builtin_expect(!!(x), 0)
                           ^~~~~~~~~~~~~~~~~~~~~~~~~~
   mm/swapfile.c:1827:36: note: uninitialized use occurs here
           set_pte_at(vma->vm_mm, addr, pte, new_pte);
                                             ^~~~~~~
   mm/swapfile.c:1777:2: note: remove the 'if' if its condition is always false
           if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) {
           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   mm/swapfile.c:1765:2: note: variable 'new_pte' is declared here
           pte_t *pte, new_pte;
           ^
   1 warning generated.


vim +1777 mm/swapfile.c

179ef71cbc0852 Cyrill Gorcunov         2013-08-13  1753  
^1da177e4c3f41 Linus Torvalds          2005-04-16  1754  /*
72866f6f277ec0 Hugh Dickins            2005-10-29  1755   * No need to decide whether this PTE shares the swap entry with others,
72866f6f277ec0 Hugh Dickins            2005-10-29  1756   * just let do_wp_page work it out if a write is requested later - to
72866f6f277ec0 Hugh Dickins            2005-10-29  1757   * force COW, vm_page_prot omits write permission from any private vma.
^1da177e4c3f41 Linus Torvalds          2005-04-16  1758   */
044d66c1d2b1c5 Hugh Dickins            2008-02-07  1759  static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
f102cd8b173e06 Matthew Wilcox (Oracle  2022-09-02  1760) 		unsigned long addr, swp_entry_t entry, struct folio *folio)
^1da177e4c3f41 Linus Torvalds          2005-04-16  1761  {
f102cd8b173e06 Matthew Wilcox (Oracle  2022-09-02  1762) 	struct page *page = folio_file_page(folio, swp_offset(entry));
9e16b7fb1d066d Hugh Dickins            2013-02-22  1763  	struct page *swapcache;
044d66c1d2b1c5 Hugh Dickins            2008-02-07  1764  	spinlock_t *ptl;
14a762dd1977cf Miaohe Lin              2022-05-19  1765  	pte_t *pte, new_pte;
0015a0b4376221 Kefeng Wang             2022-12-13  1766  	bool hwposioned = false;
044d66c1d2b1c5 Hugh Dickins            2008-02-07  1767  	int ret = 1;
044d66c1d2b1c5 Hugh Dickins            2008-02-07  1768  
9e16b7fb1d066d Hugh Dickins            2013-02-22  1769  	swapcache = page;
9e16b7fb1d066d Hugh Dickins            2013-02-22  1770  	page = ksm_might_need_to_copy(page, vma, addr);
9e16b7fb1d066d Hugh Dickins            2013-02-22  1771  	if (unlikely(!page))
9e16b7fb1d066d Hugh Dickins            2013-02-22  1772  		return -ENOMEM;
0015a0b4376221 Kefeng Wang             2022-12-13  1773  	else if (unlikely(PTR_ERR(page) == -EHWPOISON))
0015a0b4376221 Kefeng Wang             2022-12-13  1774  		hwposioned = true;
9e16b7fb1d066d Hugh Dickins            2013-02-22  1775  
044d66c1d2b1c5 Hugh Dickins            2008-02-07  1776  	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
9f8bdb3f3dad3f Hugh Dickins            2016-01-15 @1777  	if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) {
044d66c1d2b1c5 Hugh Dickins            2008-02-07  1778  		ret = 0;
044d66c1d2b1c5 Hugh Dickins            2008-02-07  1779  		goto out;
044d66c1d2b1c5 Hugh Dickins            2008-02-07  1780  	}
8a9f3ccd24741b Balbir Singh            2008-02-07  1781  
0015a0b4376221 Kefeng Wang             2022-12-13  1782  	if (hwposioned || !PageUptodate(page)) {
0015a0b4376221 Kefeng Wang             2022-12-13  1783  		swp_entry_t swp_entry;
9f186f9e5fa9eb Miaohe Lin              2022-05-19  1784  
9f186f9e5fa9eb Miaohe Lin              2022-05-19  1785  		dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
0015a0b4376221 Kefeng Wang             2022-12-13  1786  		if (hwposioned) {
0015a0b4376221 Kefeng Wang             2022-12-13  1787  			swp_entry = make_hwpoison_entry(swapcache);
0015a0b4376221 Kefeng Wang             2022-12-13  1788  			page = swapcache;
0015a0b4376221 Kefeng Wang             2022-12-13  1789  		} else {
0015a0b4376221 Kefeng Wang             2022-12-13  1790  			swp_entry = make_swapin_error_entry();
0015a0b4376221 Kefeng Wang             2022-12-13  1791  		}
0015a0b4376221 Kefeng Wang             2022-12-13  1792  		new_pte = swp_entry_to_pte(swp_entry);
9f186f9e5fa9eb Miaohe Lin              2022-05-19  1793  		ret = 0;
9f186f9e5fa9eb Miaohe Lin              2022-05-19  1794  		goto out;
9f186f9e5fa9eb Miaohe Lin              2022-05-19  1795  	}
9f186f9e5fa9eb Miaohe Lin              2022-05-19  1796  
78fbe906cc900b David Hildenbrand       2022-05-09  1797  	/* See do_swap_page() */
78fbe906cc900b David Hildenbrand       2022-05-09  1798  	BUG_ON(!PageAnon(page) && PageMappedToDisk(page));
78fbe906cc900b David Hildenbrand       2022-05-09  1799  	BUG_ON(PageAnon(page) && PageAnonExclusive(page));
78fbe906cc900b David Hildenbrand       2022-05-09  1800  
b084d4353ff99d KAMEZAWA Hiroyuki       2010-03-05  1801  	dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
d559db086ff5be KAMEZAWA Hiroyuki       2010-03-05  1802  	inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
^1da177e4c3f41 Linus Torvalds          2005-04-16  1803  	get_page(page);
00501b531c4723 Johannes Weiner         2014-08-08  1804  	if (page == swapcache) {
1493a1913e34b0 David Hildenbrand       2022-05-09  1805  		rmap_t rmap_flags = RMAP_NONE;
1493a1913e34b0 David Hildenbrand       2022-05-09  1806  
1493a1913e34b0 David Hildenbrand       2022-05-09  1807  		/*
1493a1913e34b0 David Hildenbrand       2022-05-09  1808  		 * See do_swap_page(): PageWriteback() would be problematic.
1493a1913e34b0 David Hildenbrand       2022-05-09  1809  		 * However, we do a wait_on_page_writeback() just before this
1493a1913e34b0 David Hildenbrand       2022-05-09  1810  		 * call and have the page locked.
1493a1913e34b0 David Hildenbrand       2022-05-09  1811  		 */
1493a1913e34b0 David Hildenbrand       2022-05-09  1812  		VM_BUG_ON_PAGE(PageWriteback(page), page);
1493a1913e34b0 David Hildenbrand       2022-05-09  1813  		if (pte_swp_exclusive(*pte))
1493a1913e34b0 David Hildenbrand       2022-05-09  1814  			rmap_flags |= RMAP_EXCLUSIVE;
1493a1913e34b0 David Hildenbrand       2022-05-09  1815  
1493a1913e34b0 David Hildenbrand       2022-05-09  1816  		page_add_anon_rmap(page, vma, addr, rmap_flags);
00501b531c4723 Johannes Weiner         2014-08-08  1817  	} else { /* ksm created a completely new copy */
40f2bbf71161fa David Hildenbrand       2022-05-09  1818  		page_add_new_anon_rmap(page, vma, addr);
b518154e59aab3 Joonsoo Kim             2020-08-11  1819  		lru_cache_add_inactive_or_unevictable(page, vma);
00501b531c4723 Johannes Weiner         2014-08-08  1820  	}
14a762dd1977cf Miaohe Lin              2022-05-19  1821  	new_pte = pte_mkold(mk_pte(page, vma->vm_page_prot));
14a762dd1977cf Miaohe Lin              2022-05-19  1822  	if (pte_swp_soft_dirty(*pte))
14a762dd1977cf Miaohe Lin              2022-05-19  1823  		new_pte = pte_mksoft_dirty(new_pte);
14a762dd1977cf Miaohe Lin              2022-05-19  1824  	if (pte_swp_uffd_wp(*pte))
14a762dd1977cf Miaohe Lin              2022-05-19  1825  		new_pte = pte_mkuffd_wp(new_pte);
0015a0b4376221 Kefeng Wang             2022-12-13  1826  out:
14a762dd1977cf Miaohe Lin              2022-05-19  1827  	set_pte_at(vma->vm_mm, addr, pte, new_pte);
^1da177e4c3f41 Linus Torvalds          2005-04-16  1828  	swap_free(entry);
044d66c1d2b1c5 Hugh Dickins            2008-02-07  1829  	pte_unmap_unlock(pte, ptl);
9e16b7fb1d066d Hugh Dickins            2013-02-22  1830  	if (page != swapcache) {
9e16b7fb1d066d Hugh Dickins            2013-02-22  1831  		unlock_page(page);
9e16b7fb1d066d Hugh Dickins            2013-02-22  1832  		put_page(page);
9e16b7fb1d066d Hugh Dickins            2013-02-22  1833  	}
044d66c1d2b1c5 Hugh Dickins            2008-02-07  1834  	return ret;
^1da177e4c3f41 Linus Torvalds          2005-04-16  1835  }
^1da177e4c3f41 Linus Torvalds          2005-04-16  1836
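
The warning is real: with the 'out:' label moved in front of
set_pte_at(), the early '!pte_same_as_swp()' bail-out above now falls
through to set_pte_at() with 'new_pte' never assigned. One way to
resolve it is to give the hwpoison/swapin-error path its own label so
the pte_same check keeps its original early exit. A sketch of that
restructuring (the 'setpte' label name is illustrative, and the normal
swapin path is elided):

	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) {
		ret = 0;
		goto out;		/* new_pte is never touched */
	}

	if (hwposioned || !PageUptodate(page)) {
		swp_entry_t swp_entry;

		dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
		if (hwposioned) {
			swp_entry = make_hwpoison_entry(swapcache);
			page = swapcache;
		} else {
			swp_entry = make_swapin_error_entry();
		}
		new_pte = swp_entry_to_pte(swp_entry);
		ret = 0;
		goto setpte;		/* new_pte is initialized here */
	}

	/* ... normal swapin path, which also assigns new_pte ... */

setpte:
	set_pte_at(vma->vm_mm, addr, pte, new_pte);
	swap_free(entry);
out:
	pte_unmap_unlock(pte, ptl);

Besides curing the uninitialized use, this keeps the '!pte_same_as_swp()'
exit from calling swap_free() on a swap entry that no longer matches the
PTE, which the v3 placement of 'out:' would also do.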
  

Patch

diff --git a/mm/ksm.c b/mm/ksm.c
index dd02780c387f..83e2f74ae7da 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2629,8 +2629,12 @@  struct page *ksm_might_need_to_copy(struct page *page,
 		new_page = NULL;
 	}
 	if (new_page) {
-		copy_user_highpage(new_page, page, address, vma);
-
+		if (copy_mc_user_highpage(new_page, page, address, vma)) {
+			put_page(new_page);
+			new_page = ERR_PTR(-EHWPOISON);
+			memory_failure_queue(page_to_pfn(page), 0);
+			return new_page;
+		}
 		SetPageDirty(new_page);
 		__SetPageUptodate(new_page);
 		__SetPageLocked(new_page);
diff --git a/mm/memory.c b/mm/memory.c
index aad226daf41b..5b2c137dfb2a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3840,6 +3840,9 @@  vm_fault_t do_swap_page(struct vm_fault *vmf)
 		if (unlikely(!page)) {
 			ret = VM_FAULT_OOM;
 			goto out_page;
+		} else if (unlikely(PTR_ERR(page) == -EHWPOISON)) {
+			ret = VM_FAULT_HWPOISON;
+			goto out_page;
 		}
 		folio = page_folio(page);
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 908a529bca12..06aaca111233 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1763,12 +1763,15 @@  static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 	struct page *swapcache;
 	spinlock_t *ptl;
 	pte_t *pte, new_pte;
+	bool hwposioned = false;
 	int ret = 1;
 
 	swapcache = page;
 	page = ksm_might_need_to_copy(page, vma, addr);
 	if (unlikely(!page))
 		return -ENOMEM;
+	else if (unlikely(PTR_ERR(page) == -EHWPOISON))
+		hwposioned = true;
 
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) {
@@ -1776,13 +1779,17 @@  static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 		goto out;
 	}
 
-	if (unlikely(!PageUptodate(page))) {
-		pte_t pteval;
+	if (hwposioned || !PageUptodate(page)) {
+		swp_entry_t swp_entry;
 
 		dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
-		pteval = swp_entry_to_pte(make_swapin_error_entry());
-		set_pte_at(vma->vm_mm, addr, pte, pteval);
-		swap_free(entry);
+		if (hwposioned) {
+			swp_entry = make_hwpoison_entry(swapcache);
+			page = swapcache;
+		} else {
+			swp_entry = make_swapin_error_entry();
+		}
+		new_pte = swp_entry_to_pte(swp_entry);
 		ret = 0;
 		goto out;
 	}
@@ -1816,9 +1823,9 @@  static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 		new_pte = pte_mksoft_dirty(new_pte);
 	if (pte_swp_uffd_wp(*pte))
 		new_pte = pte_mkuffd_wp(new_pte);
+out:
 	set_pte_at(vma->vm_mm, addr, pte, new_pte);
 	swap_free(entry);
-out:
 	pte_unmap_unlock(pte, ptl);
 	if (page != swapcache) {
 		unlock_page(page);
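
For completeness, the entries installed above are consumed on the next
fault: do_swap_page() already maps a hwpoison entry to VM_FAULT_HWPOISON
and a swapin error entry to VM_FAULT_SIGBUS. Roughly, abridged from the
non_swap_entry() handling in mm/memory.c of this era (other entry types
elided):

	entry = pte_to_swp_entry(vmf->orig_pte);
	if (unlikely(non_swap_entry(entry))) {
		if (is_migration_entry(entry)) {
			migration_entry_wait(vma->vm_mm, vmf->pmd,
					     vmf->address);
		} else if (is_hwpoison_entry(entry)) {
			ret = VM_FAULT_HWPOISON;
		} else if (is_swapin_error_entry(entry)) {
			ret = VM_FAULT_SIGBUS;
		}
		/* ... device-private, pte-marker etc. elided ... */
		goto out;
	}

So the hwpoison entry written by unuse_pte() gives any later access the
same VM_FAULT_HWPOISON outcome as the new -EHWPOISON branch this patch
adds to do_swap_page().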