[-next,v3] mm: hwpoison: support recovery from ksm_might_need_to_copy()
Commit Message
When the kernel copies a page in ksm_might_need_to_copy() but runs
into an uncorrectable error, it will crash, since the poisoned page
is consumed by the kernel. This is similar to copy-on-write poison
recovery: when an error is detected during the page copy, return
VM_FAULT_HWPOISON in do_swap_page(), and install a hwpoison entry in
unuse_pte() during swapoff, which helps us avoid a system crash.
Note that memory failure on a KSM page will be skipped; however,
memory_failure_queue() is still called to stay consistent with the
general memory failure process.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 mm/ksm.c      |  8 ++++++--
 mm/memory.c   |  3 +++
 mm/swapfile.c | 19 +++++++++++++------
 3 files changed, 22 insertions(+), 8 deletions(-)
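As background for the checks added below: with this change,
ksm_might_need_to_copy() reports a failed copy as ERR_PTR(-EHWPOISON)
instead of a valid page pointer, so callers must tell a NULL return
(allocation failure) apart from an error pointer. The standalone sketch
below illustrates only that error-pointer convention; the simplified
ERR_PTR()/PTR_ERR() stand-ins and the might_need_to_copy() helper are
hypothetical illustrations, not kernel code.

#include <errno.h>
#include <stdio.h>

#ifndef EHWPOISON
#define EHWPOISON 133                   /* Linux errno value; fallback only */
#endif

/* Simplified userspace stand-ins for the kernel's ERR_PTR()/PTR_ERR() */
#define ERR_PTR(err)    ((void *)(long)(err))
#define PTR_ERR(ptr)    ((long)(ptr))

struct page { int poisoned; };          /* toy type, not the kernel struct */

/*
 * Hypothetical stand-in for ksm_might_need_to_copy(): returns the page,
 * NULL when no memory is available for a copy, or ERR_PTR(-EHWPOISON)
 * when copying the page hits an uncorrectable memory error.
 */
static struct page *might_need_to_copy(struct page *page)
{
        if (page->poisoned)
                return ERR_PTR(-EHWPOISON);
        return page;
}

int main(void)
{
        struct page poisoned = { .poisoned = 1 };
        struct page *page = might_need_to_copy(&poisoned);

        if (!page)
                puts("no memory for the copy -> VM_FAULT_OOM");
        else if (PTR_ERR(page) == -EHWPOISON)
                puts("copy consumed poison -> VM_FAULT_HWPOISON");
        else
                puts("page (or its copy) is usable");
        return 0;
}

The real checks in do_swap_page() and unuse_pte() take the same shape,
as the hunks at the end of this report show.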
Comments
Hi Kefeng,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on next-20221208]
[cannot apply to akpm-mm/mm-everything linus/master v6.1 v6.1-rc8 v6.1-rc7 v6.1]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Kefeng-Wang/mm-hwposion-support-recovery-from-ksm_might_need_to_copy/20221213-105100
patch link: https://lore.kernel.org/r/20221213030557.143432-1-wangkefeng.wang%40huawei.com
patch subject: [PATCH -next v3] mm: hwposion: support recovery from ksm_might_need_to_copy()
config: i386-randconfig-a002
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/0015a0b43762219cbe6edac8bb9e7e385978152b
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Kefeng-Wang/mm-hwposion-support-recovery-from-ksm_might_need_to_copy/20221213-105100
git checkout 0015a0b43762219cbe6edac8bb9e7e385978152b
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash
If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
>> mm/swapfile.c:1777:6: warning: variable 'new_pte' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) {
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/compiler.h:78:22: note: expanded from macro 'unlikely'
# define unlikely(x) __builtin_expect(!!(x), 0)
^~~~~~~~~~~~~~~~~~~~~~~~~~
mm/swapfile.c:1827:36: note: uninitialized use occurs here
set_pte_at(vma->vm_mm, addr, pte, new_pte);
^~~~~~~
mm/swapfile.c:1777:2: note: remove the 'if' if its condition is always false
if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) {
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
mm/swapfile.c:1765:2: note: variable 'new_pte' is declared here
pte_t *pte, new_pte;
^
1 warning generated.
vim +1777 mm/swapfile.c
179ef71cbc0852 Cyrill Gorcunov 2013-08-13 1753
^1da177e4c3f41 Linus Torvalds 2005-04-16 1754 /*
72866f6f277ec0 Hugh Dickins 2005-10-29 1755 * No need to decide whether this PTE shares the swap entry with others,
72866f6f277ec0 Hugh Dickins 2005-10-29 1756 * just let do_wp_page work it out if a write is requested later - to
72866f6f277ec0 Hugh Dickins 2005-10-29 1757 * force COW, vm_page_prot omits write permission from any private vma.
^1da177e4c3f41 Linus Torvalds 2005-04-16 1758 */
044d66c1d2b1c5 Hugh Dickins 2008-02-07 1759 static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
f102cd8b173e06 Matthew Wilcox (Oracle 2022-09-02 1760) unsigned long addr, swp_entry_t entry, struct folio *folio)
^1da177e4c3f41 Linus Torvalds 2005-04-16 1761 {
f102cd8b173e06 Matthew Wilcox (Oracle 2022-09-02 1762) struct page *page = folio_file_page(folio, swp_offset(entry));
9e16b7fb1d066d Hugh Dickins 2013-02-22 1763 struct page *swapcache;
044d66c1d2b1c5 Hugh Dickins 2008-02-07 1764 spinlock_t *ptl;
14a762dd1977cf Miaohe Lin 2022-05-19 1765 pte_t *pte, new_pte;
0015a0b4376221 Kefeng Wang 2022-12-13 1766 bool hwposioned = false;
044d66c1d2b1c5 Hugh Dickins 2008-02-07 1767 int ret = 1;
044d66c1d2b1c5 Hugh Dickins 2008-02-07 1768
9e16b7fb1d066d Hugh Dickins 2013-02-22 1769 swapcache = page;
9e16b7fb1d066d Hugh Dickins 2013-02-22 1770 page = ksm_might_need_to_copy(page, vma, addr);
9e16b7fb1d066d Hugh Dickins 2013-02-22 1771 if (unlikely(!page))
9e16b7fb1d066d Hugh Dickins 2013-02-22 1772 return -ENOMEM;
0015a0b4376221 Kefeng Wang 2022-12-13 1773 else if (unlikely(PTR_ERR(page) == -EHWPOISON))
0015a0b4376221 Kefeng Wang 2022-12-13 1774 hwposioned = true;
9e16b7fb1d066d Hugh Dickins 2013-02-22 1775
044d66c1d2b1c5 Hugh Dickins 2008-02-07 1776 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
9f8bdb3f3dad3f Hugh Dickins 2016-01-15 @1777 if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) {
044d66c1d2b1c5 Hugh Dickins 2008-02-07 1778 ret = 0;
044d66c1d2b1c5 Hugh Dickins 2008-02-07 1779 goto out;
044d66c1d2b1c5 Hugh Dickins 2008-02-07 1780 }
8a9f3ccd24741b Balbir Singh 2008-02-07 1781
0015a0b4376221 Kefeng Wang 2022-12-13 1782 if (hwposioned || !PageUptodate(page)) {
0015a0b4376221 Kefeng Wang 2022-12-13 1783 swp_entry_t swp_entry;
9f186f9e5fa9eb Miaohe Lin 2022-05-19 1784
9f186f9e5fa9eb Miaohe Lin 2022-05-19 1785 dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
0015a0b4376221 Kefeng Wang 2022-12-13 1786 if (hwposioned) {
0015a0b4376221 Kefeng Wang 2022-12-13 1787 swp_entry = make_hwpoison_entry(swapcache);
0015a0b4376221 Kefeng Wang 2022-12-13 1788 page = swapcache;
0015a0b4376221 Kefeng Wang 2022-12-13 1789 } else {
0015a0b4376221 Kefeng Wang 2022-12-13 1790 swp_entry = make_swapin_error_entry();
0015a0b4376221 Kefeng Wang 2022-12-13 1791 }
0015a0b4376221 Kefeng Wang 2022-12-13 1792 new_pte = swp_entry_to_pte(swp_entry);
9f186f9e5fa9eb Miaohe Lin 2022-05-19 1793 ret = 0;
9f186f9e5fa9eb Miaohe Lin 2022-05-19 1794 goto out;
9f186f9e5fa9eb Miaohe Lin 2022-05-19 1795 }
9f186f9e5fa9eb Miaohe Lin 2022-05-19 1796
78fbe906cc900b David Hildenbrand 2022-05-09 1797 /* See do_swap_page() */
78fbe906cc900b David Hildenbrand 2022-05-09 1798 BUG_ON(!PageAnon(page) && PageMappedToDisk(page));
78fbe906cc900b David Hildenbrand 2022-05-09 1799 BUG_ON(PageAnon(page) && PageAnonExclusive(page));
78fbe906cc900b David Hildenbrand 2022-05-09 1800
b084d4353ff99d KAMEZAWA Hiroyuki 2010-03-05 1801 dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
d559db086ff5be KAMEZAWA Hiroyuki 2010-03-05 1802 inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
^1da177e4c3f41 Linus Torvalds 2005-04-16 1803 get_page(page);
00501b531c4723 Johannes Weiner 2014-08-08 1804 if (page == swapcache) {
1493a1913e34b0 David Hildenbrand 2022-05-09 1805 rmap_t rmap_flags = RMAP_NONE;
1493a1913e34b0 David Hildenbrand 2022-05-09 1806
1493a1913e34b0 David Hildenbrand 2022-05-09 1807 /*
1493a1913e34b0 David Hildenbrand 2022-05-09 1808 * See do_swap_page(): PageWriteback() would be problematic.
1493a1913e34b0 David Hildenbrand 2022-05-09 1809 * However, we do a wait_on_page_writeback() just before this
1493a1913e34b0 David Hildenbrand 2022-05-09 1810 * call and have the page locked.
1493a1913e34b0 David Hildenbrand 2022-05-09 1811 */
1493a1913e34b0 David Hildenbrand 2022-05-09 1812 VM_BUG_ON_PAGE(PageWriteback(page), page);
1493a1913e34b0 David Hildenbrand 2022-05-09 1813 if (pte_swp_exclusive(*pte))
1493a1913e34b0 David Hildenbrand 2022-05-09 1814 rmap_flags |= RMAP_EXCLUSIVE;
1493a1913e34b0 David Hildenbrand 2022-05-09 1815
1493a1913e34b0 David Hildenbrand 2022-05-09 1816 page_add_anon_rmap(page, vma, addr, rmap_flags);
00501b531c4723 Johannes Weiner 2014-08-08 1817 } else { /* ksm created a completely new copy */
40f2bbf71161fa David Hildenbrand 2022-05-09 1818 page_add_new_anon_rmap(page, vma, addr);
b518154e59aab3 Joonsoo Kim 2020-08-11 1819 lru_cache_add_inactive_or_unevictable(page, vma);
00501b531c4723 Johannes Weiner 2014-08-08 1820 }
14a762dd1977cf Miaohe Lin 2022-05-19 1821 new_pte = pte_mkold(mk_pte(page, vma->vm_page_prot));
14a762dd1977cf Miaohe Lin 2022-05-19 1822 if (pte_swp_soft_dirty(*pte))
14a762dd1977cf Miaohe Lin 2022-05-19 1823 new_pte = pte_mksoft_dirty(new_pte);
14a762dd1977cf Miaohe Lin 2022-05-19 1824 if (pte_swp_uffd_wp(*pte))
14a762dd1977cf Miaohe Lin 2022-05-19 1825 new_pte = pte_mkuffd_wp(new_pte);
0015a0b4376221 Kefeng Wang 2022-12-13 1826 out:
14a762dd1977cf Miaohe Lin 2022-05-19 1827 set_pte_at(vma->vm_mm, addr, pte, new_pte);
^1da177e4c3f41 Linus Torvalds 2005-04-16 1828 swap_free(entry);
044d66c1d2b1c5 Hugh Dickins 2008-02-07 1829 pte_unmap_unlock(pte, ptl);
9e16b7fb1d066d Hugh Dickins 2013-02-22 1830 if (page != swapcache) {
9e16b7fb1d066d Hugh Dickins 2013-02-22 1831 unlock_page(page);
9e16b7fb1d066d Hugh Dickins 2013-02-22 1832 put_page(page);
9e16b7fb1d066d Hugh Dickins 2013-02-22 1833 }
044d66c1d2b1c5 Hugh Dickins 2008-02-07 1834 return ret;
^1da177e4c3f41 Linus Torvalds 2005-04-16 1835 }
^1da177e4c3f41 Linus Torvalds 2005-04-16 1836
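To make the clang report above concrete: with the patch applied, the
early bail-out at line 1777 (pte_same_as_swp() failing) jumps straight
to the relocated out: label, and out: now performs set_pte_at() with
new_pte before any path has assigned it. The sketch below is a minimal
userspace model of just that control flow; the names and plain int
types are illustrative stand-ins, not kernel code.

#include <stdio.h>

/*
 * Minimal model of the flow clang flags in unuse_pte(): the early
 * "goto out" skips every assignment to new_pte, yet the relocated
 * out: label still consumes it.
 */
static int unuse_pte_like(int pte_changed)
{
        int new_pte;            /* stands in for "pte_t *pte, new_pte;" */
        int ret = 1;

        if (pte_changed) {      /* models pte_same_as_swp() failing */
                ret = 0;
                goto out;       /* new_pte has not been assigned yet */
        }

        new_pte = 42;           /* every other path does assign it */
out:
        printf("set_pte_at(new_pte = %d)\n", new_pte);  /* flagged use */
        return ret;
}

int main(void)
{
        unuse_pte_like(0);      /* well-defined path; the warning is static */
        return 0;
}

Compiling the sketch with clang and -Wsometimes-uninitialized reproduces
the same class of diagnostic; the relocation of the out: label appears in
the last mm/swapfile.c hunk below.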
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2629,8 +2629,12 @@ struct page *ksm_might_need_to_copy(struct page *page,
new_page = NULL;
}
if (new_page) {
- copy_user_highpage(new_page, page, address, vma);
-
+ if (copy_mc_user_highpage(new_page, page, address, vma)) {
+ put_page(new_page);
+ new_page = ERR_PTR(-EHWPOISON);
+ memory_failure_queue(page_to_pfn(page), 0);
+ return new_page;
+ }
SetPageDirty(new_page);
__SetPageUptodate(new_page);
__SetPageLocked(new_page);
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3840,6 +3840,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (unlikely(!page)) {
ret = VM_FAULT_OOM;
goto out_page;
+ } else if (unlikely(PTR_ERR(page) == -EHWPOISON)) {
+ ret = VM_FAULT_HWPOISON;
+ goto out_page;
}
folio = page_folio(page);
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1763,12 +1763,15 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
struct page *swapcache;
spinlock_t *ptl;
pte_t *pte, new_pte;
+ bool hwposioned = false;
int ret = 1;
swapcache = page;
page = ksm_might_need_to_copy(page, vma, addr);
if (unlikely(!page))
return -ENOMEM;
+ else if (unlikely(PTR_ERR(page) == -EHWPOISON))
+ hwposioned = true;
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) {
@@ -1776,13 +1779,17 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
goto out;
}
- if (unlikely(!PageUptodate(page))) {
- pte_t pteval;
+ if (hwposioned || !PageUptodate(page)) {
+ swp_entry_t swp_entry;
dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
- pteval = swp_entry_to_pte(make_swapin_error_entry());
- set_pte_at(vma->vm_mm, addr, pte, pteval);
- swap_free(entry);
+ if (hwposioned) {
+ swp_entry = make_hwpoison_entry(swapcache);
+ page = swapcache;
+ } else {
+ swp_entry = make_swapin_error_entry();
+ }
+ new_pte = swp_entry_to_pte(swp_entry);
ret = 0;
goto out;
}
@@ -1816,9 +1823,9 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
new_pte = pte_mksoft_dirty(new_pte);
if (pte_swp_uffd_wp(*pte))
new_pte = pte_mkuffd_wp(new_pte);
+out:
set_pte_at(vma->vm_mm, addr, pte, new_pte);
swap_free(entry);
-out:
pte_unmap_unlock(pte, ptl);
if (page != swapcache) {
unlock_page(page);