[v2,2/4] mm/hwpoison: check if a subpage of a hugetlb folio is raw HWPOISON
Commit Message
Adds the functionality to tell if a subpage of a hugetlb folio is a
raw HWPOISON page. This functionality relies on RawHwpUnreliable to
be not set; otherwise hugepage's HWPOISON list becomes meaningless.
Exports this functionality to be immediately used in the read operation
for hugetlbfs.
Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>
---
include/linux/hugetlb.h | 19 +++++++++++++++++++
include/linux/mm.h | 7 +++++++
mm/hugetlb.c | 10 ++++++++++
mm/memory-failure.c | 34 ++++++++++++++++++++++++----------
4 files changed, 60 insertions(+), 10 deletions(-)
Comments
On 06/23/23 16:40, Jiaqi Yan wrote:
> Adds the functionality to tell if a subpage of a hugetlb folio is a
> raw HWPOISON page. This functionality relies on RawHwpUnreliable to
> be not set; otherwise hugepage's HWPOISON list becomes meaningless.
>
> Exports this functionality to be immediately used in the read operation
> for hugetlbfs.
>
> Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>
> ---
> include/linux/hugetlb.h | 19 +++++++++++++++++++
> include/linux/mm.h | 7 +++++++
> mm/hugetlb.c | 10 ++++++++++
> mm/memory-failure.c | 34 ++++++++++++++++++++++++----------
> 4 files changed, 60 insertions(+), 10 deletions(-)
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 21f942025fec..8b73a12b7b38 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -1013,6 +1013,25 @@ void hugetlb_register_node(struct node *node);
> void hugetlb_unregister_node(struct node *node);
> #endif
>
> +/*
> + * Struct raw_hwp_page represents information about "raw error page",
> + * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
> + */
> +struct raw_hwp_page {
> + struct llist_node node;
> + struct page *page;
> +};
> +
> +static inline struct llist_head *raw_hwp_list_head(struct folio *folio)
> +{
> + return (struct llist_head *)&folio->_hugetlb_hwpoison;
> +}
> +
> +/*
> + * Check if a given raw @subpage in a hugepage @folio is HWPOISON.
> + */
> +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
> +
> #else /* CONFIG_HUGETLB_PAGE */
> struct hstate {};
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 66032f0d515c..41a283bd41a7 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3671,6 +3671,7 @@ extern const struct attribute_group memory_failure_attr_group;
> extern void memory_failure_queue(unsigned long pfn, int flags);
> extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> bool *migratable_cleared);
> +extern bool __is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
> void num_poisoned_pages_inc(unsigned long pfn);
> void num_poisoned_pages_sub(unsigned long pfn, long i);
> struct task_struct *task_early_kill(struct task_struct *tsk, int force_early);
> @@ -3685,6 +3686,12 @@ static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> return 0;
> }
>
> +static inline bool __is_raw_hwp_subpage(struct folio *folio,
> + struct page *subpage)
> +{
> + return false;
> +}
> +
> static inline void num_poisoned_pages_inc(unsigned long pfn)
> {
> }
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index ea24718db4af..6b860de87590 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -7377,6 +7377,16 @@ int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> return ret;
> }
>
> +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
> +{
> + bool ret;
> +
> + spin_lock_irq(&hugetlb_lock);
> + ret = __is_raw_hwp_subpage(folio, subpage);
> + spin_unlock_irq(&hugetlb_lock);
Can you describe what races the hugetlb_lock prevents here?
On Wed, Jul 5, 2023 at 4:57 PM Mike Kravetz <mike.kravetz@oracle.com> wrote:
>
> On 06/23/23 16:40, Jiaqi Yan wrote:
> > Adds the functionality to tell if a subpage of a hugetlb folio is a
> > raw HWPOISON page. This functionality relies on RawHwpUnreliable to
> > be not set; otherwise hugepage's HWPOISON list becomes meaningless.
> >
> > Exports this functionality to be immediately used in the read operation
> > for hugetlbfs.
> >
> > Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>
> > ---
> > include/linux/hugetlb.h | 19 +++++++++++++++++++
> > include/linux/mm.h | 7 +++++++
> > mm/hugetlb.c | 10 ++++++++++
> > mm/memory-failure.c | 34 ++++++++++++++++++++++++----------
> > 4 files changed, 60 insertions(+), 10 deletions(-)
> >
> > diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> > index 21f942025fec..8b73a12b7b38 100644
> > --- a/include/linux/hugetlb.h
> > +++ b/include/linux/hugetlb.h
> > @@ -1013,6 +1013,25 @@ void hugetlb_register_node(struct node *node);
> > void hugetlb_unregister_node(struct node *node);
> > #endif
> >
> > +/*
> > + * Struct raw_hwp_page represents information about "raw error page",
> > + * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
> > + */
> > +struct raw_hwp_page {
> > + struct llist_node node;
> > + struct page *page;
> > +};
> > +
> > +static inline struct llist_head *raw_hwp_list_head(struct folio *folio)
> > +{
> > + return (struct llist_head *)&folio->_hugetlb_hwpoison;
> > +}
> > +
> > +/*
> > + * Check if a given raw @subpage in a hugepage @folio is HWPOISON.
> > + */
> > +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
> > +
> > #else /* CONFIG_HUGETLB_PAGE */
> > struct hstate {};
> >
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index 66032f0d515c..41a283bd41a7 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -3671,6 +3671,7 @@ extern const struct attribute_group memory_failure_attr_group;
> > extern void memory_failure_queue(unsigned long pfn, int flags);
> > extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> > bool *migratable_cleared);
> > +extern bool __is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
> > void num_poisoned_pages_inc(unsigned long pfn);
> > void num_poisoned_pages_sub(unsigned long pfn, long i);
> > struct task_struct *task_early_kill(struct task_struct *tsk, int force_early);
> > @@ -3685,6 +3686,12 @@ static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> > return 0;
> > }
> >
> > +static inline bool __is_raw_hwp_subpage(struct folio *folio,
> > + struct page *subpage)
> > +{
> > + return false;
> > +}
> > +
> > static inline void num_poisoned_pages_inc(unsigned long pfn)
> > {
> > }
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index ea24718db4af..6b860de87590 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -7377,6 +7377,16 @@ int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
> > return ret;
> > }
> >
> > +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
> > +{
> > + bool ret;
> > +
> > + spin_lock_irq(&hugetlb_lock);
> > + ret = __is_raw_hwp_subpage(folio, subpage);
> > + spin_unlock_irq(&hugetlb_lock);
>
> Can you describe what races the hugetlb_lock prevents here?
I think we should sync here with __get_huge_page_for_hwpoison, who
iterates and inserts an entry to raw_hwp_list. llist itself doesn't
ensure insertion is synchronized with iterating from
__is_raw_hwp_subpage.
> --
> Mike Kravetz
>
> > + return ret;
> > +}
> > +
> > void folio_putback_active_hugetlb(struct folio *folio)
> > {
> > spin_lock_irq(&hugetlb_lock);
> > diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> > index c415c3c462a3..891248e2930e 100644
> > --- a/mm/memory-failure.c
> > +++ b/mm/memory-failure.c
> > @@ -1809,18 +1809,32 @@ EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
> > #endif /* CONFIG_FS_DAX */
> >
> > #ifdef CONFIG_HUGETLB_PAGE
> > -/*
> > - * Struct raw_hwp_page represents information about "raw error page",
> > - * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
> > - */
> > -struct raw_hwp_page {
> > - struct llist_node node;
> > - struct page *page;
> > -};
> >
> > -static inline struct llist_head *raw_hwp_list_head(struct folio *folio)
> > +bool __is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
> > {
> > - return (struct llist_head *)&folio->_hugetlb_hwpoison;
> > + struct llist_head *raw_hwp_head;
> > + struct raw_hwp_page *p, *tmp;
> > + bool ret = false;
> > +
> > + if (!folio_test_hwpoison(folio))
> > + return false;
> > +
> > + /*
> > + * When RawHwpUnreliable is set, kernel lost track of which subpages
> > + * are HWPOISON. So return as if ALL subpages are HWPOISONed.
> > + */
> > + if (folio_test_hugetlb_raw_hwp_unreliable(folio))
> > + return true;
> > +
> > + raw_hwp_head = raw_hwp_list_head(folio);
> > + llist_for_each_entry_safe(p, tmp, raw_hwp_head->first, node) {
> > + if (subpage == p->page) {
> > + ret = true;
> > + break;
> > + }
> > + }
> > +
> > + return ret;
> > }
> >
> > static unsigned long __folio_free_raw_hwp(struct folio *folio, bool move_flag)
> > --
> > 2.41.0.162.gfafddb0af9-goog
> >
On 07/06/23 11:25, Jiaqi Yan wrote:
> On Wed, Jul 5, 2023 at 4:57 PM Mike Kravetz <mike.kravetz@oracle.com> wrote:
> > On 06/23/23 16:40, Jiaqi Yan wrote:
> > >
> > > +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
> > > +{
> > > + bool ret;
> > > +
> > > + spin_lock_irq(&hugetlb_lock);
> > > + ret = __is_raw_hwp_subpage(folio, subpage);
> > > + spin_unlock_irq(&hugetlb_lock);
> >
> > Can you describe what races the hugetlb_lock prevents here?
>
> I think we should sync here with __get_huge_page_for_hwpoison, who
> iterates and inserts an entry to raw_hwp_list. llist itself doesn't
> ensure insertion is synchronized with iterating from
> __is_raw_hwp_subpage.
>
Ok, makes sense. And, since this is only called in the file read patch
when we encounter a PageHWPoison(page), the overhead of the lock cycles
is not of concern.
You can add,
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
On Fri, Jun 23, 2023 at 04:40:13PM +0000, Jiaqi Yan wrote:
> Adds the functionality to tell if a subpage of a hugetlb folio is a
> raw HWPOISON page. This functionality relies on RawHwpUnreliable to
> be not set; otherwise hugepage's HWPOISON list becomes meaningless.
>
> Exports this functionality to be immediately used in the read operation
> for hugetlbfs.
>
> Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>
Looks good to me, thank you.
Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
On Thu, Jul 6, 2023 at 3:06 PM Mike Kravetz <mike.kravetz@oracle.com> wrote:
>
> On 07/06/23 11:25, Jiaqi Yan wrote:
> > On Wed, Jul 5, 2023 at 4:57 PM Mike Kravetz <mike.kravetz@oracle.com> wrote:
> > > On 06/23/23 16:40, Jiaqi Yan wrote:
> > > >
> > > > +bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
> > > > +{
> > > > + bool ret;
> > > > +
> > > > + spin_lock_irq(&hugetlb_lock);
> > > > + ret = __is_raw_hwp_subpage(folio, subpage);
> > > > + spin_unlock_irq(&hugetlb_lock);
> > >
> > > Can you describe what races the hugetlb_lock prevents here?
> >
> > I think we should sync here with __get_huge_page_for_hwpoison, who
> > iterates and inserts an entry to raw_hwp_list. llist itself doesn't
> > ensure insertion is synchronized with iterating from
> > __is_raw_hwp_subpage.
> >
>
> Ok, makes sense. And, since this is only called in the file read patch
> when we encounter a PageHWPoison(page), the overhead of the lock cycles
> is not of concern.
Yes, thanks for pointing this out, (which I forgot), as
is_raw_hwp_subpage() is after PageHWPoison check in
hugetlbfs_read_iter. I think both this and the reason why holding the
lock is worth mentioning in the commit msg.
>
> You can add,
>
> Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
> --
> Mike Kravetz
@@ -1013,6 +1013,25 @@ void hugetlb_register_node(struct node *node);
void hugetlb_unregister_node(struct node *node);
#endif
+/*
+ * Struct raw_hwp_page represents information about "raw error page",
+ * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
+ */
+struct raw_hwp_page {
+ struct llist_node node;
+ struct page *page;
+};
+
+static inline struct llist_head *raw_hwp_list_head(struct folio *folio)
+{
+ return (struct llist_head *)&folio->_hugetlb_hwpoison;
+}
+
+/*
+ * Check if a given raw @subpage in a hugepage @folio is HWPOISON.
+ */
+bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
+
#else /* CONFIG_HUGETLB_PAGE */
struct hstate {};
@@ -3671,6 +3671,7 @@ extern const struct attribute_group memory_failure_attr_group;
extern void memory_failure_queue(unsigned long pfn, int flags);
extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared);
+extern bool __is_raw_hwp_subpage(struct folio *folio, struct page *subpage);
void num_poisoned_pages_inc(unsigned long pfn);
void num_poisoned_pages_sub(unsigned long pfn, long i);
struct task_struct *task_early_kill(struct task_struct *tsk, int force_early);
@@ -3685,6 +3686,12 @@ static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
return 0;
}
+static inline bool __is_raw_hwp_subpage(struct folio *folio,
+ struct page *subpage)
+{
+ return false;
+}
+
static inline void num_poisoned_pages_inc(unsigned long pfn)
{
}
@@ -7377,6 +7377,16 @@ int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
return ret;
}
+bool is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
+{
+ bool ret;
+
+ spin_lock_irq(&hugetlb_lock);
+ ret = __is_raw_hwp_subpage(folio, subpage);
+ spin_unlock_irq(&hugetlb_lock);
+ return ret;
+}
+
void folio_putback_active_hugetlb(struct folio *folio)
{
spin_lock_irq(&hugetlb_lock);
@@ -1809,18 +1809,32 @@ EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
#endif /* CONFIG_FS_DAX */
#ifdef CONFIG_HUGETLB_PAGE
-/*
- * Struct raw_hwp_page represents information about "raw error page",
- * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
- */
-struct raw_hwp_page {
- struct llist_node node;
- struct page *page;
-};
-static inline struct llist_head *raw_hwp_list_head(struct folio *folio)
+bool __is_raw_hwp_subpage(struct folio *folio, struct page *subpage)
{
- return (struct llist_head *)&folio->_hugetlb_hwpoison;
+ struct llist_head *raw_hwp_head;
+ struct raw_hwp_page *p, *tmp;
+ bool ret = false;
+
+ if (!folio_test_hwpoison(folio))
+ return false;
+
+ /*
+ * When RawHwpUnreliable is set, kernel lost track of which subpages
+ * are HWPOISON. So return as if ALL subpages are HWPOISONed.
+ */
+ if (folio_test_hugetlb_raw_hwp_unreliable(folio))
+ return true;
+
+ raw_hwp_head = raw_hwp_list_head(folio);
+ llist_for_each_entry_safe(p, tmp, raw_hwp_head->first, node) {
+ if (subpage == p->page) {
+ ret = true;
+ break;
+ }
+ }
+
+ return ret;
}
static unsigned long __folio_free_raw_hwp(struct folio *folio, bool move_flag)