[PATCHv5,15/16] x86/mm: Introduce kernel_ident_mapping_free()

Message ID 20231222235209.32143-16-kirill.shutemov@linux.intel.com
State New
Headers
Series x86/tdx: Add kexec support |

Commit Message

Kirill A. Shutemov Dec. 22, 2023, 11:52 p.m. UTC
  The helper complements kernel_ident_mapping_init(): it frees the
identity mapping that was previously allocated. It will be used in the
error path to free a partially allocated mapping or if the mapping is no
longer needed.

The caller provides a struct x86_mapping_info with the free_pgt_page()
callback hooked up and the pgd_t to free.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/include/asm/init.h |  3 ++
 arch/x86/mm/ident_map.c     | 73 +++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)
  

Comments

Kai Huang Jan. 8, 2024, 3:13 a.m. UTC | #1
On Sat, 2023-12-23 at 02:52 +0300, Kirill A. Shutemov wrote:
> The helper complements kernel_ident_mapping_init(): it frees the
> identity mapping that was previously allocated. It will be used in the
> error path to free a partially allocated mapping or if the mapping is no
> longer needed.
> 
> The caller provides a struct x86_mapping_info with the free_pgd_page()
> callback hooked up and the pgd_t to free.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> ---
>  arch/x86/include/asm/init.h |  3 ++
>  arch/x86/mm/ident_map.c     | 73 +++++++++++++++++++++++++++++++++++++
>  2 files changed, 76 insertions(+)
> 
> diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
> index cc9ccf61b6bd..14d72727d7ee 100644
> --- a/arch/x86/include/asm/init.h
> +++ b/arch/x86/include/asm/init.h
> @@ -6,6 +6,7 @@
>  
>  struct x86_mapping_info {
>  	void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
> +	void (*free_pgt_page)(void *, void *); /* free buf for page table */
>  	void *context;			 /* context for alloc_pgt_page */
>  	unsigned long page_flag;	 /* page flag for PMD or PUD entry */
>  	unsigned long offset;		 /* ident mapping offset */
> @@ -16,4 +17,6 @@ struct x86_mapping_info {
>  int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
>  				unsigned long pstart, unsigned long pend);
>  
> +void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd);

Maybe range-based free function can provide more flexibility (e.g., you can
directly call the free function to cleanup in kernel_ident_mapping_init()
internally when something goes wrong), but I guess this is sufficient for
current use case (and perhaps the majority use cases).

Reviewed-by: Kai Huang <kai.huang@intel.com>
  
Kai Huang Jan. 8, 2024, 3:30 a.m. UTC | #2
On Mon, 2024-01-08 at 03:13 +0000, Huang, Kai wrote:
> On Sat, 2023-12-23 at 02:52 +0300, Kirill A. Shutemov wrote:
> > The helper complements kernel_ident_mapping_init(): it frees the
> > identity mapping that was previously allocated. It will be used in the
> > error path to free a partially allocated mapping or if the mapping is no
> > longer needed.
> > 
> > The caller provides a struct x86_mapping_info with the free_pgd_page()
> > callback hooked up and the pgd_t to free.
> > 
> > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > ---
> >  arch/x86/include/asm/init.h |  3 ++
> >  arch/x86/mm/ident_map.c     | 73 +++++++++++++++++++++++++++++++++++++
> >  2 files changed, 76 insertions(+)
> > 
> > diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
> > index cc9ccf61b6bd..14d72727d7ee 100644
> > --- a/arch/x86/include/asm/init.h
> > +++ b/arch/x86/include/asm/init.h
> > @@ -6,6 +6,7 @@
> >  
> >  struct x86_mapping_info {
> >  	void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
> > +	void (*free_pgt_page)(void *, void *); /* free buf for page table */
> >  	void *context;			 /* context for alloc_pgt_page */
> >  	unsigned long page_flag;	 /* page flag for PMD or PUD entry */
> >  	unsigned long offset;		 /* ident mapping offset */
> > @@ -16,4 +17,6 @@ struct x86_mapping_info {
> >  int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
> >  				unsigned long pstart, unsigned long pend);
> >  
> > +void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd);
> 
> Maybe range-based free function can provide more flexibility (e.g., you can
> directly call the free function to cleanup in kernel_ident_mapping_init()
> internally when something goes wrong), but I guess this is sufficient for
> current use case (and perhaps the majority use cases).
> 
> Reviewed-by: Kai Huang <kai.huang@intel.com>
> 

Another argument for a range-based free function is that, theoretically, you can
build the identity mapping table using different x86_mapping_info structures on
different ranges, so it makes less sense to use one 'struct x86_mapping_info
*info' to free the entire page table, albeit in this implementation only the
'free_pgt_page()' callback is used.
  
Kirill A. Shutemov Jan. 8, 2024, 10:17 a.m. UTC | #3
On Mon, Jan 08, 2024 at 03:30:21AM +0000, Huang, Kai wrote:
> On Mon, 2024-01-08 at 03:13 +0000, Huang, Kai wrote:
> > On Sat, 2023-12-23 at 02:52 +0300, Kirill A. Shutemov wrote:
> > > The helper complements kernel_ident_mapping_init(): it frees the
> > > identity mapping that was previously allocated. It will be used in the
> > > error path to free a partially allocated mapping or if the mapping is no
> > > longer needed.
> > > 
> > > The caller provides a struct x86_mapping_info with the free_pgd_page()
> > > callback hooked up and the pgd_t to free.
> > > 
> > > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > > ---
> > >  arch/x86/include/asm/init.h |  3 ++
> > >  arch/x86/mm/ident_map.c     | 73 +++++++++++++++++++++++++++++++++++++
> > >  2 files changed, 76 insertions(+)
> > > 
> > > diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
> > > index cc9ccf61b6bd..14d72727d7ee 100644
> > > --- a/arch/x86/include/asm/init.h
> > > +++ b/arch/x86/include/asm/init.h
> > > @@ -6,6 +6,7 @@
> > >  
> > >  struct x86_mapping_info {
> > >  	void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
> > > +	void (*free_pgt_page)(void *, void *); /* free buf for page table */
> > >  	void *context;			 /* context for alloc_pgt_page */
> > >  	unsigned long page_flag;	 /* page flag for PMD or PUD entry */
> > >  	unsigned long offset;		 /* ident mapping offset */
> > > @@ -16,4 +17,6 @@ struct x86_mapping_info {
> > >  int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
> > >  				unsigned long pstart, unsigned long pend);
> > >  
> > > +void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd);
> > 
> > Maybe range-based free function can provide more flexibility (e.g., you can
> > directly call the free function to cleanup in kernel_ident_mapping_init()
> > internally when something goes wrong), but I guess this is sufficient for
> > current use case (and perhaps the majority use cases).
> > 
> > Reviewed-by: Kai Huang <kai.huang@intel.com>
> > 
> 
> Another argument of range-based free function is, theoretically you can build
> the identical mapping table using different x86_mapping_info on different
> ranges, thus it makes less sense to use one 'struct x86_mapping_info *info' to
> free the entire page table, albeit in this implementation only the
> 'free_pgt_page()' callback is used. 

The interface can be changed if there is ever a need for such behaviour.
This kind of future-proofing is rarely helpful.
  
Kai Huang Jan. 8, 2024, 1:13 p.m. UTC | #4
On Mon, 2024-01-08 at 13:17 +0300, kirill.shutemov@linux.intel.com wrote:
> On Mon, Jan 08, 2024 at 03:30:21AM +0000, Huang, Kai wrote:
> > On Mon, 2024-01-08 at 03:13 +0000, Huang, Kai wrote:
> > > On Sat, 2023-12-23 at 02:52 +0300, Kirill A. Shutemov wrote:
> > > > The helper complements kernel_ident_mapping_init(): it frees the
> > > > identity mapping that was previously allocated. It will be used in the
> > > > error path to free a partially allocated mapping or if the mapping is no
> > > > longer needed.
> > > > 
> > > > The caller provides a struct x86_mapping_info with the free_pgd_page()
> > > > callback hooked up and the pgd_t to free.
> > > > 
> > > > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > > > ---
> > > >  arch/x86/include/asm/init.h |  3 ++
> > > >  arch/x86/mm/ident_map.c     | 73 +++++++++++++++++++++++++++++++++++++
> > > >  2 files changed, 76 insertions(+)
> > > > 
> > > > diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
> > > > index cc9ccf61b6bd..14d72727d7ee 100644
> > > > --- a/arch/x86/include/asm/init.h
> > > > +++ b/arch/x86/include/asm/init.h
> > > > @@ -6,6 +6,7 @@
> > > >  
> > > >  struct x86_mapping_info {
> > > >  	void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
> > > > +	void (*free_pgt_page)(void *, void *); /* free buf for page table */
> > > >  	void *context;			 /* context for alloc_pgt_page */
> > > >  	unsigned long page_flag;	 /* page flag for PMD or PUD entry */
> > > >  	unsigned long offset;		 /* ident mapping offset */
> > > > @@ -16,4 +17,6 @@ struct x86_mapping_info {
> > > >  int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
> > > >  				unsigned long pstart, unsigned long pend);
> > > >  
> > > > +void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd);
> > > 
> > > Maybe range-based free function can provide more flexibility (e.g., you can
> > > directly call the free function to cleanup in kernel_ident_mapping_init()
> > > internally when something goes wrong), but I guess this is sufficient for
> > > current use case (and perhaps the majority use cases).
> > > 
> > > Reviewed-by: Kai Huang <kai.huang@intel.com>
> > > 
> > 
> > Another argument of range-based free function is, theoretically you can build
> > the identical mapping table using different x86_mapping_info on different
> > ranges, thus it makes less sense to use one 'struct x86_mapping_info *info' to
> > free the entire page table, albeit in this implementation only the
> > 'free_pgt_page()' callback is used. 
> 
> The interface can be changed if there will be need for such behaviour.
> This kind of future-proofing rarely helpful.
> 

Do you want to just pass the 'free_pgt_page' function pointer to
kernel_ident_mapping_free(), instead of 'struct x86_mapping_info *info'?  As
mentioned above conceptually the page table can be built from multiple
x86_mapping_info for multiple ranges.
  
Kirill A. Shutemov Jan. 8, 2024, 1:35 p.m. UTC | #5
On Mon, Jan 08, 2024 at 01:13:18PM +0000, Huang, Kai wrote:
> On Mon, 2024-01-08 at 13:17 +0300, kirill.shutemov@linux.intel.com wrote:
> > On Mon, Jan 08, 2024 at 03:30:21AM +0000, Huang, Kai wrote:
> > > On Mon, 2024-01-08 at 03:13 +0000, Huang, Kai wrote:
> > > > On Sat, 2023-12-23 at 02:52 +0300, Kirill A. Shutemov wrote:
> > > > > The helper complements kernel_ident_mapping_init(): it frees the
> > > > > identity mapping that was previously allocated. It will be used in the
> > > > > error path to free a partially allocated mapping or if the mapping is no
> > > > > longer needed.
> > > > > 
> > > > > The caller provides a struct x86_mapping_info with the free_pgd_page()
> > > > > callback hooked up and the pgd_t to free.
> > > > > 
> > > > > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > > > > ---
> > > > >  arch/x86/include/asm/init.h |  3 ++
> > > > >  arch/x86/mm/ident_map.c     | 73 +++++++++++++++++++++++++++++++++++++
> > > > >  2 files changed, 76 insertions(+)
> > > > > 
> > > > > diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
> > > > > index cc9ccf61b6bd..14d72727d7ee 100644
> > > > > --- a/arch/x86/include/asm/init.h
> > > > > +++ b/arch/x86/include/asm/init.h
> > > > > @@ -6,6 +6,7 @@
> > > > >  
> > > > >  struct x86_mapping_info {
> > > > >  	void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
> > > > > +	void (*free_pgt_page)(void *, void *); /* free buf for page table */
> > > > >  	void *context;			 /* context for alloc_pgt_page */
> > > > >  	unsigned long page_flag;	 /* page flag for PMD or PUD entry */
> > > > >  	unsigned long offset;		 /* ident mapping offset */
> > > > > @@ -16,4 +17,6 @@ struct x86_mapping_info {
> > > > >  int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
> > > > >  				unsigned long pstart, unsigned long pend);
> > > > >  
> > > > > +void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd);
> > > > 
> > > > Maybe range-based free function can provide more flexibility (e.g., you can
> > > > directly call the free function to cleanup in kernel_ident_mapping_init()
> > > > internally when something goes wrong), but I guess this is sufficient for
> > > > current use case (and perhaps the majority use cases).
> > > > 
> > > > Reviewed-by: Kai Huang <kai.huang@intel.com>
> > > > 
> > > 
> > > Another argument of range-based free function is, theoretically you can build
> > > the identical mapping table using different x86_mapping_info on different
> > > ranges, thus it makes less sense to use one 'struct x86_mapping_info *info' to
> > > free the entire page table, albeit in this implementation only the
> > > 'free_pgt_page()' callback is used. 
> > 
> > The interface can be changed if there will be need for such behaviour.
> > This kind of future-proofing rarely helpful.
> > 
> 
> Do you want to just pass the 'free_pgt_page' function pointer to
> kernel_ident_mapping_free(), instead of 'struct x86_mapping_info *info'?  As
> mentioned above conceptually the page table can be built from multiple
> x86_mapping_info for multiple ranges.

I don't think we have such cases in kernel. Let's not overcomplicate
things. I see value in keeping interface symmetric.

We can always change things according to needs.
  

Patch

diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index cc9ccf61b6bd..14d72727d7ee 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -6,6 +6,7 @@ 
 
 struct x86_mapping_info {
 	void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
+	void (*free_pgt_page)(void *, void *); /* free buf for page table */
 	void *context;			 /* context for alloc_pgt_page */
 	unsigned long page_flag;	 /* page flag for PMD or PUD entry */
 	unsigned long offset;		 /* ident mapping offset */
@@ -16,4 +17,6 @@  struct x86_mapping_info {
 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 				unsigned long pstart, unsigned long pend);
 
+void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd);
+
 #endif /* _ASM_X86_INIT_H */
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 968d7005f4a7..3996af7b4abf 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -4,6 +4,79 @@ 
  * included by both the compressed kernel and the regular kernel.
  */
 
+/*
+ * Free the PTE page pointed to by @pmd.
+ *
+ * The caller guarantees @pmd is a present, non-leaf entry, so it
+ * references a PTE table allocated via alloc_pgt_page(); hand that
+ * table back through the caller-supplied free_pgt_page() callback.
+ */
+static void free_pte(struct x86_mapping_info *info, pmd_t *pmd)
+{
+	pte_t *pte = pte_offset_kernel(pmd, 0);
+
+	info->free_pgt_page(pte, info->context);
+}
+
+/*
+ * Free the PMD page referenced by @pud, including any PTE pages
+ * hanging off its present non-leaf entries.
+ */
+static void free_pmd(struct x86_mapping_info *info, pud_t *pud)
+{
+	pmd_t *pmd = pmd_offset(pud, 0);
+	int i;
+
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		if (!pmd_present(pmd[i]))
+			continue;
+
+		/* A leaf PMD maps a large page directly: no PTE table below it. */
+		if (pmd_leaf(pmd[i]))
+			continue;
+
+		free_pte(info, &pmd[i]);
+	}
+
+	/* All children released; now free the PMD table itself. */
+	info->free_pgt_page(pmd, info->context);
+}
+
+/*
+ * Free the PUD page referenced by @p4d, recursing into any PMD pages
+ * hanging off its present non-leaf entries.
+ */
+static void free_pud(struct x86_mapping_info *info, p4d_t *p4d)
+{
+	pud_t *pud = pud_offset(p4d, 0);
+	int i;
+
+	for (i = 0; i < PTRS_PER_PUD; i++) {
+		if (!pud_present(pud[i]))
+			continue;
+
+		/* A leaf PUD maps a large page directly: no PMD table below it. */
+		if (pud_leaf(pud[i]))
+			continue;
+
+		free_pmd(info, &pud[i]);
+	}
+
+	/* All children released; now free the PUD table itself. */
+	info->free_pgt_page(pud, info->context);
+}
+
+/*
+ * Free the P4D level referenced by @pgd, recursing into any PUD pages
+ * hanging off its present entries.
+ */
+static void free_p4d(struct x86_mapping_info *info, pgd_t *pgd)
+{
+	p4d_t *p4d = p4d_offset(pgd, 0);
+	int i;
+
+	for (i = 0; i < PTRS_PER_P4D; i++) {
+		if (!p4d_present(p4d[i]))
+			continue;
+
+		free_pud(info, &p4d[i]);
+	}
+
+	/*
+	 * Only free the P4D page when it is a separate table, i.e. with
+	 * 5-level paging.  Without it the P4D is folded into the PGD (the
+	 * p4d_offset() above returns the PGD itself), and the PGD page is
+	 * freed by kernel_ident_mapping_free() — freeing it here as well
+	 * would be a double free.
+	 */
+	if (pgtable_l5_enabled())
+		info->free_pgt_page(pgd, info->context);
+}
+
+/*
+ * kernel_ident_mapping_free - free an identity mapping built by
+ * kernel_ident_mapping_init()
+ * @info: mapping info; only the free_pgt_page() callback and @context
+ *        are used here
+ * @pgd:  root of the page table hierarchy to free
+ *
+ * Walks the hierarchy top-down, releasing every page-table page via
+ * info->free_pgt_page(), including the root PGD page itself.  Safe on a
+ * partially built mapping: absent entries are simply skipped at each
+ * level.
+ */
+void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd)
+{
+	int i;
+
+	for (i = 0; i < PTRS_PER_PGD; i++) {
+		if (!pgd_present(pgd[i]))
+			continue;
+
+		free_p4d(info, &pgd[i]);
+	}
+
+	/* Finally release the root PGD page provided by the caller. */
+	info->free_pgt_page(pgd, info->context);
+}
+
 static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page,
 			   unsigned long addr, unsigned long end)
 {