[v3,13/34] m68k: Implement the new page table range API
Commit Message
Add set_ptes(), update_mmu_cache_range(), flush_icache_pages() and
flush_dcache_folio().
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: linux-m68k@lists.linux-m68k.org
---
arch/m68k/include/asm/cacheflush_mm.h | 26 +++++++++++++++++---------
arch/m68k/include/asm/pgtable_mm.h | 21 ++++++++++++++++++---
arch/m68k/mm/motorola.c | 2 +-
3 files changed, 36 insertions(+), 13 deletions(-)
Comments
Hi Willy,
On Tue, Feb 28, 2023 at 10:37 PM Matthew Wilcox (Oracle)
<willy@infradead.org> wrote:
> Add set_ptes(), update_mmu_cache_range(), flush_icache_pages() and
> flush_dcache_folio().
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Thanks for your patch!
> --- a/arch/m68k/include/asm/cacheflush_mm.h
> +++ b/arch/m68k/include/asm/cacheflush_mm.h
> @@ -220,24 +220,28 @@ static inline void flush_cache_page(struct vm_area_struct *vma, unsigned long vm
>
> /* Push the page at kernel virtual address and clear the icache */
> /* RZ: use cpush %bc instead of cpush %dc, cinv %ic */
> -static inline void __flush_page_to_ram(void *vaddr)
> +static inline void __flush_pages_to_ram(void *vaddr, unsigned int nr)
> {
> if (CPU_IS_COLDFIRE) {
> unsigned long addr, start, end;
> addr = ((unsigned long) vaddr) & ~(PAGE_SIZE - 1);
> start = addr & ICACHE_SET_MASK;
> - end = (addr + PAGE_SIZE - 1) & ICACHE_SET_MASK;
> + end = (addr + nr * PAGE_SIZE - 1) & ICACHE_SET_MASK;
> if (start > end) {
> flush_cf_bcache(0, end);
> end = ICACHE_MAX_ADDR;
> }
> flush_cf_bcache(start, end);
> } else if (CPU_IS_040_OR_060) {
> - __asm__ __volatile__("nop\n\t"
> - ".chip 68040\n\t"
> - "cpushp %%bc,(%0)\n\t"
> - ".chip 68k"
> - : : "a" (__pa(vaddr)));
> + unsigned long paddr = __pa(vaddr);
> +
> + while (nr--) {
> + __asm__ __volatile__("nop\n\t"
> + ".chip 68040\n\t"
> + "cpushp %%bc,(%0)\n\t"
> + ".chip 68k"
> + : : "a" (paddr + nr * PAGE_SIZE));
As gcc (9.5.0) keeps on calculating "paddr + nr * PAGE_SIZE"
inside the loop (albeit using a shift instead of a multiplication),
please use "paddr" here, followed by "paddr += PAGE_SIZE;".
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
On Sun, Mar 05, 2023 at 11:16:13AM +0100, Geert Uytterhoeven wrote:
> > + while (nr--) {
> > + __asm__ __volatile__("nop\n\t"
> > + ".chip 68040\n\t"
> > + "cpushp %%bc,(%0)\n\t"
> > + ".chip 68k"
> > + : : "a" (paddr + nr * PAGE_SIZE));
>
> As gcc (9.5.0) keeps on calculating "paddr + nr * PAGE_SIZE"
> inside the loop (albeit using a shift instead of a multiplication),
> please use "paddr" here, followed by "paddr += PAGE_SIZE;".
Thanks. So this?
+++ b/arch/m68k/include/asm/cacheflush_mm.h
@@ -235,13 +235,14 @@ static inline void __flush_pages_to_ram(void *vaddr, unsigned int nr)
} else if (CPU_IS_040_OR_060) {
unsigned long paddr = __pa(vaddr);
- while (nr--) {
+ do {
__asm__ __volatile__("nop\n\t"
".chip 68040\n\t"
"cpushp %%bc,(%0)\n\t"
".chip 68k"
- : : "a" (paddr + nr * PAGE_SIZE));
- }
+ : : "a" (paddr));
+ paddr += PAGE_SIZE;
+ } while (--nr);
} else {
unsigned long _tmp;
__asm__ __volatile__("movec %%cacr,%0\n\t"
Also, I noticed that I broke sun3. It puts the PFN in bits 0-n instead
of 12-n. New patch coming soon.
Hi Willy,
On Sun, Mar 5, 2023 at 4:28 PM Matthew Wilcox <willy@infradead.org> wrote:
> On Sun, Mar 05, 2023 at 11:16:13AM +0100, Geert Uytterhoeven wrote:
> > > + while (nr--) {
> > > + __asm__ __volatile__("nop\n\t"
> > > + ".chip 68040\n\t"
> > > + "cpushp %%bc,(%0)\n\t"
> > > + ".chip 68k"
> > > + : : "a" (paddr + nr * PAGE_SIZE));
> >
> > As gcc (9.5.0) keeps on calculating "paddr + nr * PAGE_SIZE"
> > inside the loop (albeit using a shift instead of a multiplication),
> > please use "paddr" here, followed by "paddr += PAGE_SIZE;".
>
> Thanks. So this?
>
> +++ b/arch/m68k/include/asm/cacheflush_mm.h
> @@ -235,13 +235,14 @@ static inline void __flush_pages_to_ram(void *vaddr, unsigned int nr)
> } else if (CPU_IS_040_OR_060) {
> unsigned long paddr = __pa(vaddr);
>
> - while (nr--) {
> + do {
> __asm__ __volatile__("nop\n\t"
> ".chip 68040\n\t"
> "cpushp %%bc,(%0)\n\t"
> ".chip 68k"
> - : : "a" (paddr + nr * PAGE_SIZE));
> - }
> + : : "a" (paddr));
> + paddr += PAGE_SIZE;
> + } while (--nr);
> } else {
> unsigned long _tmp;
> __asm__ __volatile__("movec %%cacr,%0\n\t"
LGTM. Might be safer to keep the "while (nr--) {", just in case someone
ever passes zero.
> Also, I noticed that I broke sun3. It puts the PFN in bits 0-n instead
> of 12-n. New patch coming soon.
Thanks, hadn't noticed (there are no sun3-specific code changes in
this series?)
Gr{oetje,eeting}s,
Geert
Hi Matthew, Geert,
sorry, I missed that one when it got posted ...
On 6/03/23 04:28, Matthew Wilcox wrote:
> On Sun, Mar 05, 2023 at 11:16:13AM +0100, Geert Uytterhoeven wrote:
>>> + while (nr--) {
>>> + __asm__ __volatile__("nop\n\t"
>>> + ".chip 68040\n\t"
>>> + "cpushp %%bc,(%0)\n\t"
>>> + ".chip 68k"
>>> + : : "a" (paddr + nr * PAGE_SIZE));
>> As gcc (9.5.0) keeps on calculating "paddr + nr * PAGE_SIZE"
>> inside the loop (albeit using a shift instead of a multiplication),
>> please use "paddr" here, followed by "paddr += PAGE_SIZE;".
Are we certain that contiguous vaddr always maps to contiguous paddr?
If not, I'd suggest we increment vaddr inside the loop, and use __pa()
each time:
> +++ b/arch/m68k/include/asm/cacheflush_mm.h
> @@ -235,13 +235,14 @@ static inline void __flush_pages_to_ram(void *vaddr, unsigned int nr)
> } else if (CPU_IS_040_OR_060) {
>
> - while (nr--) {
> + do {
> __asm__ __volatile__("nop\n\t"
> ".chip 68040\n\t"
> "cpushp %%bc,(%0)\n\t"
> ".chip 68k"
> - : : "a" (paddr + nr * PAGE_SIZE));
> - }
> + : : "a" __pa(vaddr));
> + vaddr += PAGE_SIZE;
> + } while (--nr);
> } else {
> unsigned long _tmp;
> __asm__ __volatile__("movec %%cacr,%0\n\t"
>
(just edited Matthew's patch in the mail editor, untested, may not apply
cleanly ...)
Cheers,
Michael
Hi Michael,
On Sun, Mar 5, 2023 at 9:44 PM Michael Schmitz <schmitzmic@gmail.com> wrote:
> On 6/03/23 04:28, Matthew Wilcox wrote:
> > On Sun, Mar 05, 2023 at 11:16:13AM +0100, Geert Uytterhoeven wrote:
> >>> + while (nr--) {
> >>> + __asm__ __volatile__("nop\n\t"
> >>> + ".chip 68040\n\t"
> >>> + "cpushp %%bc,(%0)\n\t"
> >>> + ".chip 68k"
> >>> + : : "a" (paddr + nr * PAGE_SIZE));
> >> As gcc (9.5.0) keeps on calculating "paddr + nr * PAGE_SIZE"
> >> inside the loop (albeit using a shift instead of a multiplication),
> >> please use "paddr" here, followed by "paddr += PAGE_SIZE;".
>
> Are we certain that contiguous vaddr always maps to contiguous paddr?
For a general __flush_pages_to_ram() function, that would not be
guaranteed. But as this is meant for folios, it must be true:
https://elixir.bootlin.com/linux/latest/source/include/linux/mm_types.h#L320
Gr{oetje,eeting}s,
Geert
Hi Geert,
On 6/03/23 20:21, Geert Uytterhoeven wrote:
>> Are we certain that contiguous vaddr always maps to contiguous paddr?
> For a general __flush_pages_to_ram() function, that would not be
> guaranteed. But as this is meant for folios, it must be true:
> https://elixir.bootlin.com/linux/latest/source/include/linux/mm_types.h#L320
Thanks for explaining - that just leaves the problem of cowboys like
myself abusing __flush_pages_to_ram(addr, nr) with nr > 1 for something
that isn't a folio. Maybe a comment 'nr > 1 only valid on folios' would
help ...
Cheers,
Michael
>
> Gr{oetje,eeting}s,
>
> Geert
>
@@ -220,24 +220,28 @@ static inline void flush_cache_page(struct vm_area_struct *vma, unsigned long vm
/* Push the page at kernel virtual address and clear the icache */
/* RZ: use cpush %bc instead of cpush %dc, cinv %ic */
-static inline void __flush_page_to_ram(void *vaddr)
+static inline void __flush_pages_to_ram(void *vaddr, unsigned int nr)
{
if (CPU_IS_COLDFIRE) {
unsigned long addr, start, end;
addr = ((unsigned long) vaddr) & ~(PAGE_SIZE - 1);
start = addr & ICACHE_SET_MASK;
- end = (addr + PAGE_SIZE - 1) & ICACHE_SET_MASK;
+ end = (addr + nr * PAGE_SIZE - 1) & ICACHE_SET_MASK;
if (start > end) {
flush_cf_bcache(0, end);
end = ICACHE_MAX_ADDR;
}
flush_cf_bcache(start, end);
} else if (CPU_IS_040_OR_060) {
- __asm__ __volatile__("nop\n\t"
- ".chip 68040\n\t"
- "cpushp %%bc,(%0)\n\t"
- ".chip 68k"
- : : "a" (__pa(vaddr)));
+ unsigned long paddr = __pa(vaddr);
+
+ while (nr--) {
+ __asm__ __volatile__("nop\n\t"
+ ".chip 68040\n\t"
+ "cpushp %%bc,(%0)\n\t"
+ ".chip 68k"
+ : : "a" (paddr + nr * PAGE_SIZE));
+ }
} else {
unsigned long _tmp;
__asm__ __volatile__("movec %%cacr,%0\n\t"
@@ -249,10 +253,14 @@ static inline void __flush_page_to_ram(void *vaddr)
}
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
-#define flush_dcache_page(page) __flush_page_to_ram(page_address(page))
+#define flush_dcache_page(page) __flush_pages_to_ram(page_address(page), 1)
+#define flush_dcache_folio(folio) \
+ __flush_pages_to_ram(folio_address(folio), folio_nr_pages(folio))
#define flush_dcache_mmap_lock(mapping) do { } while (0)
#define flush_dcache_mmap_unlock(mapping) do { } while (0)
-#define flush_icache_page(vma, page) __flush_page_to_ram(page_address(page))
+#define flush_icache_pages(vma, page, nr) \
+ __flush_pages_to_ram(page_address(page), nr)
+#define flush_icache_page(vma, page) flush_icache_pages(vma, page, 1)
extern void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long addr, int len);
@@ -31,8 +31,20 @@
do{ \
*(pteptr) = (pteval); \
} while(0)
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, unsigned int nr)
+{
+ for (;;) {
+ set_pte(ptep, pte);
+ if (--nr == 0)
+ break;
+ ptep++;
+ pte_val(pte) += PAGE_SIZE;
+ }
+}
+
+#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
/* PMD_SHIFT determines the size of the area a second-level page table can map */
#if CONFIG_PGTABLE_LEVELS == 3
@@ -138,11 +150,14 @@ extern void kernel_set_cachemode(void *addr, unsigned long size, int cmode);
* tables contain all the necessary information. The Sun3 does, but
* they are updated on demand.
*/
-static inline void update_mmu_cache(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep)
+static inline void update_mmu_cache_range(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep, unsigned int nr)
{
}
+#define update_mmu_cache(vma, addr, ptep) \
+ update_mmu_cache_range(vma, addr, ptep, 1)
+
#endif /* !__ASSEMBLY__ */
/* MMU-specific headers */
@@ -81,7 +81,7 @@ static inline void cache_page(void *vaddr)
void mmu_page_ctor(void *page)
{
- __flush_page_to_ram(page);
+ __flush_pages_to_ram(page, 1);
flush_tlb_kernel_page(page);
nocache_page(page);
}