[17/21] ARM: dma-mapping: use arch_sync_dma_for_{device,cpu}() internally

Message ID 20230327121317.4081816-18-arnd@kernel.org
State New
Headers
Series dma-mapping: unify support for cache flushes |

Commit Message

Arnd Bergmann March 27, 2023, 12:13 p.m. UTC
  From: Arnd Bergmann <arnd@arndb.de>

The arm specific iommu code in dma-mapping.c uses the page+offset based
__dma_page_cpu_to_dev()/__dma_page_dev_to_cpu() helpers in place of the
phys_addr_t based arch_sync_dma_for_device()/arch_sync_dma_for_cpu()
wrappers around the.

In order to be able to move the latter part set of functions into
common code, change the iommu implementation to use them directly
and remove the internal ones as a separate interface.

As page+offset and phys_address are equivalent, but are used in
different parts of the code here, this allows removing some of
the conversion but adds them elsewhere.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/mm/dma-mapping.c | 93 ++++++++++++++-------------------------
 1 file changed, 33 insertions(+), 60 deletions(-)
  

Comments

Linus Walleij March 31, 2023, 9:10 a.m. UTC | #1
On Mon, Mar 27, 2023 at 2:16 PM Arnd Bergmann <arnd@kernel.org> wrote:

> From: Arnd Bergmann <arnd@arndb.de>
>
> The arm specific iommu code in dma-mapping.c uses the page+offset based
> __dma_page_cpu_to_dev()/__dma_page_dev_to_cpu() helpers in place of the
> phys_addr_t based arch_sync_dma_for_device()/arch_sync_dma_for_cpu()
> wrappers around the.

Broken sentence?

> In order to be able to move the latter part set of functions into
> common code, change the iommu implementation to use them directly
> and remove the internal ones as a separate interface.
>
> As page+offset and phys_address are equivalent, but are used in
> different parts of the code here, this allows removing some of
> the conversion but adds them elsewhere.
>
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>

Looks good to me, took me some time to verify and understand
the open-coded version of PFN_UP() and this refactoring alone
makes the patch highly valuable.
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>

Yours,
Linus Walleij
  
Arnd Bergmann March 31, 2023, 12:48 p.m. UTC | #2
On Fri, Mar 31, 2023, at 11:10, Linus Walleij wrote:
> On Mon, Mar 27, 2023 at 2:16 PM Arnd Bergmann <arnd@kernel.org> wrote:
>
>> From: Arnd Bergmann <arnd@arndb.de>
>>
>> The arm specific iommu code in dma-mapping.c uses the page+offset based
>> __dma_page_cpu_to_dev()/__dma_page_dev_to_cpu() helpers in place of the
>> phys_addr_t based arch_sync_dma_for_device()/arch_sync_dma_for_cpu()
>> wrappers around the.
>
> Broken sentence?

I've changed s/the/them/ now, at least I think that's what I meant to
write in the first place.

>> In order to be able to move the latter part set of functions into
>> common code, change the iommu implementation to use them directly
>> and remove the internal ones as a separate interface.
>>
>> As page+offset and phys_address are equivalent, but are used in
>> different parts of the code here, this allows removing some of
>> the conversion but adds them elsewhere.
>>
>> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
>
> Looks good to me, took me some time to verify and understand
> the open-coded version of PFN_UP() and this refactoring alone
> makes the patch highly valuable.
> Reviewed-by: Linus Walleij <linus.walleij@linaro.org>

Thanks!

    ARnd
  

Patch

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 8bc01071474a..ce4b74f34a58 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -622,16 +622,14 @@  static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
 	kfree(buf);
 }
 
-static void dma_cache_maint_page(struct page *page, unsigned long offset,
+static void dma_cache_maint(phys_addr_t paddr,
 	size_t size, enum dma_data_direction dir,
 	void (*op)(const void *, size_t, int))
 {
-	unsigned long pfn;
+	unsigned long pfn = PFN_DOWN(paddr);
+	unsigned long offset = paddr % PAGE_SIZE;
 	size_t left = size;
 
-	pfn = page_to_pfn(page) + offset / PAGE_SIZE;
-	offset %= PAGE_SIZE;
-
 	/*
 	 * A single sg entry may refer to multiple physically contiguous
 	 * pages.  But we still need to process highmem pages individually.
@@ -641,8 +639,7 @@  static void dma_cache_maint_page(struct page *page, unsigned long offset,
 	do {
 		size_t len = left;
 		void *vaddr;
-
-		page = pfn_to_page(pfn);
+		struct page *page = pfn_to_page(pfn);
 
 		if (PageHighMem(page)) {
 			if (len + offset > PAGE_SIZE)
@@ -674,14 +671,11 @@  static void dma_cache_maint_page(struct page *page, unsigned long offset,
  * Note: Drivers should NOT use this function directly.
  * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
  */
-static void __dma_page_cpu_to_dev(struct page *page, unsigned long off,
-	size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
-	phys_addr_t paddr;
+	dma_cache_maint(paddr, size, dir, dmac_map_area);
 
-	dma_cache_maint_page(page, off, size, dir, dmac_map_area);
-
-	paddr = page_to_phys(page) + off;
 	if (dir == DMA_FROM_DEVICE) {
 		outer_inv_range(paddr, paddr + size);
 	} else {
@@ -690,34 +684,30 @@  static void __dma_page_cpu_to_dev(struct page *page, unsigned long off,
 	/* FIXME: non-speculating: flush on bidirectional mappings? */
 }
 
-static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
-	size_t size, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
 {
-	phys_addr_t paddr = page_to_phys(page) + off;
-
 	/* FIXME: non-speculating: not required */
 	/* in any case, don't bother invalidating if DMA to device */
 	if (dir != DMA_TO_DEVICE) {
 		outer_inv_range(paddr, paddr + size);
 
-		dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
+		dma_cache_maint(paddr, size, dir, dmac_unmap_area);
 	}
 
 	/*
 	 * Mark the D-cache clean for these pages to avoid extra flushing.
 	 */
 	if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) {
-		unsigned long pfn;
+		unsigned long pfn = PFN_UP(paddr);
+		unsigned long off = paddr & (PAGE_SIZE - 1);
 		size_t left = size;
 
-		pfn = page_to_pfn(page) + off / PAGE_SIZE;
-		off %= PAGE_SIZE;
-		if (off) {
-			pfn++;
+		if (off)
 			left -= PAGE_SIZE - off;
-		}
+
 		while (left >= PAGE_SIZE) {
-			page = pfn_to_page(pfn++);
+			struct page *page = pfn_to_page(pfn++);
 			set_bit(PG_dcache_clean, &page->flags);
 			left -= PAGE_SIZE;
 		}
@@ -1204,7 +1194,7 @@  static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
 		unsigned int len = PAGE_ALIGN(s->offset + s->length);
 
 		if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-			__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
+			arch_sync_dma_for_device(phys + s->offset, s->length, dir);
 
 		prot = __dma_info_to_prot(dir, attrs);
 
@@ -1306,8 +1296,7 @@  static void arm_iommu_unmap_sg(struct device *dev,
 			__iommu_remove_mapping(dev, sg_dma_address(s),
 					       sg_dma_len(s));
 		if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-			__dma_page_dev_to_cpu(sg_page(s), s->offset,
-					      s->length, dir);
+			arch_sync_dma_for_cpu(sg_phys(s), s->length, dir);
 	}
 }
 
@@ -1329,7 +1318,7 @@  static void arm_iommu_sync_sg_for_cpu(struct device *dev,
 		return;
 
 	for_each_sg(sg, s, nents, i)
-		__dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir);
+		arch_sync_dma_for_cpu(sg_phys(s), s->length, dir);
 
 }
 
@@ -1351,7 +1340,8 @@  static void arm_iommu_sync_sg_for_device(struct device *dev,
 		return;
 
 	for_each_sg(sg, s, nents, i)
-		__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
+		arch_sync_dma_for_device(page_to_phys(sg_page(s)) + s->offset,
+					 s->length, dir);
 }
 
 /**
@@ -1373,7 +1363,8 @@  static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
 	int ret, prot, len = PAGE_ALIGN(size + offset);
 
 	if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		__dma_page_cpu_to_dev(page, offset, size, dir);
+		arch_sync_dma_for_device(page_to_phys(page) + offset,
+					 size, dir);
 
 	dma_addr = __alloc_iova(mapping, len);
 	if (dma_addr == DMA_MAPPING_ERROR)
@@ -1406,7 +1397,7 @@  static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
 {
 	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
 	dma_addr_t iova = handle & PAGE_MASK;
-	struct page *page;
+	phys_addr_t phys;
 	int offset = handle & ~PAGE_MASK;
 	int len = PAGE_ALIGN(size + offset);
 
@@ -1414,8 +1405,8 @@  static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
 		return;
 
 	if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
-		page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
-		__dma_page_dev_to_cpu(page, offset, size, dir);
+		phys = iommu_iova_to_phys(mapping->domain, handle);
+		arch_sync_dma_for_cpu(phys, size, dir);
 	}
 
 	iommu_unmap(mapping->domain, iova, len);
@@ -1483,30 +1474,26 @@  static void arm_iommu_sync_single_for_cpu(struct device *dev,
 		dma_addr_t handle, size_t size, enum dma_data_direction dir)
 {
 	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
-	dma_addr_t iova = handle & PAGE_MASK;
-	struct page *page;
-	unsigned int offset = handle & ~PAGE_MASK;
+	phys_addr_t phys;
 
-	if (dev->dma_coherent || !iova)
+	if (dev->dma_coherent || !(handle & PAGE_MASK))
 		return;
 
-	page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
-	__dma_page_dev_to_cpu(page, offset, size, dir);
+	phys = iommu_iova_to_phys(mapping->domain, handle);
+	arch_sync_dma_for_cpu(phys, size, dir);
 }
 
 static void arm_iommu_sync_single_for_device(struct device *dev,
 		dma_addr_t handle, size_t size, enum dma_data_direction dir)
 {
 	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
-	dma_addr_t iova = handle & PAGE_MASK;
-	struct page *page;
-	unsigned int offset = handle & ~PAGE_MASK;
+	phys_addr_t phys;
 
-	if (dev->dma_coherent || !iova)
+	if (dev->dma_coherent || !(handle & PAGE_MASK))
 		return;
 
-	page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
-	__dma_page_cpu_to_dev(page, offset, size, dir);
+	phys = iommu_iova_to_phys(mapping->domain, handle);
+	arch_sync_dma_for_device(phys, size, dir);
 }
 
 static const struct dma_map_ops iommu_ops = {
@@ -1789,20 +1776,6 @@  void arch_teardown_dma_ops(struct device *dev)
 	set_dma_ops(dev, NULL);
 }
 
-void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
-{
-	__dma_page_cpu_to_dev(phys_to_page(paddr), paddr & (PAGE_SIZE - 1),
-			      size, dir);
-}
-
-void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
-{
-	__dma_page_dev_to_cpu(phys_to_page(paddr), paddr & (PAGE_SIZE - 1),
-			      size, dir);
-}
-
 void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		gfp_t gfp, unsigned long attrs)
 {