[RFC] dma-mapping: introduce dma_can_skip_unmap()

Message ID 20240301071918.64631-1-xuanzhuo@linux.alibaba.com
State New
Series [RFC] dma-mapping: introduce dma_can_skip_unmap()

Commit Message

Xuan Zhuo March 1, 2024, 7:19 a.m. UTC
In the typical workflow, we first perform a DMA map on an address to
obtain a DMA address, and later perform a DMA unmap on that address.
Generally this works without issues. Under certain circumstances,
however, we need additional resources just to keep track of these DMA
addresses. For instance, in layered architectures we pass the DMA
address to another module, and retrieving it back from that module
later can be awkward, so we have to allocate extra state solely to
remember the DMA addresses for the eventual unmap.

In many configurations the DMA unmap operation is actually a no-op. If
we know in advance that the unmap is not necessary, we can skip the
unmap calls entirely and save this extra management overhead.

This tries to resolve the problem discussed in this patchset:

 http://lore.kernel.org/all/20240225032330-mutt-send-email-mst@kernel.org

For a single packet, virtio-net may submit 1-19 DMA addresses to the
virtio core. Having virtio-net keep all of those DMA addresses itself
wastes too much memory when the unmap is not necessary, and having it
retrieve them back from the virtio core means fetching up to 19 DMA
addresses in one call. Since the plan is for the net drivers to manage
the DMA themselves in the future, we would like a way to check whether
the unmap is necessary or not.
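
For illustration only (the structures and callbacks below are
hypothetical and not part of this patch), a driver could use the new
helper to allocate its unmap bookkeeping only when it is really needed:

	/* Hypothetical driver-side sketch built on the API added here. */
	struct my_tx_meta {
		dma_addr_t addr;
		u32 len;
	};

	struct my_priv {
		struct device *dev;		/* assumed set at probe time */
		bool need_unmap;
		struct my_tx_meta *meta;	/* NULL when unmap can be skipped */
	};

	static int my_probe(struct my_priv *priv, unsigned int ring_size)
	{
		priv->need_unmap = !dma_can_skip_unmap(priv->dev);
		if (!priv->need_unmap)
			return 0;

		/* Only pay for the per-descriptor state when we must unmap. */
		priv->meta = kcalloc(ring_size, sizeof(*priv->meta), GFP_KERNEL);
		return priv->meta ? 0 : -ENOMEM;
	}

	static void my_tx_done(struct my_priv *priv, unsigned int idx)
	{
		if (!priv->need_unmap)
			return;		/* no unmap and no bookkeeping needed */

		dma_unmap_single(priv->dev, priv->meta[idx].addr,
				 priv->meta[idx].len, DMA_TO_DEVICE);
	}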

Co-developed-by: Zelin Deng <zelin.deng@linux.alibaba.com>
Signed-off-by: Zelin Deng <zelin.deng@linux.alibaba.com>
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/iommu/dma-iommu.c   | 11 +++++++++++
 include/linux/dma-map-ops.h |  1 +
 include/linux/dma-mapping.h |  5 +++++
 kernel/dma/mapping.c        | 23 +++++++++++++++++++++++
 4 files changed, 40 insertions(+)
  

Comments

Robin Murphy March 1, 2024, 11:38 a.m. UTC | #1
On 2024-03-01 7:19 am, Xuan Zhuo wrote:
> In the typical workflow, we first perform a DMA map on an address to
> obtain a DMA address, and later perform a DMA unmap on that address.
> Generally this works without issues. Under certain circumstances,
> however, we need additional resources just to keep track of these DMA
> addresses. For instance, in layered architectures we pass the DMA
> address to another module, and retrieving it back from that module
> later can be awkward, so we have to allocate extra state solely to
> remember the DMA addresses for the eventual unmap.
> 
> In many configurations the DMA unmap operation is actually a no-op. If
> we know in advance that the unmap is not necessary, we can skip the
> unmap calls entirely and save this extra management overhead.
> 
> This tries to resolve the problem discussed in this patchset:
> 
>   http://lore.kernel.org/all/20240225032330-mutt-send-email-mst@kernel.org
> 
> For a single packet, virtio-net may submit 1-19 DMA addresses to the
> virtio core. Having virtio-net keep all of those DMA addresses itself
> wastes too much memory when the unmap is not necessary, and having it
> retrieve them back from the virtio core means fetching up to 19 DMA
> addresses in one call. Since the plan is for the net drivers to manage
> the DMA themselves in the future, we would like a way to check whether
> the unmap is necessary or not.
> 
> Co-developed-by: Zelin Deng <zelin.deng@linux.alibaba.com>
> Signed-off-by: Zelin Deng <zelin.deng@linux.alibaba.com>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>   drivers/iommu/dma-iommu.c   | 11 +++++++++++
>   include/linux/dma-map-ops.h |  1 +
>   include/linux/dma-mapping.h |  5 +++++
>   kernel/dma/mapping.c        | 23 +++++++++++++++++++++++
>   4 files changed, 40 insertions(+)
> 
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 50ccc4f1ef81..8c661a0e1111 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -1706,6 +1706,16 @@ static size_t iommu_dma_opt_mapping_size(void)
>   	return iova_rcache_range();
>   }
>   
> +static bool iommu_dma_opt_can_skip_unmap(struct device *dev)
> +{
> +	struct iommu_domain *domain = iommu_get_dma_domain(dev);
> +
> +	if (domain->type == IOMMU_DOMAIN_IDENTITY)

This is nonsense; iommu-dma does not operate on identity domains in the 
first place.

> +		return true;
> +	else
> +		return false;
> +}
> +
>   static const struct dma_map_ops iommu_dma_ops = {
>   	.flags			= DMA_F_PCI_P2PDMA_SUPPORTED,
>   	.alloc			= iommu_dma_alloc,
> @@ -1728,6 +1738,7 @@ static const struct dma_map_ops iommu_dma_ops = {
>   	.unmap_resource		= iommu_dma_unmap_resource,
>   	.get_merge_boundary	= iommu_dma_get_merge_boundary,
>   	.opt_mapping_size	= iommu_dma_opt_mapping_size,
> +	.dma_can_skip_unmap	= iommu_dma_opt_can_skip_unmap,
>   };
>   
>   /*
> diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
> index 4abc60f04209..d508fa90bc06 100644
> --- a/include/linux/dma-map-ops.h
> +++ b/include/linux/dma-map-ops.h
> @@ -83,6 +83,7 @@ struct dma_map_ops {
>   	size_t (*max_mapping_size)(struct device *dev);
>   	size_t (*opt_mapping_size)(void);
>   	unsigned long (*get_merge_boundary)(struct device *dev);
> +	bool (*dma_can_skip_unmap)(struct device *dev);
>   };
>   
>   #ifdef CONFIG_DMA_OPS
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index 4a658de44ee9..af5d9275f8cc 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -140,6 +140,7 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
>   		void *cpu_addr, dma_addr_t dma_addr, size_t size,
>   		unsigned long attrs);
>   bool dma_can_mmap(struct device *dev);
> +bool dma_can_skip_unmap(struct device *dev);
>   bool dma_pci_p2pdma_supported(struct device *dev);
>   int dma_set_mask(struct device *dev, u64 mask);
>   int dma_set_coherent_mask(struct device *dev, u64 mask);
> @@ -249,6 +250,10 @@ static inline bool dma_can_mmap(struct device *dev)
>   {
>   	return false;
>   }
> +static inline bool dma_can_skip_unmap(struct device *dev)
> +{
> +	return false;
> +}
>   static inline bool dma_pci_p2pdma_supported(struct device *dev)
>   {
>   	return false;
> diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
> index 58db8fd70471..99a81932820b 100644
> --- a/kernel/dma/mapping.c
> +++ b/kernel/dma/mapping.c
> @@ -445,6 +445,29 @@ bool dma_can_mmap(struct device *dev)
>   }
>   EXPORT_SYMBOL_GPL(dma_can_mmap);
>   
> +/**
> + * dma_can_skip_unmap - check if unmap can be skipped
> + * @dev: device to check
> + *
> + * Returns %true if the DMA unmap operation can be skipped for @dev.
> + */
> +bool dma_can_skip_unmap(struct device *dev)
> +{
> +	const struct dma_map_ops *ops = get_dma_ops(dev);
> +
> +	if (is_swiotlb_force_bounce(dev))
> +		return false;
> +
> +	if (dma_map_direct(dev, ops))
> +		return true;

And this is also broken and nonsensical. What about non-coherent cache 
maintenance, or regular non-forced SWIOTLB bouncing due to per-mapping 
address limitations or buffer alignment, or dma-debug?
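
To spell it out: even when dma_map_direct() is true, the unmap path can
still have real work to do. Roughly (a paraphrase of the dma-direct
code in kernel/dma/direct.h, not verbatim; exact details vary by kernel
version and config):

	/* Not verbatim kernel code - a rough paraphrase of the direct
	 * unmap path, to show why it is not always a no-op. */
	static void direct_unmap_roughly(struct device *dev, dma_addr_t addr,
					 size_t size, enum dma_data_direction dir)
	{
		phys_addr_t phys = dma_to_phys(dev, addr);

		/* Non-coherent devices need CPU cache maintenance on unmap. */
		dma_direct_sync_single_for_cpu(dev, addr, size, dir);

		/* The mapping may have been bounced through SWIOTLB even
		 * without swiotlb=force, e.g. for addressing or alignment
		 * reasons, and the bounce slot must be torn down here.
		 * (dma-debug tracking is yet another case not shown.) */
		if (is_swiotlb_buffer(dev, phys))
			swiotlb_tbl_unmap_single(dev, phys, size, dir, 0);
	}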

Not only is this idea not viable, the entire premise seems flawed - the 
reasons for virtio needing to use the DMA API at all are highly likely 
to be the same reasons for it needing to use the DMA API *properly* anyway.

Thanks,
Robin.

> +
> +	if (ops->dma_can_skip_unmap)
> +		return ops->dma_can_skip_unmap(dev);
> +
> +	return false;
> +}
> +EXPORT_SYMBOL_GPL(dma_can_skip_unmap);
> +
>   /**
>    * dma_mmap_attrs - map a coherent DMA allocation into user space
>    * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
  
Michael S. Tsirkin March 1, 2024, 11:50 a.m. UTC | #2
On Fri, Mar 01, 2024 at 11:38:25AM +0000, Robin Murphy wrote:
> Not only is this idea not viable, the entire premise seems flawed - the
> reasons for virtio needing to use the DMA API at all are highly likely to be
> the same reasons for it needing to use the DMA API *properly* anyway.

The idea has nothing to do with virtio per se - we are likely not the
only driver that wastes a lot of memory (hot in cache, too) keeping DMA
addresses around for the sole purpose of calling DMA unmap.  On a bunch
of systems unmap is always a nop and we could save some memory if there
was a way to find out. What is proposed is an API extension allowing
that for anyone - not just virtio.
  
Robin Murphy March 1, 2024, 12:42 p.m. UTC | #3
On 2024-03-01 11:50 am, Michael S. Tsirkin wrote:
> On Fri, Mar 01, 2024 at 11:38:25AM +0000, Robin Murphy wrote:
>> Not only is this idea not viable, the entire premise seems flawed - the
>> reasons for virtio needing to use the DMA API at all are highly likely to be
>> the same reasons for it needing to use the DMA API *properly* anyway.
> 
> The idea has nothing to do with virtio per se

Sure, I can see that, but if virtio is presented as the justification 
for doing this then it's the justification I'm going to look at first. 
And the fact is that it *does* seem to have particular significance, 
since having up to 19 DMA addresses involved in a single transfer is 
very much an outlier compared to typical hardware drivers. Furthermore 
the fact that DMA API support was retrofitted to the established virtio 
design means I would always expect it to run up against more challenges 
than a hardware driver designed around the expectation that DMA buffers 
have DMA addresses.

> - we are likely not the
> only driver that wastes a lot of memory (hot in cache, too) keeping DMA
> addresses around for the sole purpose of calling DMA unmap.  On a bunch
> of systems unmap is always a nop and we could save some memory if there
> was a way to find out. What is proposed is an API extension allowing
> that for anyone - not just virtio.

And the point I'm making is that that "always" is a big assumption, and 
in fact for the situations where it is robustly true we already have the 
DEFINE_DMA_UNMAP_{ADDR,LEN} mechanism. I'd consider it rare for DMA 
addresses to be stored in isolation, as opposed to being part of some 
kind of buffer descriptor (or indeed struct scatterlist, for an obvious 
example) that a driver or subsystem still has to keep track of anyway, 
so in general I believe the scope for saving decidedly small amounts of 
memory at runtime is also considerably less than you might be imagining.
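
For reference, that existing mechanism looks roughly like this in a
driver (a minimal sketch; the struct and function names are made up):

	/* Uses the helpers from <linux/dma-mapping.h>. */
	struct my_buf_desc {
		void *cpu_addr;
		DEFINE_DMA_UNMAP_ADDR(mapping);
		DEFINE_DMA_UNMAP_LEN(len);
	};

	static void my_unmap_buf(struct device *dev, struct my_buf_desc *desc)
	{
		/* On configurations without CONFIG_NEED_DMA_MAP_STATE the two
		 * DEFINE_DMA_UNMAP_* fields above occupy no storage at all and
		 * the dma_unmap_addr()/dma_unmap_len() accessors become
		 * compile-time constants. */
		dma_unmap_single(dev, dma_unmap_addr(desc, mapping),
				 dma_unmap_len(desc, len), DMA_TO_DEVICE);
	}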

Thanks,
Robin.
  
Michael S. Tsirkin March 1, 2024, 1:41 p.m. UTC | #4
On Fri, Mar 01, 2024 at 12:42:39PM +0000, Robin Murphy wrote:
> On 2024-03-01 11:50 am, Michael S. Tsirkin wrote:
> > On Fri, Mar 01, 2024 at 11:38:25AM +0000, Robin Murphy wrote:
> > > Not only is this idea not viable, the entire premise seems flawed - the
> > > reasons for virtio needing to use the DMA API at all are highly likely to be
> > > the same reasons for it needing to use the DMA API *properly* anyway.
> > 
> > The idea has nothing to do with virtio per se
> 
> Sure, I can see that, but if virtio is presented as the justification for
> doing this then it's the justification I'm going to look at first. And the
> fact is that it *does* seem to have particular significance, since having up
> to 19 DMA addresses involved in a single transfer is very much an outlier
> compared to typical hardware drivers.

That's a valid comment. Xuan Zhuo do other drivers do this too,
could you check pls?

> Furthermore the fact that DMA API
> support was retrofitted to the established virtio design means I would
> always expect it to run up against more challenges than a hardware driver
> designed around the expectation that DMA buffers have DMA addresses.


It seems virtio can't drive any DMA changes then it's forever tainted?
Seems unfair - we retrofitted it years ago, enough refactoring happened
since then.


> > - we are likely not the
> > only driver that wastes a lot of memory (hot in cache, too) keeping DMA
> > addresses around for the sole purpose of calling DMA unmap.  On a bunch
> > of systems unmap is always a nop and we could save some memory if there
> > was a way to find out. What is proposed is an API extension allowing
> > that for anyone - not just virtio.
> 
> And the point I'm making is that that "always" is a big assumption, and in
> fact for the situations where it is robustly true we already have the
> DEFINE_DMA_UNMAP_{ADDR,LEN} mechanism.
> I'd consider it rare for DMA
> addresses to be stored in isolation, as opposed to being part of some kind
> of buffer descriptor (or indeed struct scatterlist, for an obvious example)
> that a driver or subsystem still has to keep track of anyway, so in general
> I believe the scope for saving decidedly small amounts of memory at runtime
> is also considerably less than you might be imagining.
> 
> Thanks,
> Robin.


Yes. DEFINE_DMA_UNMAP_ exists but that's only compile time.
And I think the fact we have that mechanism is a hint that
enough configurations could benefit from a runtime
mechanism, too.

E.g. since you mentioned scatterlist, it has a bunch of ifdefs
in place.
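
For reference, roughly what those ifdefs look like (a paraphrase of
include/linux/scatterlist.h; exact fields depend on kernel version and
config):

	struct scatterlist {
		unsigned long	page_link;
		unsigned int	offset;
		unsigned int	length;
		dma_addr_t	dma_address;
	#ifdef CONFIG_NEED_SG_DMA_LENGTH
		unsigned int	dma_length;
	#endif
	};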

Of course
- finding more examples would be beneficial to help maintainers
  do the cost/benefit analysis
- a robust implementation is needed
  
Robin Murphy March 1, 2024, 6:04 p.m. UTC | #5
On 2024-03-01 1:41 pm, Michael S. Tsirkin wrote:
> On Fri, Mar 01, 2024 at 12:42:39PM +0000, Robin Murphy wrote:
>> On 2024-03-01 11:50 am, Michael S. Tsirkin wrote:
>>> On Fri, Mar 01, 2024 at 11:38:25AM +0000, Robin Murphy wrote:
>>>> Not only is this idea not viable, the entire premise seems flawed - the
>>>> reasons for virtio needing to use the DMA API at all are highly likely to be
>>>> the same reasons for it needing to use the DMA API *properly* anyway.
>>>
>>> The idea has nothing to do with virtio per se
>>
>> Sure, I can see that, but if virtio is presented as the justification for
>> doing this then it's the justification I'm going to look at first. And the
>> fact is that it *does* seem to have particular significance, since having up
>> to 19 DMA addresses involved in a single transfer is very much an outlier
>> compared to typical hardware drivers.
> 
> That's a valid comment. Xuan Zhuo do other drivers do this too,
> could you check pls?
> 
>> Furthermore the fact that DMA API
>> support was retrofitted to the established virtio design means I would
>> always expect it to run up against more challenges than a hardware driver
>> designed around the expectation that DMA buffers have DMA addresses.
> 
> 
> It seems virtio can't drive any DMA changes then it's forever tainted?
> Seems unfair - we retrofitted it years ago, enough refactoring happened
> since then.

No, I'm not saying we couldn't still do things to help virtio if and 
when it does prove reasonable to do so; just that if anything it's 
*because* that retrofit is mature and fairly well polished by now that 
any remaining issues like this one are going to be found in the most 
awkward corners and thus unlikely to generalise.

FWIW in my experience it seems more common for network drivers to 
actually have the opposite problem, where knowing the DMA address of a 
buffer is easy, but keeping track of the corresponding CPU address can 
be more of a pain.

>>> - we are likely not the
>>> only driver that wastes a lot of memory (hot in cache, too) keeping DMA
>>> addresses around for the sole purpose of calling DMA unmap.  On a bunch
>>> of systems unmap is always a nop and we could save some memory if there
>>> was a way to find out. What is proposed is an API extension allowing
>>> that for anyone - not just virtio.
>>
>> And the point I'm making is that that "always" is a big assumption, and in
>> fact for the situations where it is robustly true we already have the
>> DEFINE_DMA_UNMAP_{ADDR,LEN} mechanism.
>> I'd consider it rare for DMA
>> addresses to be stored in isolation, as opposed to being part of some kind
>> of buffer descriptor (or indeed struct scatterlist, for an obvious example)
>> that a driver or subsystem still has to keep track of anyway, so in general
>> I believe the scope for saving decidedly small amounts of memory at runtime
>> is also considerably less than you might be imagining.
>>
>> Thanks,
>> Robin.
> 
> 
> Yes. DEFINE_DMA_UNMAP_ exists but that's only compile time.
> And I think the fact we have that mechanism is a hint that
> enough configurations could benefit from a runtime
> mechanism, too.
> 
> E.g. since you mentioned scatterlist, it has a bunch of ifdefs
> in place.

But what could that benefit be in general? It's not like we can change 
structure layouts on a per-DMA-mapping-call basis to save 
already-allocated memory... :/

Thanks,
Robin.

> 
> Of course
> - finding more examples would be beneficial to help maintainers
>    do the cost/benefit analysis
> - a robust implementation is needed
> 
>
  
Michael S. Tsirkin March 2, 2024, 9:58 a.m. UTC | #6
On Fri, Mar 01, 2024 at 06:04:10PM +0000, Robin Murphy wrote:
> On 2024-03-01 1:41 pm, Michael S. Tsirkin wrote:
> > On Fri, Mar 01, 2024 at 12:42:39PM +0000, Robin Murphy wrote:
> > > On 2024-03-01 11:50 am, Michael S. Tsirkin wrote:
> > > > On Fri, Mar 01, 2024 at 11:38:25AM +0000, Robin Murphy wrote:
> > > > > Not only is this idea not viable, the entire premise seems flawed - the
> > > > > reasons for virtio needing to use the DMA API at all are highly likely to be
> > > > > the same reasons for it needing to use the DMA API *properly* anyway.
> > > > 
> > > > The idea has nothing to do with virtio per se
> > > 
> > > Sure, I can see that, but if virtio is presented as the justification for
> > > doing this then it's the justification I'm going to look at first. And the
> > > fact is that it *does* seem to have particular significance, since having up
> > > to 19 DMA addresses involved in a single transfer is very much an outlier
> > > compared to typical hardware drivers.
> > 
> > That's a valid comment. Xuan Zhuo do other drivers do this too,
> > could you check pls?
> > 
> > > Furthermore the fact that DMA API
> > > support was retrofitted to the established virtio design means I would
> > > always expect it to run up against more challenges than a hardware driver
> > > designed around the expectation that DMA buffers have DMA addresses.
> > 
> > 
> > It seems virtio can't drive any DMA changes then it's forever tainted?
> > Seems unfair - we retrofitted it years ago, enough refactoring happened
> > since then.
> 
> No, I'm not saying we couldn't still do things to help virtio if and when it
> does prove reasonable to do so; just that if anything it's *because* that
> retrofit is mature and fairly well polished by now that any remaining issues
> like this one are going to be found in the most awkward corners and thus
> unlikely to generalise.
> 
> FWIW in my experience it seems more common for network drivers to actually
> have the opposite problem, where knowing the DMA address of a buffer is
> easy, but keeping track of the corresponding CPU address can be more of a
> pain.
> 
> > > > - we are likely not the
> > > > only driver that wastes a lot of memory (hot in cache, too) keeping DMA
> > > > addresses around for the sole purpose of calling DMA unmap.  On a bunch
> > > > of systems unmap is always a nop and we could save some memory if there
> > > > was a way to find out. What is proposed is an API extension allowing
> > > > that for anyone - not just virtio.
> > > 
> > > And the point I'm making is that that "always" is a big assumption, and in
> > > fact for the situations where it is robustly true we already have the
> > > DEFINE_DMA_UNMAP_{ADDR,LEN} mechanism.
> > > I'd consider it rare for DMA
> > > addresses to be stored in isolation, as opposed to being part of some kind
> > > of buffer descriptor (or indeed struct scatterlist, for an obvious example)
> > > that a driver or subsystem still has to keep track of anyway, so in general
> > > I believe the scope for saving decidedly small amounts of memory at runtime
> > > is also considerably less than you might be imagining.
> > > 
> > > Thanks,
> > > Robin.
> > 
> > 
> > Yes. DEFINE_DMA_UNMAP_ exists but that's only compile time.
> > And I think the fact we have that mechanism is a hint that
> > enough configurations could benefit from a runtime
> > mechanism, too.
> > 
> > E.g. since you mentioned scatterlist, it has a bunch of ifdefs
> > in place.
> 
> But what could that benefit be in general? It's not like we can change
> structure layouts on a per-DMA-mapping-call basis to save already-allocated
> memory... :/
> 
> Thanks,
> Robin.

This is all speculation, but maybe e.g. by not writing into a cache line
we can reduce pressure on the cache.  Some other code and/or structure
changes might or might not become beneficial.


> > 
> > Of course
> > - finding more examples would be beneficial to help maintainers
> >    do the cost/benefit analysis
> > - a robust implementation is needed
> > 
> >
  
Xuan Zhuo March 4, 2024, 6:19 a.m. UTC | #7
On Fri, 1 Mar 2024 08:41:50 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Fri, Mar 01, 2024 at 12:42:39PM +0000, Robin Murphy wrote:
> > On 2024-03-01 11:50 am, Michael S. Tsirkin wrote:
> > > On Fri, Mar 01, 2024 at 11:38:25AM +0000, Robin Murphy wrote:
> > > > Not only is this idea not viable, the entire premise seems flawed - the
> > > > reasons for virtio needing to use the DMA API at all are highly likely to be
> > > > the same reasons for it needing to use the DMA API *properly* anyway.
> > >
> > > The idea has nothing to do with virtio per se
> >
> > Sure, I can see that, but if virtio is presented as the justification for
> > doing this then it's the justification I'm going to look at first. And the
> > fact is that it *does* seem to have particular significance, since having up
> > to 19 DMA addresses involved in a single transfer is very much an outlier
> > compared to typical hardware drivers.
>
> That's a valid comment. Xuan Zhuo do other drivers do this too,
> could you check pls?

I checked some drivers (gve, mlx, ice); none of them does anything like
virtio-net here. We can do it because we have the indirect feature.

Thanks.



>
> > Furthermore the fact that DMA API
> > support was retrofitted to the established virtio design means I would
> > always expect it to run up against more challenges than a hardware driver
> > designed around the expectation that DMA buffers have DMA addresses.
>
>
> It seems virtio can't drive any DMA changes then it's forever tainted?
> Seems unfair - we retrofitted it years ago, enough refactoring happened
> since then.
>
>
> > > - we are likely not the
> > > only driver that wastes a lot of memory (hot in cache, too) keeping DMA
> > > addresses around for the sole purpose of calling DMA unmap.  On a bunch
> > > of systems unmap is always a nop and we could save some memory if there
> > > was a way to find out. What is proposed is an API extension allowing
> > > that for anyone - not just virtio.
> >
> > And the point I'm making is that that "always" is a big assumption, and in
> > fact for the situations where it is robustly true we already have the
> > DEFINE_DMA_UNMAP_{ADDR,LEN} mechanism.
> > I'd consider it rare for DMA
> > addresses to be stored in isolation, as opposed to being part of some kind
> > of buffer descriptor (or indeed struct scatterlist, for an obvious example)
> > that a driver or subsystem still has to keep track of anyway, so in general
> > I believe the scope for saving decidedly small amounts of memory at runtime
> > is also considerably less than you might be imagining.
> >
> > Thanks,
> > Robin.
>
>
> Yes. DEFINE_DMA_UNMAP_ exists but that's only compile time.
> And I think the fact we have that mechanism is a hint that
> enough configurations could benefit from a runtime
> mechanism, too.
>
> E.g. since you mentioned scatterlist, it has a bunch of ifdefs
> in place.
>
> Of course
> - finding more examples would be beneficial to help maintainers
>   do the cost/benefit analysis
> - a robust implementation is needed
>
>
> --
> MST
>
  
Xuan Zhuo March 4, 2024, 6:28 a.m. UTC | #8
On Fri, 1 Mar 2024 18:04:10 +0000, Robin Murphy <robin.murphy@arm.com> wrote:
> On 2024-03-01 1:41 pm, Michael S. Tsirkin wrote:
> > On Fri, Mar 01, 2024 at 12:42:39PM +0000, Robin Murphy wrote:
> >> On 2024-03-01 11:50 am, Michael S. Tsirkin wrote:
> >>> On Fri, Mar 01, 2024 at 11:38:25AM +0000, Robin Murphy wrote:
> >>>> Not only is this idea not viable, the entire premise seems flawed - the
> >>>> reasons for virtio needing to use the DMA API at all are highly likely to be
> >>>> the same reasons for it needing to use the DMA API *properly* anyway.
> >>>
> >>> The idea has nothing to do with virtio per se
> >>
> >> Sure, I can see that, but if virtio is presented as the justification for
> >> doing this then it's the justification I'm going to look at first. And the
> >> fact is that it *does* seem to have particular significance, since having up
> >> to 19 DMA addresses involved in a single transfer is very much an outlier
> >> compared to typical hardware drivers.
> >
> > That's a valid comment. Xuan Zhuo do other drivers do this too,
> > could you check pls?
> >
> >> Furthermore the fact that DMA API
> >> support was retrofitted to the established virtio design means I would
> >> always expect it to run up against more challenges than a hardware driver
> >> designed around the expectation that DMA buffers have DMA addresses.
> >
> >
> > It seems virtio can't drive any DMA changes then it's forever tainted?
> > Seems unfair - we retrofitted it years ago, enough refactoring happened
> > since then.
>
> No, I'm not saying we couldn't still do things to help virtio if and
> when it does prove reasonable to do so; just that if anything it's
> *because* that retrofit is mature and fairly well polished by now that
> any remaining issues like this one are going to be found in the most
> awkward corners and thus unlikely to generalise.
>
> FWIW in my experience it seems more common for network drivers to
> actually have the opposite problem, where knowing the DMA address of a
> buffer is easy, but keeping track of the corresponding CPU address can
> be more of a pain.
>
> >>> - we are likely not the
> >>> only driver that wastes a lot of memory (hot in cache, too) keeping DMA
> >>> addresses around for the sole purpose of calling DMA unmap.  On a bunch
> >>> of systems unmap is always a nop and we could save some memory if there
> >>> was a way to find out. What is proposed is an API extension allowing
> >>> that for anyone - not just virtio.
> >>
> >> And the point I'm making is that that "always" is a big assumption, and in
> >> fact for the situations where it is robustly true we already have the
> >> DEFINE_DMA_UNMAP_{ADDR,LEN} mechanism.
> >> I'd consider it rare for DMA
> >> addresses to be stored in isolation, as opposed to being part of some kind
> >> of buffer descriptor (or indeed struct scatterlist, for an obvious example)
> >> that a driver or subsystem still has to keep track of anyway, so in general
> >> I believe the scope for saving decidedly small amounts of memory at runtime
> >> is also considerably less than you might be imagining.
> >>
> >> Thanks,
> >> Robin.
> >
> >
> > Yes. DEFINE_DMA_UNMAP_ exists but that's only compile time.
> > And I think the fact we have that mechanism is a hint that
> > enough configurations could benefit from a runtime
> > mechanism, too.
> >
> > E.g. since you mentioned scatterlist, it has a bunch of ifdefs
> > in place.
>
> But what could that benefit be in general? It's not like we can change
> structure layouts on a per-DMA-mapping-call basis to save
> already-allocated memory... :/

We can group that memory together, and if the unmap is not needed we simply
do not allocate it. That is the approach we are trying.

Thanks.



>
> Thanks,
> Robin.
>
> >
> > Of course
> > - finding more examples would be beneficial to help maintainers
> >    do the cost/benefit analysis
> > - a robust implementation is needed
> >
> >
  

Patch

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 50ccc4f1ef81..8c661a0e1111 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1706,6 +1706,16 @@  static size_t iommu_dma_opt_mapping_size(void)
 	return iova_rcache_range();
 }
 
+static bool iommu_dma_opt_can_skip_unmap(struct device *dev)
+{
+	struct iommu_domain *domain = iommu_get_dma_domain(dev);
+
+	if (domain->type == IOMMU_DOMAIN_IDENTITY)
+		return true;
+	else
+		return false;
+}
+
 static const struct dma_map_ops iommu_dma_ops = {
 	.flags			= DMA_F_PCI_P2PDMA_SUPPORTED,
 	.alloc			= iommu_dma_alloc,
@@ -1728,6 +1738,7 @@  static const struct dma_map_ops iommu_dma_ops = {
 	.unmap_resource		= iommu_dma_unmap_resource,
 	.get_merge_boundary	= iommu_dma_get_merge_boundary,
 	.opt_mapping_size	= iommu_dma_opt_mapping_size,
+	.dma_can_skip_unmap	= iommu_dma_opt_can_skip_unmap,
 };
 
 /*
diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index 4abc60f04209..d508fa90bc06 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -83,6 +83,7 @@  struct dma_map_ops {
 	size_t (*max_mapping_size)(struct device *dev);
 	size_t (*opt_mapping_size)(void);
 	unsigned long (*get_merge_boundary)(struct device *dev);
+	bool (*dma_can_skip_unmap)(struct device *dev);
 };
 
 #ifdef CONFIG_DMA_OPS
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 4a658de44ee9..af5d9275f8cc 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -140,6 +140,7 @@  int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 		void *cpu_addr, dma_addr_t dma_addr, size_t size,
 		unsigned long attrs);
 bool dma_can_mmap(struct device *dev);
+bool dma_can_skip_unmap(struct device *dev);
 bool dma_pci_p2pdma_supported(struct device *dev);
 int dma_set_mask(struct device *dev, u64 mask);
 int dma_set_coherent_mask(struct device *dev, u64 mask);
@@ -249,6 +250,10 @@  static inline bool dma_can_mmap(struct device *dev)
 {
 	return false;
 }
+static inline bool dma_can_skip_unmap(struct device *dev)
+{
+	return false;
+}
 static inline bool dma_pci_p2pdma_supported(struct device *dev)
 {
 	return false;
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 58db8fd70471..99a81932820b 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -445,6 +445,29 @@  bool dma_can_mmap(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(dma_can_mmap);
 
+/**
+ * dma_can_skip_unmap - check if unmap can be skipped
+ * @dev: device to check
+ *
+ * Returns %true if the DMA unmap operation can be skipped for @dev.
+ */
+bool dma_can_skip_unmap(struct device *dev)
+{
+	const struct dma_map_ops *ops = get_dma_ops(dev);
+
+	if (is_swiotlb_force_bounce(dev))
+		return false;
+
+	if (dma_map_direct(dev, ops))
+		return true;
+
+	if (ops->dma_can_skip_unmap)
+		return ops->dma_can_skip_unmap(dev);
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(dma_can_skip_unmap);
+
 /**
  * dma_mmap_attrs - map a coherent DMA allocation into user space
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices