[2/2] iommu/vt-d: Fix NULL domain on device release

Message ID 20240223051302.177596-3-baolu.lu@linux.intel.com
State New
Headers
Series iommu: Fix domain check on device release |

Commit Message

Baolu Lu Feb. 23, 2024, 5:13 a.m. UTC
  In the kdump kernel, the IOMMU operates in deferred_attach mode. In this
mode, info->domain may not yet be assigned by the time the release_device
function is called. It leads to the following crash in the crash kernel:

    BUG: kernel NULL pointer dereference, address: 000000000000003c
    ...
    RIP: 0010:do_raw_spin_lock+0xa/0xa0
    ...
    _raw_spin_lock_irqsave+0x1b/0x30
    intel_iommu_release_device+0x96/0x170
    iommu_deinit_device+0x39/0xf0
    __iommu_group_remove_device+0xa0/0xd0
    iommu_bus_notifier+0x55/0xb0
    notifier_call_chain+0x5a/0xd0
    blocking_notifier_call_chain+0x41/0x60
    bus_notify+0x34/0x50
    device_del+0x269/0x3d0
    pci_remove_bus_device+0x77/0x100
    p2sb_bar+0xae/0x1d0
    ...
    i801_probe+0x423/0x740

Use the release_domain mechanism to fix it.

Fixes: 586081d3f6b1 ("iommu/vt-d: Remove DEFER_DEVICE_DOMAIN_INFO")
Reported-by: Eric Badger <ebadger@purestorage.com>
Closes: https://lore.kernel.org/r/20240113181713.1817855-1-ebadger@purestorage.com
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
 drivers/iommu/intel/iommu.c | 26 +-------------------------
 1 file changed, 1 insertion(+), 25 deletions(-)
  

Comments

Tian, Kevin Feb. 27, 2024, 7:40 a.m. UTC | #1
> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Friday, February 23, 2024 1:13 PM
> 
> -static void dmar_remove_one_dev_info(struct device *dev)
> -{
> -	struct device_domain_info *info = dev_iommu_priv_get(dev);
> -	struct dmar_domain *domain = info->domain;
> -	struct intel_iommu *iommu = info->iommu;
> -	unsigned long flags;
> -
> -	if (!dev_is_real_dma_subdevice(info->dev)) {
> -		if (dev_is_pci(info->dev) && sm_supported(iommu))
> -			intel_pasid_tear_down_entry(iommu, info->dev,
> -					IOMMU_NO_PASID, false);
> -
> -		iommu_disable_pci_caps(info);
> -		domain_context_clear(info);
> -	}
> -
> -	spin_lock_irqsave(&domain->lock, flags);
> -	list_del(&info->link);
> -	spin_unlock_irqrestore(&domain->lock, flags);
> -
> -	domain_detach_iommu(domain, iommu);
> -	info->domain = NULL;
> -}
> -

what's required here is slightly different from device_block_translation()
which leaves context entry uncleared in scalable mode (implying the
pasid table must be valid). but in the release path the pasid table will
be freed right after then leading to a use-after-free case.

let's add an explicit domain_context_clear() in intel_iommu_release_device().

with that,

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
  
Baolu Lu Feb. 28, 2024, 1:22 a.m. UTC | #2
On 2/27/24 3:40 PM, Tian, Kevin wrote:
>> From: Lu Baolu<baolu.lu@linux.intel.com>
>> Sent: Friday, February 23, 2024 1:13 PM
>>
>> -static void dmar_remove_one_dev_info(struct device *dev)
>> -{
>> -	struct device_domain_info *info = dev_iommu_priv_get(dev);
>> -	struct dmar_domain *domain = info->domain;
>> -	struct intel_iommu *iommu = info->iommu;
>> -	unsigned long flags;
>> -
>> -	if (!dev_is_real_dma_subdevice(info->dev)) {
>> -		if (dev_is_pci(info->dev) && sm_supported(iommu))
>> -			intel_pasid_tear_down_entry(iommu, info->dev,
>> -					IOMMU_NO_PASID, false);
>> -
>> -		iommu_disable_pci_caps(info);
>> -		domain_context_clear(info);
>> -	}
>> -
>> -	spin_lock_irqsave(&domain->lock, flags);
>> -	list_del(&info->link);
>> -	spin_unlock_irqrestore(&domain->lock, flags);
>> -
>> -	domain_detach_iommu(domain, iommu);
>> -	info->domain = NULL;
>> -}
>> -
> what's required here is slightly different from device_block_translation()
> which leaves context entry uncleared in scalable mode (implying the
> pasid table must be valid). but in the release path the pasid table will
> be freed right after then leading to a use-after-free case.
> 
> let's add an explicit domain_context_clear() in intel_iommu_release_device().

Nice catch!

How about moving the scalable mode context entry management to probe and
release path? Currently, it's part of domain switch, that's really
irrelevant.

Best regards,
baolu
  
Tian, Kevin Feb. 28, 2024, 3:08 a.m. UTC | #3
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Wednesday, February 28, 2024 9:23 AM
> 
> On 2/27/24 3:40 PM, Tian, Kevin wrote:
> >> From: Lu Baolu<baolu.lu@linux.intel.com>
> >> Sent: Friday, February 23, 2024 1:13 PM
> >>
> >> -static void dmar_remove_one_dev_info(struct device *dev)
> >> -{
> >> -	struct device_domain_info *info = dev_iommu_priv_get(dev);
> >> -	struct dmar_domain *domain = info->domain;
> >> -	struct intel_iommu *iommu = info->iommu;
> >> -	unsigned long flags;
> >> -
> >> -	if (!dev_is_real_dma_subdevice(info->dev)) {
> >> -		if (dev_is_pci(info->dev) && sm_supported(iommu))
> >> -			intel_pasid_tear_down_entry(iommu, info->dev,
> >> -					IOMMU_NO_PASID, false);
> >> -
> >> -		iommu_disable_pci_caps(info);
> >> -		domain_context_clear(info);
> >> -	}
> >> -
> >> -	spin_lock_irqsave(&domain->lock, flags);
> >> -	list_del(&info->link);
> >> -	spin_unlock_irqrestore(&domain->lock, flags);
> >> -
> >> -	domain_detach_iommu(domain, iommu);
> >> -	info->domain = NULL;
> >> -}
> >> -
> > what's required here is slightly different from device_block_translation()
> > which leaves context entry uncleared in scalable mode (implying the
> > pasid table must be valid). but in the release path the pasid table will
> > be freed right after then leading to a use-after-free case.
> >
> > let's add an explicit domain_context_clear() in
> intel_iommu_release_device().
> 
> Nice catch!
> 
> How about moving the scalable mode context entry management to probe
> and
> release path? Currently, it's part of domain switch, that's really
> irrelevant.
> 

sounds good.
  

Patch

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 61bb35046ea4..2c04ea90d22f 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -3798,30 +3798,6 @@  static void domain_context_clear(struct device_domain_info *info)
 			       &domain_context_clear_one_cb, info);
 }
 
-static void dmar_remove_one_dev_info(struct device *dev)
-{
-	struct device_domain_info *info = dev_iommu_priv_get(dev);
-	struct dmar_domain *domain = info->domain;
-	struct intel_iommu *iommu = info->iommu;
-	unsigned long flags;
-
-	if (!dev_is_real_dma_subdevice(info->dev)) {
-		if (dev_is_pci(info->dev) && sm_supported(iommu))
-			intel_pasid_tear_down_entry(iommu, info->dev,
-					IOMMU_NO_PASID, false);
-
-		iommu_disable_pci_caps(info);
-		domain_context_clear(info);
-	}
-
-	spin_lock_irqsave(&domain->lock, flags);
-	list_del(&info->link);
-	spin_unlock_irqrestore(&domain->lock, flags);
-
-	domain_detach_iommu(domain, iommu);
-	info->domain = NULL;
-}
-
 /*
  * Clear the page table pointer in context or pasid table entries so that
  * all DMA requests without PASID from the device are blocked. If the page
@@ -4348,7 +4324,6 @@  static void intel_iommu_release_device(struct device *dev)
 {
 	struct device_domain_info *info = dev_iommu_priv_get(dev);
 
-	dmar_remove_one_dev_info(dev);
 	intel_pasid_free_table(dev);
 	intel_iommu_debugfs_remove_dev(info);
 	kfree(info);
@@ -4839,6 +4814,7 @@  static const struct iommu_dirty_ops intel_dirty_ops = {
 
 const struct iommu_ops intel_iommu_ops = {
 	.blocked_domain		= &blocking_domain,
+	.release_domain		= &blocking_domain,
 	.capable		= intel_iommu_capable,
 	.hw_info		= intel_iommu_hw_info,
 	.domain_alloc		= intel_iommu_domain_alloc,