From 58c610bd1a3f50820e45a7c09ec0e44d2cda15dd Mon Sep 17 00:00:00 2001
From: Sheng Yang
Date: Wed, 18 Mar 2009 15:33:05 +0800
Subject: intel-iommu: Snooping control support

Snooping control enables the IOMMU to guarantee DMA cache coherency, and
thus reduces the software effort (in the VMM) of maintaining the effective
memory type.

Signed-off-by: Sheng Yang
Signed-off-by: David Woodhouse
---
 drivers/pci/intel-iommu.c | 38 +++++++++++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 5 deletions(-)

(limited to 'drivers/pci')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index f3f6865..be999ff 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -231,6 +231,7 @@ struct dmar_domain {
 	int		flags;		/* flags to find out type of domain */
 
 	int		iommu_coherency;/* indicate coherency of iommu access */
+	int		iommu_snooping; /* indicate snooping control feature */
 	int		iommu_count;	/* reference count of iommu */
 	spinlock_t	iommu_lock;	/* protect iommu set in domain */
 	u64		max_addr;	/* maximum mapped address */
@@ -421,7 +422,6 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
 	return g_iommus[iommu_id];
 }
 
-/* "Coherency" capability may be different across iommus */
 static void domain_update_iommu_coherency(struct dmar_domain *domain)
 {
 	int i;
@@ -438,6 +438,29 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain)
 	}
 }
 
+static void domain_update_iommu_snooping(struct dmar_domain *domain)
+{
+	int i;
+
+	domain->iommu_snooping = 1;
+
+	i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
+	for (; i < g_num_of_iommus; ) {
+		if (!ecap_sc_support(g_iommus[i]->ecap)) {
+			domain->iommu_snooping = 0;
+			break;
+		}
+		i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
+	}
+}
+
+/* Some capabilities may be different across iommus */
+static void domain_update_iommu_cap(struct dmar_domain *domain)
+{
+	domain_update_iommu_coherency(domain);
+	domain_update_iommu_snooping(domain);
+}
+
 static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
 {
 	struct dmar_drhd_unit *drhd = NULL;
@@ -1429,6 +1452,11 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
 	else
 		domain->iommu_coherency = 0;
 
+	if (ecap_sc_support(iommu->ecap))
+		domain->iommu_snooping = 1;
+	else
+		domain->iommu_snooping = 0;
+
 	domain->iommu_count = 1;
 
 	/* always allocate the top pgd */
@@ -1557,7 +1585,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 	spin_lock_irqsave(&domain->iommu_lock, flags);
 	if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
 		domain->iommu_count++;
-		domain_update_iommu_coherency(domain);
+		domain_update_iommu_cap(domain);
 	}
 	spin_unlock_irqrestore(&domain->iommu_lock, flags);
 	return 0;
@@ -2820,7 +2848,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
 
 	spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
 	clear_bit(iommu->seq_id, &domain->iommu_bmp);
 	domain->iommu_count--;
-	domain_update_iommu_coherency(domain);
+	domain_update_iommu_cap(domain);
 	spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
 }
 
@@ -2848,13 +2876,13 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
 		iommu_detach_dev(iommu, info->bus, info->devfn);
 
 		/* clear this iommu in iommu_bmp, update iommu count
-		 * and coherency
+		 * and capabilities
 		 */
 		spin_lock_irqsave(&domain->iommu_lock, flags2);
 		if (test_and_clear_bit(iommu->seq_id,
				       &domain->iommu_bmp)) {
 			domain->iommu_count--;
-			domain_update_iommu_coherency(domain);
+			domain_update_iommu_cap(domain);
 		}
 		spin_unlock_irqrestore(&domain->iommu_lock, flags2);
--
cgit v1.1

From dbb9fd8630e95b6155aff658a2b5f80e95ca2bc6 Mon Sep 17 00:00:00 2001
From: Sheng Yang
Date: Wed, 18 Mar 2009 15:33:06 +0800
Subject: iommu: Add domain_has_cap iommu_ops

This iommu_op reports whether a domain has a specific capability, such as
snooping control for the Intel IOMMU, so that other kernel components can
adjust their behaviour accordingly.

Signed-off-by: Sheng Yang
Signed-off-by: David Woodhouse
---
 drivers/pci/intel-iommu.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'drivers/pci')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index be999ff..3778ab1 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -3158,6 +3158,17 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 	return phys;
 }
 
+static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
+				      unsigned long cap)
+{
+	struct dmar_domain *dmar_domain = domain->priv;
+
+	if (cap == IOMMU_CAP_CACHE_COHERENCY)
+		return dmar_domain->iommu_snooping;
+
+	return 0;
+}
+
 static struct iommu_ops intel_iommu_ops = {
 	.domain_init	= intel_iommu_domain_init,
 	.domain_destroy	= intel_iommu_domain_destroy,
@@ -3166,6 +3177,7 @@ static struct iommu_ops intel_iommu_ops = {
 	.map		= intel_iommu_map_range,
 	.unmap		= intel_iommu_unmap_range,
 	.iova_to_phys	= intel_iommu_iova_to_phys,
+	.domain_has_cap	= intel_iommu_domain_has_cap,
 };
 
 static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
--
cgit v1.1
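The consumer side of this op lives outside drivers/pci; this view filters the
diff to drivers/pci, so the generic iommu_domain_has_cap() wrapper that the
same commit adds is not shown here. Below is a minimal sketch of how a
VMM-style caller might use it through the generic IOMMU API of this kernel
generation; example_attach_coherent() is a hypothetical helper, not part of
the patch:

#include <linux/iommu.h>
#include <linux/pci.h>

/* Hypothetical helper: attach a device and report whether the VMM may
 * keep guest memory write-back cacheable (the IOMMU snoops DMA) or must
 * keep maintaining a stricter effective memory type itself. */
static int example_attach_coherent(struct iommu_domain *domain,
				   struct pci_dev *pdev)
{
	int ret = iommu_attach_device(domain, &pdev->dev);

	if (ret)
		return ret;

	return iommu_domain_has_cap(domain, IOMMU_CAP_CACHE_COHERENCY);
}

From 9cf0669746be19a4906a6c48920060bcf54c708b Mon Sep 17 00:00:00 2001
From: Sheng Yang
Date: Wed, 18 Mar 2009 15:33:07 +0800
Subject: intel-iommu: VT-d page table to support snooping control bit

The user can request snooping control to be enabled through the VT-d page
table.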
Signed-off-by: Sheng Yang
Signed-off-by: David Woodhouse
---
 drivers/pci/intel-iommu.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'drivers/pci')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 3778ab1..a0ba568 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -164,7 +164,8 @@ static inline void context_clear_entry(struct context_entry *context)
  * 1: writable
  * 2-6: reserved
  * 7: super page
- * 8-11: available
+ * 8-10: available
+ * 11: snoop behavior
  * 12-63: Host physcial address
  */
 struct dma_pte {
@@ -186,6 +187,11 @@ static inline void dma_set_pte_writable(struct dma_pte *pte)
 	pte->val |= DMA_PTE_WRITE;
 }
 
+static inline void dma_set_pte_snp(struct dma_pte *pte)
+{
+	pte->val |= DMA_PTE_SNP;
+}
+
 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
 {
 	pte->val = (pte->val & ~3) | (prot & 3);
@@ -1685,6 +1691,8 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
 			BUG_ON(dma_pte_addr(pte));
 			dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
 			dma_set_pte_prot(pte, prot);
+			if (prot & DMA_PTE_SNP)
+				dma_set_pte_snp(pte);
 			domain_flush_cache(domain, pte, sizeof(*pte));
 			start_pfn++;
 			index++;
@@ -3105,6 +3113,8 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
 		prot |= DMA_PTE_READ;
 	if (iommu_prot & IOMMU_WRITE)
 		prot |= DMA_PTE_WRITE;
+	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
+		prot |= DMA_PTE_SNP;
 
 	max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
 	if (dmar_domain->max_addr < max_addr) {
--
cgit v1.1
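Combined with the two patches above, a caller requests snooped mappings
roughly as in the sketch below, modeled on how KVM's device-assignment path
came to use these flags. map_one_page() is a hypothetical wrapper;
iommu_map_range(), IOMMU_CACHE and iommu_domain_has_cap() are the generic
IOMMU API of this era:

#include <linux/iommu.h>

/* Hypothetical wrapper: request a snooped (cache-coherent) mapping,
 * but only when the domain advertises snooping control. */
static int map_one_page(struct iommu_domain *domain,
			unsigned long iova, phys_addr_t paddr)
{
	int prot = IOMMU_READ | IOMMU_WRITE;

	if (iommu_domain_has_cap(domain, IOMMU_CAP_CACHE_COHERENCY))
		prot |= IOMMU_CACHE;	/* ends up as DMA_PTE_SNP in the PTE */

	return iommu_map_range(domain, iova, paddr, PAGE_SIZE, prot);
}

From 3199aa6bc8766e17b8f60820c4f78d59c25fce0e Mon Sep 17 00:00:00 2001
From: "Han, Weidong"
Date: Thu, 26 Feb 2009 17:31:12 +0800
Subject: intel-iommu: fix PCI device detach from virtual machine

When assigning a device behind a conventional PCI bridge, or behind a
PCIe-to-PCI/PCI-X bridge, to a domain, the bridge itself must be assigned to
the same domain, and the bridge's secondary interface may need to be assigned
as well. The dependent assignment is already in place, but the dependent
deassignment was missing when a device is detached from a virtual machine.
This results in conventional PCI device assignment failing after the device
has been assigned once. This patch adds the dependent deassignment and fixes
the issue.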
Signed-off-by: Weidong Han
Signed-off-by: David Woodhouse
---
 drivers/pci/intel-iommu.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'drivers/pci')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index a0ba568..e541c3b 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2808,6 +2808,33 @@ static int vm_domain_add_dev_info(struct dmar_domain *domain,
 	return 0;
 }
 
+static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
+					   struct pci_dev *pdev)
+{
+	struct pci_dev *tmp, *parent;
+
+	if (!iommu || !pdev)
+		return;
+
+	/* dependent device detach */
+	tmp = pci_find_upstream_pcie_bridge(pdev);
+	/* Secondary interface's bus number and devfn 0 */
+	if (tmp) {
+		parent = pdev->bus->self;
+		while (parent != tmp) {
+			iommu_detach_dev(iommu, parent->bus->number,
+					 parent->devfn);
+			parent = parent->bus->self;
+		}
+		if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
+			iommu_detach_dev(iommu,
+					 tmp->subordinate->number, 0);
+		else /* this is a legacy PCI bridge */
+			iommu_detach_dev(iommu,
+					 tmp->bus->number, tmp->devfn);
+	}
+}
+
 static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
 					  struct pci_dev *pdev)
 {
@@ -2833,6 +2860,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
 	spin_unlock_irqrestore(&device_domain_lock, flags);
 
 	iommu_detach_dev(iommu, info->bus, info->devfn);
+	iommu_detach_dependent_devices(iommu, pdev);
 	free_devinfo_mem(info);
 
 	spin_lock_irqsave(&device_domain_lock, flags);
@@ -2882,6 +2910,7 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
 
 		iommu = device_to_iommu(info->bus, info->devfn);
 		iommu_detach_dev(iommu, info->bus, info->devfn);
+		iommu_detach_dependent_devices(iommu, info->dev);
 
 		/* clear this iommu in iommu_bmp, update iommu count
 		 * and capabilities
--
cgit v1.1
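The detach walk added above mirrors the dependent-assignment walk that
already exists on the attach side. For reference, here is a condensed,
illustrative rendering of that attach path, based on domain_context_mapping()
in the same file; example_map_dependent() is a made-up name, the calls are
the driver's own:

/* Illustrative condensation of the attach-side dependency walk that
 * iommu_detach_dependent_devices() must undo. */
static int example_map_dependent(struct dmar_domain *domain,
				 struct pci_dev *pdev)
{
	struct pci_dev *tmp, *parent;
	int ret;

	tmp = pci_find_upstream_pcie_bridge(pdev);
	if (!tmp)	/* pure PCIe path: no dependent devices */
		return 0;

	/* context-map every bridge between the device and the PCIe bridge */
	for (parent = pdev->bus->self; parent != tmp;
	     parent = parent->bus->self) {
		ret = domain_context_mapping_one(domain, parent->bus->number,
						 parent->devfn);
		if (ret)
			return ret;
	}

	if (tmp->is_pcie)	/* PCIe-to-PCI bridge: secondary bus, devfn 0 */
		return domain_context_mapping_one(domain,
						  tmp->subordinate->number, 0);

	/* legacy PCI bridge */
	return domain_context_mapping_one(domain, tmp->bus->number, tmp->devfn);
}

From 4cf2e75d0bec15d945972b005056c4a8731b82cf Mon Sep 17 00:00:00 2001
From: David Woodhouse
Date: Wed, 11 Feb 2009 17:23:43 +0000
Subject: intel-iommu: Enable DMAR on 32-bit kernel.

If we fix a few highmem-related thinkos and a couple of printk format
warnings, the Intel IOMMU driver works fine in a 32-bit kernel.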
Signed-off-by: David Woodhouse
---
 drivers/pci/intel-iommu.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

(limited to 'drivers/pci')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index e541c3b..0c12d06 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2315,7 +2315,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
 error:
 	if (iova)
 		__free_iova(&domain->iovad, iova);
-	printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
+	printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
 		pci_name(pdev), size, (unsigned long long)paddr, dir);
 	return 0;
 }
@@ -2411,7 +2411,7 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
 	start_addr = iova->pfn_lo << PAGE_SHIFT;
 	size = aligned_size((u64)dev_addr, size);
 
-	pr_debug("Device %s unmapping: %lx@%llx\n",
+	pr_debug("Device %s unmapping: %zx@%llx\n",
 		pci_name(pdev), size, (unsigned long long)start_addr);
 
 	/* clear the whole page */
@@ -2469,8 +2469,6 @@ void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 	free_pages((unsigned long)vaddr, order);
 }
 
-#define SG_ENT_VIRT_ADDRESS(sg)	(sg_virt((sg)))
-
 void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
 		    int nelems, int dir)
 {
@@ -2480,7 +2478,7 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
 	unsigned long start_addr;
 	struct iova *iova;
 	size_t size = 0;
-	void *addr;
+	phys_addr_t addr;
 	struct scatterlist *sg;
 	struct intel_iommu *iommu;
 
@@ -2496,7 +2494,7 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
 	if (!iova)
 		return;
 	for_each_sg(sglist, sg, nelems, i) {
-		addr = SG_ENT_VIRT_ADDRESS(sg);
+		addr = page_to_phys(sg_page(sg)) + sg->offset;
 		size += aligned_size((u64)addr, sg->length);
 	}
 
@@ -2523,7 +2521,7 @@ static int intel_nontranslate_map_sg(struct device *hddev,
 
 	for_each_sg(sglist, sg, nelems, i) {
 		BUG_ON(!sg_page(sg));
-		sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
+		sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
 		sg->dma_length = sg->length;
 	}
 	return nelems;
@@ -2532,7 +2530,7 @@ static int intel_nontranslate_map_sg(struct device *hddev,
 int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
 		 int dir)
 {
-	void *addr;
+	phys_addr_t addr;
 	int i;
 	struct pci_dev *pdev = to_pci_dev(hwdev);
 	struct dmar_domain *domain;
@@ -2556,8 +2554,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
 	iommu = domain_get_iommu(domain);
 
 	for_each_sg(sglist, sg, nelems, i) {
-		addr = SG_ENT_VIRT_ADDRESS(sg);
-		addr = (void *)virt_to_phys(addr);
+		addr = page_to_phys(sg_page(sg)) + sg->offset;
 		size += aligned_size((u64)addr, sg->length);
 	}
 
@@ -2580,8 +2577,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
 	start_addr = iova->pfn_lo << PAGE_SHIFT;
 	offset = 0;
 	for_each_sg(sglist, sg, nelems, i) {
-		addr = SG_ENT_VIRT_ADDRESS(sg);
-		addr = (void *)virt_to_phys(addr);
+		addr = page_to_phys(sg_page(sg)) + sg->offset;
 		size = aligned_size((u64)addr, sg->length);
 		ret = domain_page_mapping(domain, start_addr + offset,
 					  ((u64)addr) & PAGE_MASK,
--
cgit v1.1
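The recurring change in this patch, deriving a scatterlist entry's physical
address from its struct page rather than from a kernel virtual address, is
the heart of the highmem fix: on 32-bit kernels a highmem page has no
permanent kernel mapping, so sg_virt()/virt_to_phys() cannot be used. A
minimal sketch of the idiom; example_sg_phys() is a hypothetical name, and
the open-coded form matches the sg_phys() helper that linux/scatterlist.h
already provides:

#include <linux/scatterlist.h>

/* Valid for every page, highmem or not: go through struct page
 * instead of a kernel virtual address that may not exist. */
static phys_addr_t example_sg_phys(struct scatterlist *sg)
{
	return page_to_phys(sg_page(sg)) + sg->offset;
}

From afeeb7cebbd223ffee303fd8de4ba97458b13581 Mon Sep 17 00:00:00 2001
From: "Zhao, Yu"
Date: Fri, 13 Feb 2009 17:55:49 +0800
Subject: intel-iommu: Fix address wrap on 32-bit kernel.

The problem is in dma_pte_clear_range and dma_pte_free_pagetable.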
When intel_unmap_single and intel_unmap_sg call them, the end address may
wrap around to zero if 'start_addr + size' overflows, so no PTE gets
cleared. The uncleared PTE then fires the BUG_ON when the range is used
again to create new mappings. After I modified dma_pte_clear_range a bit,
the BUG_ON is gone. Tested in both 32-bit and 32-bit PAE modes on Intel
X58 and Q35 platforms.

Signed-off-by: Yu Zhao
Signed-off-by: David Woodhouse
---
 drivers/pci/intel-iommu.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'drivers/pci')

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 0c12d06..002c8b9 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -718,15 +718,17 @@ static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
 {
 	int addr_width = agaw_to_width(domain->agaw);
+	int npages;
 
 	start &= (((u64)1) << addr_width) - 1;
 	end &= (((u64)1) << addr_width) - 1;
 	/* in case it's partial page */
 	start = PAGE_ALIGN(start);
 	end &= PAGE_MASK;
+	npages = (end - start) / VTD_PAGE_SIZE;
 
 	/* we don't need lock here, nobody else touches the iova range */
-	while (start < end) {
+	while (npages--) {
 		dma_pte_clear_one(domain, start);
 		start += VTD_PAGE_SIZE;
 	}
--
cgit v1.1
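To make the failure mode concrete, here is a small standalone C sketch of
the wrap described in the message, with illustrative values: the topmost
4 KiB page of a 32-bit address space, which the top-down IOVA allocator
hands out early. Switching the loop to a precomputed page count, as the
patch does, avoids comparing against the wrapped end address:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t start = 0xfffff000u;	/* iova->pfn_lo << PAGE_SHIFT */
	uint32_t size  = 0x1000u;	/* one 4 KiB page */
	uint32_t end   = start + size;	/* 0x100000000 wraps to 0 */

	/* The old guard 'while (start < end)' never fires once end
	 * has wrapped, so the PTE is left stale and the later
	 * BUG_ON(dma_pte_addr(pte)) triggers on IOVA reuse. */
	printf("end = %#x; old loop runs? %s\n",
	       end, start < end ? "yes" : "no -- PTE left uncleared");
	return 0;
}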