diff options
| author | Alex Williamson <alex.williamson@redhat.com> | 2013-06-15 10:27:19 -0600 | 
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2013-10-13 14:51:06 -0700 | 
| commit | 00dd675556f5c6a17bcba5f7e47c2552daeed84c (patch) | |
| tree | 9c45f1fde364e28bd061ac1265348ade2a381000 | |
| parent | 5b31202a3604ef905d0433402aaf8aadf4660774 (diff) | |
| download | kernel_samsung_espresso10-00dd675556f5c6a17bcba5f7e47c2552daeed84c.zip kernel_samsung_espresso10-00dd675556f5c6a17bcba5f7e47c2552daeed84c.tar.gz kernel_samsung_espresso10-00dd675556f5c6a17bcba5f7e47c2552daeed84c.tar.bz2  | |
intel-iommu: Fix leaks in pagetable freeing
commit 3269ee0bd6686baf86630300d528500ac5b516d7 upstream.
At best the current code only seems to free the leaf pagetables and
the root.  If you're unlucky enough to have a large gap (like any
QEMU guest with more than 3G of memory), only the first chunk of leaf
pagetables is freed (plus the root).  This is a massive memory leak.
This patch rewrites the pagetable freeing function to use a
recursive algorithm, which not only frees all the pagetables
but also does so without any apparent performance loss versus the
current broken version.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Cc: stable@vger.kernel.org
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
| -rw-r--r-- | drivers/pci/intel-iommu.c | 72 | 
1 files changed, 35 insertions, 37 deletions
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index ae762ec..68baf17 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -853,56 +853,54 @@ static int dma_pte_clear_range(struct dmar_domain *domain,  	return order;  } +static void dma_pte_free_level(struct dmar_domain *domain, int level, +			       struct dma_pte *pte, unsigned long pfn, +			       unsigned long start_pfn, unsigned long last_pfn) +{ +	pfn = max(start_pfn, pfn); +	pte = &pte[pfn_level_offset(pfn, level)]; + +	do { +		unsigned long level_pfn; +		struct dma_pte *level_pte; + +		if (!dma_pte_present(pte) || dma_pte_superpage(pte)) +			goto next; + +		level_pfn = pfn & level_mask(level - 1); +		level_pte = phys_to_virt(dma_pte_addr(pte)); + +		if (level > 2) +			dma_pte_free_level(domain, level - 1, level_pte, +					   level_pfn, start_pfn, last_pfn); + +		/* If range covers entire pagetable, free it */ +		if (!(start_pfn > level_pfn || +		      last_pfn < level_pfn + level_size(level))) { +			dma_clear_pte(pte); +			domain_flush_cache(domain, pte, sizeof(*pte)); +			free_pgtable_page(level_pte); +		} +next: +		pfn += level_size(level); +	} while (!first_pte_in_page(++pte) && pfn <= last_pfn); +} +  /* free page table pages. 
last level pte should already be cleared */  static void dma_pte_free_pagetable(struct dmar_domain *domain,  				   unsigned long start_pfn,  				   unsigned long last_pfn)  {  	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; -	struct dma_pte *first_pte, *pte; -	int total = agaw_to_level(domain->agaw); -	int level; -	unsigned long tmp; -	int large_page = 2;  	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);  	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);  	BUG_ON(start_pfn > last_pfn);  	/* We don't need lock here; nobody else touches the iova range */ -	level = 2; -	while (level <= total) { -		tmp = align_to_level(start_pfn, level); - -		/* If we can't even clear one PTE at this level, we're done */ -		if (tmp + level_size(level) - 1 > last_pfn) -			return; - -		do { -			large_page = level; -			first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page); -			if (large_page > level) -				level = large_page + 1; -			if (!pte) { -				tmp = align_to_level(tmp + 1, level + 1); -				continue; -			} -			do { -				if (dma_pte_present(pte)) { -					free_pgtable_page(phys_to_virt(dma_pte_addr(pte))); -					dma_clear_pte(pte); -				} -				pte++; -				tmp += level_size(level); -			} while (!first_pte_in_page(pte) && -				 tmp + level_size(level) - 1 <= last_pfn); +	dma_pte_free_level(domain, agaw_to_level(domain->agaw), +			   domain->pgd, 0, start_pfn, last_pfn); -			domain_flush_cache(domain, first_pte, -					   (void *)pte - (void *)first_pte); -			 -		} while (tmp && tmp + level_size(level) - 1 <= last_pfn); -		level++; -	}  	/* free pgd */  	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {  		free_pgtable_page(domain->pgd);  | 
