vmalloc 最小分配一个 page,并且分配到的物理页面不保证是连续的(只有映射后的虚拟地址是连续的),因为 vmalloc 内部会多次调用 alloc_page,每次只分配单个页面。
vmalloc主要内容:
1. 从VMALLOC_START到VMALLOC_END查找空闲的虚拟地址空间(hole)
2.根据分配的size,调用alloc_page依次分配单个页面.
3. 把分配的单个页面,映射到第一步中找到的连续的虚拟地址。
1. 查找空闲的虚拟地址空间(hole)
关键数据结构
/*
 * Describes one vmalloc area: the virtual range it covers and the
 * physical pages backing it.
 */
struct vm_struct {
	struct vm_struct *next;		/* presumably the next vm_struct in a chain -- TODO confirm */
	void *addr;			/* starting virtual address */
	unsigned long size;		/* size of the allocation */
	unsigned long flags;
	struct page **pages;		/* the physical pages that were allocated */
	unsigned int nr_pages;		/* number of pages */
	phys_addr_t phys_addr;		/* starting physical address */
	const void *caller;		/* presumably identifies who requested the area -- TODO confirm */
};
/*
 * Describes one reserved range of virtual addresses [va_start, va_end).
 * Each area is tracked both in a red-black tree and in a list.
 */
struct vmap_area {
	unsigned long va_start;		/* starting virtual address */
	unsigned long va_end;		/* ending virtual address */
	unsigned long flags;
	struct rb_node rb_node;		/* linked into the vmap_area_root red-black tree */
	struct list_head list;		/* linked into the vmap_area_list list */
	struct list_head purge_list;	/* "lazy purge" list */
	struct vm_struct *vm;
	struct rcu_head rcu_head;
};
__vmalloc->__vmalloc_node->__vmalloc_node_range->__get_vm_area_node->alloc_vmap_area
/*
 * alloc_vmap_area - reserve a free range ("hole") of virtual addresses.
 *
 * Allocates a vmap_area descriptor and then, holding vmap_area_lock, looks
 * for an address aligned to @align, starting at @vstart, such that
 * addr + size does not exceed @vend. Existing areas that the candidate
 * range would run into are stepped over one by one. On success the range
 * is recorded in the descriptor, the descriptor is inserted with
 * __insert_vmap_area(), and it is returned.
 *
 * Returns ERR_PTR(-ENOMEM) when the descriptor itself cannot be allocated.
 *
 * NOTE(review): this is an abridged excerpt. The `overflow` label that
 * several branches jump to is not shown here, and neither `retry` nor
 * `purged` is used in the visible code; see the full function for the
 * failure path.
 */
static struct vmap_area *alloc_vmap_area(unsigned long size,
				unsigned long align,
				unsigned long vstart, unsigned long vend,
				int node, gfp_t gfp_mask)
{
	struct vmap_area *va;
	struct rb_node *n;
	unsigned long addr;
	int purged = 0;
	struct vmap_area *first;

	va = kmalloc_node(sizeof(struct vmap_area),
			gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!va))
		return ERR_PTR(-ENOMEM);

retry:
	spin_lock(&vmap_area_lock);

	/* find starting point for our search */
	addr = ALIGN(vstart, align);
	/* addr + size wrapped around the end of the address space */
	if (addr + size < addr)
		goto overflow;

	n = vmap_area_root.rb_node;
	first = NULL;

	/*
	 * Walk the red-black tree to find the existing area closest to the
	 * search start: keep the last node whose va_end is >= addr, and stop
	 * as soon as one also starts at or below addr.
	 */
	while (n) {
		struct vmap_area *tmp;
		tmp = rb_entry(n, struct vmap_area, rb_node);
		if (tmp->va_end >= addr) {
			first = tmp;
			if (tmp->va_start <= addr)
				break;
			n = n->rb_left;
		} else
			n = n->rb_right;
	}

	/* no existing area at or above addr: the candidate address is free */
	if (!first)
		goto found;

	/*
	 * Starting from the lowest candidate, look for a usable hole: keep
	 * moving addr past the current area until addr + size no longer
	 * reaches into first->va_start (or the range would exceed vend).
	 */
	while (addr + size > first->va_start && addr + size <= vend) {
		addr = ALIGN(first->va_end, align);
		if (addr + size < addr)
			goto overflow;

		/* nothing after this area in the list, so the space beyond it is free */
		if (list_is_last(&first->list, &vmap_area_list))
			goto found;

		/* move on to the next area */
		first = list_entry(first->list.next,
				struct vmap_area, list);
	}

found:
	if (addr + size > vend)
		goto overflow;

	/* record the starting and ending virtual addresses */
	va->va_start = addr;
	va->va_end = addr + size;
	va->flags = 0;
	/* insert this area into the red-black tree */
	__insert_vmap_area(va);
	/* presumably a hint for where the next search can start -- TODO confirm */
	free_vmap_cache = &va->rb_node;
	spin_unlock(&vmap_area_lock);

	return va;
}
2.分配页面和页表映射
__vmalloc->__vmalloc_node->__vmalloc_node_range->__vmalloc_area_node
/*
 * __vmalloc_area_node - allocate the backing pages for @area and map them.
 *
 * Computes how many pages @area needs, allocates an array of page pointers
 * (via __vmalloc_node() when the array is larger than PAGE_SIZE, otherwise
 * via kmalloc_node()), allocates the pages one at a time, and finally maps
 * them with map_vm_area().
 *
 * Returns area->addr once the mapping succeeds.
 *
 * NOTE(review): this is an abridged excerpt. The `fail` label is not shown
 * here, and no checks for a failed array or page allocation are visible;
 * see the full function for the error handling.
 */
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
				 pgprot_t prot, int node)
{
	const int order = 0;
	struct page **pages;
	unsigned int nr_pages, array_size, i;
	const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
	const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;

	/* compute the number of physical pages needed */
	nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
	array_size = (nr_pages * sizeof(struct page *));

	area->nr_pages = nr_pages;
	/* allocate memory for the array of page pointers */
	/* Please note that the recursion is strictly bounded. */
	if (array_size > PAGE_SIZE) {
		pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
				PAGE_KERNEL, node, area->caller);
		/* flag that the pointer array itself came from the vmalloc path */
		area->flags |= VM_VPAGES;
	} else {
		pages = kmalloc_node(array_size, nested_gfp, node);
	}
	area->pages = pages;

	for (i = 0; i < area->nr_pages; i++) {
		struct page *page;

		/* allocate a single page per iteration (order is 0) */
		if (node == NUMA_NO_NODE)
			page = alloc_page(alloc_mask);
		else
			page = alloc_pages_node(node, alloc_mask, order);

		area->pages[i] = page;
		if (gfpflags_allow_blocking(gfp_mask))
			cond_resched();
	}

	/*
	 * Build the page tables (PGD->PMD->PTE) that map the allocated
	 * physical pages to the corresponding virtual addresses. Page table
	 * mapping has been analysed elsewhere and is not repeated here.
	 */
	if (map_vm_area(area, prot, pages))
		goto fail;
	return area->addr;
}
可以通过 /proc/vmallocinfo 查看 vmalloc 的分配情况。
另外,内核还提供了 vmap 函数,用于把离散的 page 映射到连续的虚拟地址空间。
/*
 * vmap - map a set of discrete pages into a contiguous virtual address range.
 * Presumably @pages is the array of pages to map and @count its length;
 * @flags and @prot are not described in this note -- TODO confirm.
 */
void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)