kmem_cache 是 slab 分配器的核心结构体,主要用于描述 slab 的各种信息、链接空闲的 slab,并保存高速缓存的指针数组。因此,要使用 slab 分配器,必须先创建 kmem_cache 结构体。
struct kmem_cache * kmem_cache_create (const char *name, size_t size, size_t align, unsigned long flags, void (*ctor)(void *)) { size_t left_over, slab_size, ralign; struct kmem_cache *cachep = NULL, *pc; if (!name || (size < BYTES_PER_WORD) || size > KMALLOC_MAX_SIZE) { printf("slab create error\n"); } //size和字对齐,32位4字节对齐,64位8字节对齐 if (size & (BYTES_PER_WORD - 1)) { size += (BYTES_PER_WORD - 1); size &= ~(BYTES_PER_WORD - 1); } //如果标记了cache对齐,cache行大小在现代的处理器中大部分是64B if (flags & SLAB_HWCACHE_ALIGN) { ralign = cache_line_size(); //获取cache行大小 while (size <= ralign / 2) //获取size最小对齐 ralign /= 2; } else { ralign = BYTES_PER_WORD; } /* 特定体系结构最小的slab对象大小*/ if (ralign < ARCH_SLAB_MINALIGN) { ralign = ARCH_SLAB_MINALIGN; } if (ralign < align) { ralign = align; //取更大的对齐 } /* * 计算完成对齐 */ align = ralign; //获取kmem_cache结构体指针,从cache_cache结构体中分配 cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL); if (!cachep) goto oops; //如果申请的对象size大于等于512B,则slab管理结构不在本slab页面上,在早期初始化slab阶段所分配的cache都是onslab的 //早期初始化完成后可以offslab if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init) /* * Size is large, assume best to place the slab management obj * off-slab (should allow better packing of objs). */ //如果分配的对象很大,则最好将slab管理结构放在slab外面,也就是动态的从其它cache分配一块 flags |= CFLGS_OFF_SLAB; //对齐申请的size size = ALIGN(size, align); //计算剩余字节和一个slab的页面的阶 left_over = calculate_slab_order(cachep, size, align, flags); //如果对象个数是0则说明分配出错,内存不够了 if (!cachep->num) { printf( "kmem_cache_create: couldn't create cache %s.\n", name); cachep = NULL; goto oops; } //计算对齐后的slab管理结构的大小 slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab), align); //如果slab管理结构在本cache外面,并且剩余的字节数大于等于管理结构体的 //大小则把标记位置为onslab,并且剩余字节数减去slab管理结构的大小 //这样做应该是为了节省空间,减少内部碎片,但是可能会造成比较多的cache miss if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) { flags &= ~CFLGS_OFF_SLAB; left_over -= slab_size; } //如果slab管理结构不在本cache上,则slab管理结构的大小不需要对齐,只需要计算真实的slab管理结构大小 if (flags & CFLGS_OFF_SLAB) { /* really off slab. 
No need for manual alignment */ slab_size = cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab); } //着色偏移等于cache line的大小 cachep->colour_off = cache_line_size(); /* Offset must be a multiple of the alignment. */ //如果指定的对齐大于cache line大小,则着色偏移等于指定对齐 if (cachep->colour_off < align) cachep->colour_off = align; //着色颜色个数等于剩余字节数除以着色偏移 cachep->colour = left_over / cachep->colour_off; //slab管理结构的大小 cachep->slab_size = slab_size; //标记位 cachep->flags = flags; cachep->gfpflags = 0; //cache中的object大小 cachep->buffer_size = size; //cache对象大小的倒数 cachep->reciprocal_buffer_size = reciprocal_value(size); //如果slab管理结构不在本cache,则需要给slab管理结构指定一个大小适合的kmem_cache //给slab管理结构分配内存 if (flags & CFLGS_OFF_SLAB) { cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u); /* * This is a possibility for one of the malloc_sizes caches. * But since we go off slab only for object size greater than * PAGE_SIZE/8, and malloc_sizes gets created in ascending order, * this should not happen at all. * But leave a BUG_ON for some lucky dude. */ //BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache)); } //构造函数指针,分配完回调 cachep->ctor = ctor; //kmem_cache的名字 cachep->name = name; //设置cache的array缓存 if (setup_cpu_cache(cachep)) { __kmem_cache_destroy(cachep); cachep = NULL; goto oops; } /* cache setup completed, link it into the list */ //把分配的cache加入到kmem_cache链表 list_add(&cachep->next, &cache_chain); oops: //如果cachep指针为空,则说明没分配成功报错 if (!cachep && (flags & SLAB_PANIC)) printf("kmem_cache_create(): failed to create slab `%s'\n", name); //mutex_unlock(&cache_chain_mutex); // put_online_cpus(); //返回cachep return cachep; } 接下来主要分析calculate_slab_order函数,此函数主要计算一个slab占用的页面个数,以最小对象个数为单位。
static size_t calculate_slab_order(struct kmem_cache *cachep, size_t size, size_t align, unsigned long flags) { unsigned long offslab_limit; size_t left_over = 0; int gfporder; //计算一个slab占用页面的最小阶 for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) { unsigned int num; size_t remainder; cache_estimate(gfporder, size, align, flags, &remainder, &num); if (!num) continue; if (flags & CFLGS_OFF_SLAB) { /* * Max number of objs-per-slab for caches which * use off-slab slabs. Needed to avoid a possible * looping condition in cache_grow(). */ /* *如果slab管理结构不在本slab页面上则需要计算slab大小的限制,主要针对 *大内存对象 */ offslab_limit = size - sizeof(struct slab); offslab_limit /= sizeof(kmem_bufctl_t); if (num > offslab_limit) break; } //一个slab对象个数 cachep->num = num; //一个slab占用的页面的个数的阶 cachep->gfporder = gfporder; //内部碎片大小 left_over = remainder; /* * A VFS-reclaimable slab tends to have most allocations * as GFP_NOFS and we really don't want to have to be allocating * higher-order pages when we are unable to shrink dcache. */ //if (flags & SLAB_RECLAIM_ACCOUNT) // break; /* * Large number of objects is good, but very large slabs are * currently bad for the gfp()s. */ //一个slab按照最小的页面数计算,比如不超过4KB的对象,每次分配slab只需要 //一页即可 if (gfporder >= slab_break_gfp_order) break; /* * Acceptable internal fragmentation? */ //内部碎片乘以8,要小于等于所分配的页大小 //比如此时只分配了一页,那么leftover的大小不能大于512B if (left_over * 8 <= (PAGE_SIZE << gfporder)) break; } return left_over; } 具体的计算函数cache_estimate
//计算一个slab中的object的数目和slab剩余的字节数 static void cache_estimate(unsigned long gfporder, size_t buffer_size, size_t align, int flags, size_t *left_over, unsigned int *num) { int nr_objs; size_t mgmt_size; size_t slab_size = PAGE_SIZE << gfporder; //如果slab管理结构不在本slab页面上 if (flags & CFLGS_OFF_SLAB) { mgmt_size = 0; nr_objs = slab_size / buffer_size; //一个slab上对象的个数 if (nr_objs > SLAB_LIMIT) nr_objs = SLAB_LIMIT; //如果超过个数限制就等于最大的 } else {//如果在本页面上 //计算对象个数 nr_objs = (slab_size - sizeof(struct slab)) / (buffer_size + sizeof(kmem_bufctl_t)); /* * This calculated number will be either the right * amount, or one greater than what we want. */ //计算的结构可能超过slab_size大小,需要减去一个对象 if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size > slab_size) nr_objs--; if (nr_objs > SLAB_LIMIT) nr_objs = SLAB_LIMIT; //slab管理结构的最终大小 mgmt_size = slab_mgmt_size(nr_objs, align); } *num = nr_objs; //内部碎片大小 *left_over = slab_size - nr_objs*buffer_size - mgmt_size; }