xen_blkif_init()->xen_blkif_interface_init()
xen_blkif_cachep = kmem_cache_create("blkif_cache", sizeof(struct xen_blkif), 0, 0, NULL);内核函数 kmem_cache_create 用来创建一个新缓存。这通常是在内核初始化时执行的,或者在首次加载内核模块时执行。
struct kmem_cache *kmem_cache_create (
const char *name,
size_t size,
size_t align,
unsigned long flags,
void (*ctor)(void *))
name 参数定义了缓存名称,proc 文件系统(在 /proc/slabinfo 中)使用它标识这个缓存。
size 参数指定了为这个缓存创建的对象的大小,
align 参数定义了每个对象必需的对齐。kmem_cache_create
xen_blkif_init()->xen_blkif_xenbus_init()->xenbus_register_backend(&xen_blkbk_driver) 与前端相同 xenbus_register_driver_common()->driver_register(struct device_driver *drv)->bus_add_driver(),把驱动添加到总线
1096 static struct xenbus_driver xen_blkbk_driver = { 1097 .ids = xen_blkbk_ids, 1098 .probe = xen_blkbk_probe, 1099 .remove = xen_blkbk_remove, 1100 .otherend_changed = frontend_changed 1101 };(1)xen_blkbk_probe()->xenbus_watch_pathfmt(dev,&be->backend_watch,backend_changed,"%s/%s",dev->nodename,"physical-device") backend_changed()->xen_update_blkif_status()
139 /* xenbus_watch_pathfmt - register a watch on a sprintf-formatted path 140 * @dev: xenbus device 141 * @watch: watch to register 142 * @callback: callback to register 143 * @pathfmt: format of path to watch 144 * 145 * Register a watch on the given @path, using the given xenbus_watch 146 * structure for storage, and the given @callback function as the callback. 147 * Return 0 on success, or -errno on error. On success, the watched path 148 * (@path/@path2) will be saved as @watch->node, and becomes the caller's to 149 * kfree(). On error, watch->node will be NULL, so the caller has nothing to 150 * free, the device will switch to %XenbusStateClosing, and the error will be 151 * saved in the store. 152 */ 153 int xenbus_watch_pathfmt(struct xenbus_device *dev, 154 struct xenbus_watch *watch, 155 void (*callback)(struct xenbus_watch *, 156 const char *, const char *), 157 const char *pathfmt, ...) 158 { 159 int err; 160 va_list ap; 161 char *path; 162 163 va_start(ap, pathfmt); 164 path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap); 165 va_end(ap); 166 167 if (!path) { 168 xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch"); 169 return -ENOMEM; 170 } 171 err = xenbus_watch_path(dev, path, watch, callback); 172 173 if (err) 174 kfree(path); 175 return err; 176 }不知道hotplug scripts??? backend_changed是在注册完之后立马就开始运行的,读hotplug scripts和mode node,创建vbd,与frontend连接。
658 /* 659 * Callback received when the hotplug scripts have placed the physical-device 660 * node. Read it and the mode node, and create a vbd. If the frontend is 661 * ready, connect. 662 */ 663 static void backend_changed(struct xenbus_watch *watch, 664 const char *path, const char *token)(2)frontend_changed() state==XenbusStateConnected 前后端正常通信->xen_update_blkif_status()
770 case XenbusStateConnected: 771 /* 772 * Ensure we connect even when two watches fire in 773 * close succession and we miss the intermediate value 774 * of frontend_state. 775 */ //已经连接 776 if (dev->state == XenbusStateConnected) 777 break; 778 779 /* 780 * Enforce precondition before potential leak point. 781 * xen_blkif_disconnect() is idempotent. 782 */ 783 err = xen_blkif_disconnect(be->blkif); 784 if (err) { 785 xenbus_dev_fatal(dev, err, "pending I/O"); 786 break; 787 } 788 789 err = connect_ring(be); 790 if (err) { 791 /* 792 * Clean up so that memory resources can be used by 793 * other devices. connect_ring reported already error. 794 */ 795 xen_blkif_disconnect(be->blkif); 796 break; 797 } 798 xen_update_blkif_status(be->blkif); 799 break;xen_update_blkif_status()==>对每一个ring kthread_run(xen_blkif_schedule…) xen_blkif_schedule()->do_block_io_op() xen_blkif_schedule()->do_block_io_op()->__do_block_io_op() xen_blkif_schedule()->do_block_io_op()->__do_block_io_op()->dispatch_rw_block_io() dispatch_rw_block_io()->submit_bio()
84 static void xen_update_blkif_status(struct xen_blkif *blkif) 85 { 86 int err; 87 char name[TASK_COMM_LEN]; 88 struct xen_blkif_ring *ring; 89 int i; 90 91 /* Not ready to connect? */ 92 if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev) 93 return; 94 95 /* Already connected? */ 96 if (blkif->be->dev->state == XenbusStateConnected) 97 return; 98 99 /* Attempt to connect: exit if we fail to. */ //把block device相关的物理信息存到store,交换connected状态,主要和bus相关,以后再看 100 connect(blkif->be); 101 if (blkif->be->dev->state != XenbusStateConnected) 102 return; 103 //把blkif对应的xenbus_device的名字存到name 104 err = blkback_name(blkif, name); 105 if (err) { 106 xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); 107 return; 108 } 109 110 err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping); 111 if (err) { 112 xenbus_dev_error(blkif->be->dev, err, "block flush"); 113 return; 114 } 115 invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); 116 //遍历所有的rings,每一个ring启动一个xen_blkif_schedule线程 117 for (i = 0; i < blkif->nr_rings; i++) { 118 ring = &blkif->rings[i]; 119 ring->xenblkd = kthread_run(xen_blkif_schedule, ring, "%s-%d", name, i); 120 if (IS_ERR(ring->xenblkd)) { 121 err = PTR_ERR(ring->xenblkd); 122 ring->xenblkd = NULL; 123 xenbus_dev_fatal(blkif->be->dev, err, 124 "start %s-%d xenblkd", name, i); 125 goto out; 126 } 127 } 128 return; 129 130 out: 131 while (--i >= 0) { 132 ring = &blkif->rings[i]; 133 kthread_stop(ring->xenblkd); 134 } 135 return; 136 }对每个ring做调度
int xen_blkif_schedule(void *arg) { struct xen_blkif_ring *ring = arg; struct xen_blkif *blkif = ring->blkif; struct xen_vbd *vbd = &blkif->vbd; unsigned long timeout; int ret; xen_blkif_get(blkif); set_freezable(); while (!kthread_should_stop()) { if (try_to_freeze()) continue; if (unlikely(vbd->size != vbd_sz(vbd))) xen_vbd_resize(blkif); timeout = msecs_to_jiffies(LRU_INTERVAL); //这里要满足ring->wq != 0,从guest OS中来,可以满足,因为会调用blkif_notify_work,见下文 timeout = wait_event_interruptible_timeout( ring->wq, ring->waiting_reqs || kthread_should_stop(), timeout); if (timeout == 0) goto purge_gnt_list; //要保证还有pending_free,即这时候请求队列没有达到上限 timeout = wait_event_interruptible_timeout( ring->pending_free_wq, !list_empty(&ring->pending_free) || kthread_should_stop(), timeout); if (timeout == 0) goto purge_gnt_list; ring->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ ret = do_block_io_op(ring); if (ret > 0) ring->waiting_reqs = 1; if (ret == -EACCES) wait_event_interruptible(ring->shutdown_wq, kthread_should_stop()); purge_gnt_list: if (blkif->vbd.feature_gnt_persistent && time_after(jiffies, ring->next_lru)) { purge_persistent_gnt(ring); ring->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL); } /* Shrink if we have more than xen_blkif_max_buffer_pages */ shrink_free_pagepool(ring, xen_blkif_max_buffer_pages); if (log_stats && time_after(jiffies, ring->st_print)) print_stats(ring); } /* Drain pending purge work */ flush_work(&ring->persistent_purge_work); if (log_stats) print_stats(ring); ring->xenblkd = NULL; xen_blkif_put(blkif); return 0; }早在创建连接的时候就注册了event channel和irqhandler,当guest OS有notification的时候,就会调用xen_blkif_be_int()。 frontend_changed()->connect_ring()->read_per_ring_refs()->xen_blkif_map() err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn, xen_blkif_be_int, 0, "blkif-backend", ring); xen_blkif_be_int()->blkif_notify_work(): ring->waiting_reqs = 1; wake_up(&ring->wq);