Linux IO Scheduler -- NOOP (no operation)


Every block device, and every partition of a block device, has its own request queue (request_queue), and each request queue can select an IO scheduler to coordinate the requests submitted to it. The basic goal of an IO scheduler is to order requests by the sector numbers they target on the block device, reducing disk head movement and improving efficiency. Each request_queue holds a queue of requests, and the requests in that queue are serviced in order. In practice, besides this dispatch queue, every scheduler maintains some number of internal queues of its own in which submitted requests are processed; the request at the front of an internal queue is moved into the request_queue at the appropriate time to await servicing.

The noop scheduler ("no operation") is the simplest IO scheduler: it is essentially a FIFO queue implemented as a linked list, performing only simple merging of requests. It is designed for random-access devices and is the best fit for SSDs. (A small user-space model of this FIFO-plus-merging behavior follows the structure definitions below.)

First, the data structures that describe an elevator. Two structures are involved: elevator_type and elevator_queue. The former corresponds to a scheduler type, the latter to a scheduler queue. With only the four standard scheduler types (noop, AS, deadline, cfq) in the kernel, there are exactly four elevator_type instances, but since every block device (or partition) has its own queue there can be many corresponding elevator_queue instances. The key element of both structures is struct elevator_ops, which defines the set of operations implementing the scheduler's request-handling algorithms.

./include/linux/elevator.h

typedef int (elevator_merge_fn) (struct request_queue *, struct request **,
                                 struct bio *);

typedef void (elevator_merge_req_fn) (struct request_queue *, struct request *, struct request *);

typedef void (elevator_merged_fn) (struct request_queue *, struct request *, int);

typedef int (elevator_allow_merge_fn) (struct request_queue *, struct request *, struct bio *);

typedef int (elevator_dispatch_fn) (struct request_queue *, int);

typedef void (elevator_add_req_fn) (struct request_queue *, struct request *);
typedef int (elevator_queue_empty_fn) (struct request_queue *);
typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *);
typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
typedef int (elevator_may_queue_fn) (struct request_queue *, int);

typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t);
typedef void (elevator_put_req_fn) (struct request *);
typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);

typedef void *(elevator_init_fn) (struct request_queue *);
typedef void (elevator_exit_fn) (struct elevator_queue *);

struct elevator_ops
{
        elevator_merge_fn *elevator_merge_fn;
        elevator_merged_fn *elevator_merged_fn;
        elevator_merge_req_fn *elevator_merge_req_fn;
        elevator_allow_merge_fn *elevator_allow_merge_fn;

        elevator_dispatch_fn *elevator_dispatch_fn;
        elevator_add_req_fn *elevator_add_req_fn;
        elevator_activate_req_fn *elevator_activate_req_fn;
        elevator_deactivate_req_fn *elevator_deactivate_req_fn;

        elevator_queue_empty_fn *elevator_queue_empty_fn;
        elevator_completed_req_fn *elevator_completed_req_fn;

        elevator_request_list_fn *elevator_former_req_fn;
        elevator_request_list_fn *elevator_latter_req_fn;

        elevator_set_req_fn *elevator_set_req_fn;
        elevator_put_req_fn *elevator_put_req_fn;

        elevator_may_queue_fn *elevator_may_queue_fn;

        elevator_init_fn *elevator_init_fn;
        elevator_exit_fn *elevator_exit_fn;
        void (*trim)(struct io_context *);
};

/*
 * identifies an elevator type, such as AS or deadline
 */
struct elevator_type
{
        struct list_head list;
        struct elevator_ops ops;
        struct elv_fs_entry *elevator_attrs;
        char elevator_name[ELV_NAME_MAX];
        struct module *elevator_owner;
};

/*
 * each queue has an elevator_queue associated with it
 */
struct elevator_queue
{
        struct elevator_ops *ops;
        void *elevator_data;
        struct kobject kobj;
        struct elevator_type *elevator_type;
        struct mutex sysfs_lock;
        struct hlist_head *hash;
};
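Before moving on to the kernel implementation, the FIFO-plus-merging behavior described above can be modeled in a few lines of user-space C. This is an illustration only: the toy_* names are invented, and in the real kernel it is the generic elevator core that detects merge opportunities (helped by the hash table seen below) and merely notifies the scheduler through hooks such as noop_merged_requests.

#include <stdio.h>
#include <stdlib.h>

struct toy_request {
        unsigned long sector;           /* start sector */
        unsigned long nr_sectors;       /* length in sectors */
        struct toy_request *next;
};

static struct toy_request *head, *tail;  /* FIFO: dispatch from head */

static void toy_add_request(unsigned long sector, unsigned long nr)
{
        /* simplest possible back-merge: extend the tail request if the
         * new IO starts exactly where the previous one ends */
        if (tail && tail->sector + tail->nr_sectors == sector) {
                tail->nr_sectors += nr;
                return;
        }
        struct toy_request *rq = malloc(sizeof(*rq));
        rq->sector = sector;
        rq->nr_sectors = nr;
        rq->next = NULL;
        if (tail)
                tail->next = rq;
        else
                head = rq;
        tail = rq;
}

static struct toy_request *toy_dispatch(void)
{
        /* always hand out the oldest request: no sorting at all */
        struct toy_request *rq = head;
        if (rq) {
                head = rq->next;
                if (!head)
                        tail = NULL;
        }
        return rq;
}

int main(void)
{
        toy_add_request(100, 8);
        toy_add_request(108, 8);        /* contiguous: merged into the first */
        toy_add_request(20, 8);         /* lower sector, but stays behind */
        for (struct toy_request *rq; (rq = toy_dispatch()); free(rq))
                printf("dispatch sector %lu len %lu\n",
                       rq->sector, rq->nr_sectors);
        return 0;
}

Running this dispatches one merged 16-sector request at sector 100, then the request at sector 20, in arrival order.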
./block/elevator.c

int elevator_init(struct request_queue *q, char *name)
{
        struct elevator_type *e = NULL;
        struct elevator_queue *eq;
        int ret = 0;
        void *data;

        // initialize the dispatch-related fields of the request queue
        INIT_LIST_HEAD(&q->queue_head);
        q->last_merge = NULL;
        q->end_sector = 0;
        q->boundary_rq = NULL;

        // if a scheduler name was passed in, look up that scheduler
        if (name) {
                e = elevator_get(name);
                if (!e)
                        return -EINVAL;
        }

        // otherwise, if a scheduler was chosen on the kernel command
        // line (elevator=), try that one
        if (!e && *chosen_elevator) {
                e = elevator_get(chosen_elevator);
                if (!e)
                        printk(KERN_ERR "I/O scheduler %s not found\n",
                                                        chosen_elevator);
        }

        // with neither a name nor a command-line choice, fall back to
        // the compile-time default, and finally to noop
        if (!e) {
                e = elevator_get(CONFIG_DEFAULT_IOSCHED);
                if (!e) {
                        printk(KERN_ERR
                                "Default I/O scheduler not found. " \
                                "Using noop.\n");
                        e = elevator_get("noop");
                }
        }

        // allocate an elevator_queue for the scheduler
        eq = elevator_alloc(q, e);
        if (!eq)
                return -ENOMEM;

        // allocate the scheduler's private data for this request_queue
        data = elevator_init_queue(q, eq);
        if (!data) {
                kobject_put(&eq->kobj);
                return -ENOMEM;
        }

        // tie the request_queue and the elevator_queue together
        elevator_attach(q, eq, data);
        return ret;
}
EXPORT_SYMBOL(elevator_init);

void elevator_exit(struct elevator_queue *e)
{
        mutex_lock(&e->sysfs_lock);
        if (e->ops->elevator_exit_fn)
                e->ops->elevator_exit_fn(e);
        e->ops = NULL;
        mutex_unlock(&e->sysfs_lock);

        kobject_put(&e->kobj);
}
EXPORT_SYMBOL(elevator_exit);
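Drivers do not call elevator_init() by hand; it runs as part of queue creation. In 2.6.3x kernels blk_init_queue() finishes by calling elevator_init(q, NULL), so every request-based driver comes up with the default scheduler already attached. A minimal sketch of a hypothetical module that demonstrates this (the sketch_* names are invented; error handling trimmed):

#include <linux/module.h>
#include <linux/blkdev.h>

/* Hypothetical request function: fail every request; a real driver
 * would transfer data here. Runs with the queue lock held. */
static void sketch_request_fn(struct request_queue *q)
{
        struct request *rq;

        while ((rq = blk_fetch_request(q)) != NULL)
                __blk_end_request_all(rq, -EIO);
}

static DEFINE_SPINLOCK(sketch_lock);
static struct request_queue *sketch_q;

static int __init sketch_init(void)
{
        /* blk_init_queue() ends by calling elevator_init(q, NULL), so
         * the returned queue already carries the default IO scheduler
         * (CONFIG_DEFAULT_IOSCHED). */
        sketch_q = blk_init_queue(sketch_request_fn, &sketch_lock);
        return sketch_q ? 0 : -ENOMEM;
}

static void __exit sketch_exit(void)
{
        blk_cleanup_queue(sketch_q);
}

module_init(sketch_init);
module_exit(sketch_exit);
MODULE_LICENSE("GPL");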
Back in block/elevator.c, the helpers used by elevator_init():

static struct elevator_queue *elevator_alloc(struct request_queue *q,
                                  struct elevator_type *e)
{
        struct elevator_queue *eq;
        int i;

        // allocate the elevator_queue, zeroed, on the queue's node
        eq = kmalloc_node(sizeof(*eq), GFP_KERNEL | __GFP_ZERO, q->node);
        if (unlikely(!eq))
                goto err;

        // associate the elevator_type with the elevator_queue
        eq->ops = &e->ops;
        eq->elevator_type = e;
        kobject_init(&eq->kobj, &elv_ktype);
        mutex_init(&eq->sysfs_lock);

        // allocate the elevator_queue hash table (used for merge lookups)
        eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
                                        GFP_KERNEL, q->node);
        if (!eq->hash)
                goto err;

        // initialize the hash table buckets
        for (i = 0; i < ELV_HASH_ENTRIES; i++)
                INIT_HLIST_HEAD(&eq->hash[i]);

        return eq;
err:
        kfree(eq);
        elevator_put(e);
        return NULL;
}

static void *elevator_init_queue(struct request_queue *q,
                                 struct elevator_queue *eq)
{
        return eq->ops->elevator_init_fn(q);
}

static void elevator_attach(struct request_queue *q, struct elevator_queue *eq,
                           void *data)
{
        q->elevator = eq;
        eq->elevator_data = data;
}

./block/noop-iosched.c

static struct elevator_type elevator_noop = {
        .ops = {
                .elevator_merge_req_fn          = noop_merged_requests,
                .elevator_dispatch_fn           = noop_dispatch,
                .elevator_add_req_fn            = noop_add_request,
                .elevator_queue_empty_fn        = noop_queue_empty,
                .elevator_former_req_fn         = noop_former_request,
                .elevator_latter_req_fn         = noop_latter_request,
                .elevator_init_fn               = noop_init_queue,
                .elevator_exit_fn               = noop_exit_queue,
        },
        .elevator_name = "noop",
        .elevator_owner = THIS_MODULE,
};

static int __init noop_init(void)
{
        elv_register(&elevator_noop);

        return 0;
}

static void __exit noop_exit(void)
{
        elv_unregister(&elevator_noop);
}

module_init(noop_init);
module_exit(noop_exit);

static void *noop_init_queue(struct request_queue *q)
{
        struct noop_data *nd;

        // allocate the noop_data on the specified q->node
        nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node);
        if (!nd)
                return NULL;
        INIT_LIST_HEAD(&nd->queue);
        return nd;
}

struct noop_data {
        struct list_head queue;
};

static void noop_merged_requests(struct request_queue *q, struct request *rq,
                                 struct request *next)
{
        list_del_init(&next->queuelist);
}

static int noop_dispatch(struct request_queue *q, int force)
{
        struct noop_data *nd = q->elevator->elevator_data;

        if (!list_empty(&nd->queue)) {
                struct request *rq;
                rq = list_entry(nd->queue.next, struct request, queuelist);
                list_del_init(&rq->queuelist);
                elv_dispatch_sort(q, rq);
                return 1;
        }
        return 0;
}

static void noop_add_request(struct request_queue *q, struct request *rq)
{
        struct noop_data *nd = q->elevator->elevator_data;

        list_add_tail(&rq->queuelist, &nd->queue);
}
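For completeness, the remaining handlers referenced by elevator_noop are just as small; in the same block/noop-iosched.c they read essentially as follows:

static int noop_queue_empty(struct request_queue *q)
{
        struct noop_data *nd = q->elevator->elevator_data;

        return list_empty(&nd->queue);
}

static struct request *
noop_former_request(struct request_queue *q, struct request *rq)
{
        struct noop_data *nd = q->elevator->elevator_data;

        if (rq->queuelist.prev == &nd->queue)
                return NULL;
        return list_entry(rq->queuelist.prev, struct request, queuelist);
}

static struct request *
noop_latter_request(struct request_queue *q, struct request *rq)
{
        struct noop_data *nd = q->elevator->elevator_data;

        if (rq->queuelist.next == &nd->queue)
                return NULL;
        return list_entry(rq->queuelist.next, struct request, queuelist);
}

static void noop_exit_queue(struct elevator_queue *e)
{
        struct noop_data *nd = e->elevator_data;

        BUG_ON(!list_empty(&nd->queue));
        kfree(nd);
}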
./include/linux/blkdev.h

/*
 * try to put the fields that are referenced together in the same cacheline.
 * if you modify this structure, be sure to check block/blk-core.c:rq_init()
 * as well!
 */
struct request {
        struct list_head queuelist;
        struct call_single_data csd;
        int cpu;

        struct request_queue *q;

        unsigned int cmd_flags;
        enum rq_cmd_type_bits cmd_type;
        unsigned long atomic_flags;

        /* the following two fields are internal, NEVER access directly */
        sector_t __sector;              /* sector cursor */
        unsigned int __data_len;        /* total data len */

        struct bio *bio;
        struct bio *biotail;

        struct hlist_node hash; /* merge hash */
        /*
         * The rb_node is only used inside the io scheduler, requests
         * are pruned when moved to the dispatch queue. So let the
         * completion_data share space with the rb_node.
         */
        union {
                struct rb_node rb_node; /* sort/lookup */
                void *completion_data;
        };

        /*
         * two pointers are available for the IO schedulers, if they need
         * more they have to dynamically allocate it.
         */
        void *elevator_private;
        void *elevator_private2;

        struct gendisk *rq_disk;
        unsigned long start_time;

        /* Number of scatter-gather DMA addr+len pairs after
         * physical address coalescing is performed.
         */
        unsigned short nr_phys_segments;

        unsigned short ioprio;

        void *special;          /* opaque pointer available for LLD use */
        char *buffer;           /* kaddr of the current segment if available */

        int tag;
        int errors;

        int ref_count;

        /*
         * when request is used as a packet command carrier
         */
        unsigned short cmd_len;
        unsigned char __cmd[BLK_MAX_CDB];
        unsigned char *cmd;

        unsigned int extra_len; /* length of alignment and padding */
        unsigned int sense_len;
        unsigned int resid_len; /* residual count */
        void *sense;

        unsigned long deadline;
        struct list_head timeout_list;
        unsigned int timeout;
        int retries;

        /*
         * completion callback.
         */
        rq_end_io_fn *end_io;
        void *end_io_data;

        /* for bidi */
        struct request *next_rq;
};
struct request_queue
{
        /*
         * Together with queue_head for cacheline sharing
         */
        struct list_head        queue_head;
        struct request          *last_merge;
        struct elevator_queue   *elevator;

        /*
         * the queue request freelist, one for reads and one for writes
         */
        struct request_list     rq;

        request_fn_proc         *request_fn;
        make_request_fn         *make_request_fn;
        prep_rq_fn              *prep_rq_fn;
        unplug_fn               *unplug_fn;
        merge_bvec_fn           *merge_bvec_fn;
        prepare_flush_fn        *prepare_flush_fn;
        softirq_done_fn         *softirq_done_fn;
        rq_timed_out_fn         *rq_timed_out_fn;
        dma_drain_needed_fn     *dma_drain_needed;
        lld_busy_fn             *lld_busy_fn;

        /*
         * Dispatch queue sorting
         */
        sector_t                end_sector;
        struct request          *boundary_rq;

        /*
         * Auto-unplugging state
         */
        struct timer_list       unplug_timer;
        int                     unplug_thresh;  /* After this many requests */
        unsigned long           unplug_delay;   /* After this many jiffies */
        struct work_struct      unplug_work;

        struct backing_dev_info backing_dev_info;

        /*
         * The queue owner gets to use this for whatever they like.
         * ll_rw_blk doesn't touch it.
         */
        void                    *queuedata;

        /*
         * queue needs bounce pages for pages above this limit
         */
        gfp_t                   bounce_gfp;

        /*
         * various queue flags, see QUEUE_* below
         */
        unsigned long           queue_flags;

        /*
         * protects queue structures from reentrancy. ->__queue_lock should
         * _never_ be used directly, it is queue private. always use
         * ->queue_lock.
         */
        spinlock_t              __queue_lock;
        spinlock_t              *queue_lock;

        /*
         * queue kobject
         */
        struct kobject kobj;

        /*
         * queue settings
         */
        unsigned long           nr_requests;    /* Max # of requests */
        unsigned int            nr_congestion_on;
        unsigned int            nr_congestion_off;
        unsigned int            nr_batching;

        void                    *dma_drain_buffer;
        unsigned int            dma_drain_size;
        unsigned int            dma_pad_mask;
        unsigned int            dma_alignment;

        struct blk_queue_tag    *queue_tags;
        struct list_head        tag_busy_list;

        unsigned int            nr_sorted;
        unsigned int            in_flight[2];

        unsigned int            rq_timeout;
        struct timer_list       timeout;
        struct list_head        timeout_list;

        struct queue_limits     limits;

        /*
         * sg stuff
         */
        unsigned int            sg_timeout;
        unsigned int            sg_reserved_size;
        int                     node;
#ifdef CONFIG_BLK_DEV_IO_TRACE
        struct blk_trace        *blk_trace;
#endif
        /*
         * reserved for flush operations
         */
        unsigned int            ordered, next_ordered, ordseq;
        int                     orderr, ordcolor;
        struct request          pre_flush_rq, bar_rq, post_flush_rq;
        struct request          *orig_bar_rq;

        struct mutex            sysfs_lock;

#if defined(CONFIG_BLK_DEV_BSG)
        struct bsg_class_device bsg_dev;
#endif
};
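A last note on the dispatch path: the end_sector and boundary_rq fields above ("Dispatch queue sorting") come into play when a scheduler moves a request into queue_head. noop_dispatch() does this through elv_dispatch_sort(), which walks the dispatch list backwards and inserts the request in ascending sector order. A simplified sketch of the core loop (the sketch_ name is invented; the real function in block/elevator.c additionally honors boundary_rq and skips requests that must not be reordered):

static void sketch_dispatch_sort(struct request_queue *q, struct request *rq)
{
        struct list_head *entry;

        // walk backwards: stop at the first queued request whose start
        // sector is not beyond ours, then insert right after it
        list_for_each_prev(entry, &q->queue_head) {
                struct request *pos = list_entry(entry, struct request,
                                                 queuelist);
                if (blk_rq_pos(rq) >= blk_rq_pos(pos))
                        break;
        }
        list_add(&rq->queuelist, entry);
}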

To list the IO schedulers registered in the running kernel:

dmesg | grep -i scheduler
[    0.207639] io scheduler noop registered
[    0.207641] io scheduler deadline registered
[    0.207675] io scheduler cfq registered (default)
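Each of those lines is printed by elv_register() in block/elevator.c, which every scheduler calls from its module init (as noop_init() does above). The tail of that function reads essentially:

        if (!strcmp(e->elevator_name, chosen_elevator) ||
                        (!*chosen_elevator &&
                         !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
                                def = " (default)";

        printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
                                                                def);

which is also where the "(default)" tag comes from.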

To see which IO scheduler a particular device is currently using:

cat /sys/block/xvda/queue/scheduler
noop deadline [cfq]

The name in brackets is the active one; here cfq, the default, is in use.
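The active scheduler can be changed per device at runtime by writing one of the listed names back into the same sysfs file:

echo noop > /sys/block/xvda/queue/scheduler
cat /sys/block/xvda/queue/scheduler
[noop] deadline cfq

It can also be chosen for all devices at boot with the elevator= kernel parameter (e.g. elevator=deadline), which fills in the chosen_elevator string consulted by elevator_init() above.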


When reprinting, please credit the original: https://www.6miu.com/read-15682.html
