参考文档: 请问进程描述符中 nvcsw和nivcsw的区别是 http://bbs.chinaunix.net/thread-3688431-1-1.html 答案如下: nvcsw: voluntary context switch nivcsw: involuntary context switch A voluntary context switch occurs when a thread blocks because it requires a resource that is unavailable. An involuntary context switch takes place when a thread executes for the duration of its time slice or when the system identifies a higher-priority thread to run.
kernel 3.10内核源码分析–hung task机制 http://blog.csdn.net/wh_19910525/article/details/50503269
案例分享: http://lists.infradead.org/pipermail/linux-mtd-cvs/2012-November/008218.html https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/fs/jffs2/file.c?id=157078f64b8a9cd7011b6b900b2f2498df850748
如下案例中,A锁和B锁形成死锁:
一个核上的进程在做如下操作: generic_file_aio_read =>do_generic_file_read(filp, ppos, &desc, file_read_actor); =>error = lock_page_killable(page); /* Get exclusive access to the page ... */ ////////////(+A) =>error = mapping->a_ops->readpage(filp, page); /* Start the actual read. The read will unlock the page. */ =>static int jffs2_readpage (struct file *filp, struct page *pg) =>mutex_lock(&f->sem); //////////////////////////////////////////(+B) =>ret = jffs2_do_readpage_unlock(pg->mapping->host, pg);
另外一个核上的进程在做如下操作: jffs2_write_begin =>mutex_lock(&f->sem); ///////////////////////////(+B) =>pg = grab_cache_page_write_begin(mapping, index, flags); =>page = find_lock_page(mapping, index); =>page = find_get_page(mapping, offset); if (page) { lock_page(page); //(+A) /* Has the page been truncated? */ if (unlikely(page->mapping != mapping)) { unlock_page(page); page_cache_release(page); goto repeat; } VM_BUG_ON(page->index != offset); } return page;
另外一种场景: A锁和C锁形成死锁
generic_file_aio_write =>ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); =>written_buffered = generic_file_buffered_write(iocb, iov, nr_segs, pos, ppos, count, written); =>status = generic_perform_write(file, &i, pos); =>status = a_ops->write_begin(file, mapping, pos, bytes, flags, &page, &fsdata); =>jffs2_write_begin =>ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); =>pg = grab_cache_page_write_begin(mapping, index, flags); =>page = find_lock_page(mapping, index); =>page = find_get_page(mapping, offset); if (page) { lock_page(page); //(+A) /* Has the page been truncated? */ if (unlikely(page->mapping != mapping)) { unlock_page(page); page_cache_release(page); goto repeat; } VM_BUG_ON(page->index != offset); } return page; =>status = a_ops->write_end(file, mapping, pos, bytes, copied, page, fsdata); =>static int jffs2_write_end(struct file *filp, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *pg, void *fsdata) =>ret = jffs2_write_inode_range(c, f, ri, page_address(pg) + aligned_start, (pg->index << PAGE_CACHE_SHIFT) + aligned_start, end - aligned_start, &writtenlen); =>ret = jffs2_reserve_space(c, sizeof(*ri) + JFFS2_MIN_DATA_LEN, &alloclen, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); =>mutex_lock(&c->alloc_sem); ///////////////////////////////////(+C)
另外一个进程做如下操作: jffs2_write_begin =>ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); =>mutex_lock(&c->alloc_sem); ///////////////////////////////(+C) =>mutex_lock(&f->sem); ///////////////////////////(+B) =>pg = grab_cache_page_write_begin(mapping, index, flags); =>page = find_lock_page(mapping, index); =>page = find_get_page(mapping, offset); if (page) { lock_page(page); //(+A) /* Has the page been truncated? */ if (unlikely(page->mapping != mapping)) { unlock_page(page); page_cache_release(page); goto repeat; } VM_BUG_ON(page->index != offset); } return page;
第三个案例,还是jffs2,有如下调用栈:
第一个调用栈: __switch_to schedule inode_wait __wait_on_bit out_of_line_wait_on_bit ifind_fast iget_locked jffs2_iget jffs2_gc_fetch_inode jffs2_garbage_collect_pass jffs2_garbage_collect_thread kthread original_kernel_thread
第二个调用栈: __switch_to schedule __mutex_lock_slowpath mutex_lock jffs2_reserve_space jffs2_write_inode_range jffs2_write_end generic_file_buffered_write __generic_file_aio_write generic_file_aio_write generic_file_aio_write vfs_write sys_write ret_from_syscall
第三个调用栈: __switch_to schedule __mutex_lock_interruptible_slowpath mutex_lock_interruptible jffs2_garbage_collect_pass jffs2_reserve_space jffs2_do_create jffs2_create vfs_create do_last do_filp_open do_sys_open ret_from_syscall
根据第一个和第二个调用栈可以找到AB锁(根据第一个和第三个也可以找到AB锁),根据第三个调用栈可以找到ABBA死锁。
AB锁: jffs2_garbage_collect_thread =>if (jffs2_garbage_collect_pass(c) == -ENOSPC) =>if (mutex_lock_interruptible(&c->alloc_sem)) /////////////////(+B) =>f = jffs2_gc_fetch_inode(c, inum, !nlink); =>inode = jffs2_iget(OFNI_BS_2SFFJ(c), inum); =>inode = iget_locked(sb, ino); =>inode = ifind_fast(sb, head, ino); =>wait_on_inode(inode); =>wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE);////////////////////(+A) 注意查看里面代码和注释的内容,不要想当然: wait_on_bit - wait for a bit to be cleared,即等待__I_NEW被清零 =>return out_of_line_wait_on_bit(word, bit, action, mode); =>wait_queue_head_t *wq = bit_waitqueue(word, bit);//通过等待队列实现 DEFINE_WAIT_BIT(wait, word, bit); return __wait_on_bit(wq, &wait, action, mode);
BA锁: jffs2_create =>inode = jffs2_new_inode(dir_i, mode, ri); =>if (insert_inode_locked(inode) < 0) =>inode->i_state |= I_NEW; //////////////////////////////(+A) 设置NEW =>ret = jffs2_do_create(c, dir_f, f, ri, &dentry->d_name); =>ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); =>mutex_lock(&c->alloc_sem); ////////////////////(+B) =>mutex_unlock(&c->alloc_sem); ///////////////////(-B) =>unlock_new_inode(inode); =>inode->i_state &= ~I_NEW; ////////////////////////////////(-A) wake_up_bit(&inode->i_state, __I_NEW);
经验总结: 把各种调用栈全部看完,画出流程图,不要看到第二个调用栈就不往下看了。 分析wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE); 要细致,看一下到底是等待__I_NEW被清除还是被设置; 分析清楚是等待__I_NEW被清除之后,再分析__I_NEW在什么场景下会被设置,结合调用栈就可以把ABBA锁找到。
