ObjectStore获取文件系统的fsid

xiaoxiao2021-02-28  122

Ceph 版本:Kraken。本文介绍 ObjectStore 如何获取文件系统的 fsid。OSD 在用户态又构造了一层自己的文件系统来管理数据,并为其分配了唯一标识 UUID。该 UUID 是其文件系统元信息的一部分,底层使用的存储后端不同,其保存的位置也不同:BlueStore 将其保存在块设备的第一个块中,FileStore 则将其保存在日志设备的第一个块中。

获取fsid方法:

/**
 * Work out which object store backend owns a block device and report the
 * fsid (OSD UUID) recorded on it.
 *
 * BlueStore is probed first (only when built with HAVE_LIBAIO), then the
 * FileStore journal header.
 *
 * @param cct   Ceph context; used for logging and handed to the backends.
 * @param path  path of the block device to probe.
 * @param fsid  receives the UUID when a backend recognises the device.
 * @return 0 on success, -EINVAL when neither probe succeeded.
 */
int ObjectStore::probe_block_device_fsid(
  CephContext *cct,
  const string& path,
  uuid_d *fsid)
{
#if defined(HAVE_LIBAIO)
  // BlueStore goes first: its header carries a crc, so a device that is not
  // BlueStore is rejected reliably.
  if (BlueStore::get_block_device_fsid(cct, path, fsid) == 0) {
    lgeneric_dout(cct, 0) << __func__ << " " << path << " is bluestore, "
                          << *fsid << dendl;
    return 0;
  }
#endif

  // Not BlueStore (or BlueStore support not built): try the FileStore journal.
  if (FileStore::get_block_device_fsid(cct, path, fsid) == 0) {
    lgeneric_dout(cct, 0) << __func__ << " " << path << " is filestore, "
                          << *fsid << dendl;
    return 0;
  }

  return -EINVAL;
}
BlueStore 获取 OSD 文件系统的 UUID:该 UUID 是结构体 bluestore_bdev_label_t 的成员(osd_uuid),该结构体经序列化后保存在磁盘的第一个块中。
int BlueStore::get_block_device_fsid(CephContext* cct, const string& path, ¦ ¦ ¦uuid_d *fsid) { bluestore_bdev_label_t label; int r = _read_bdev_label(cct, path, &label); if (r < 0) ¦ return r; *fsid = label.osd_uuid; return 0; }
读取第一个block,反序列化得到label
int BlueStore::_read_bdev_label(CephContext* cct, string path, bluestore_bdev_label_t *label) { dout(10) << __func__ << dendl; //打开设备 int fd = ::open(path.c_str(), O_RDONLY); if (fd < 0) { ¦ fd = -errno; ¦ derr << __func__ << " failed to open " << path << ": " << cpp_strerror(fd) << dendl; ¦ return fd; } bufferlist bl; //从设备中读取指定大小的数据 int r = bl.read_fd(fd, BDEV_LABEL_BLOCK_SIZE); //BDEV_LABEL_BLOCK_SIZE = 4096第一个数据块 VOID_TEMP_FAILURE_RETRY(::close(fd)); if (r < 0) { ¦ derr << __func__ << " failed to read from " << path << ": " << cpp_strerror(r) << dendl; ¦ return r; } //校验数据的完整性,并将其反序列化 uint32_t crc, expected_crc; bufferlist::iterator p = bl.begin(); try { ¦ ::decode(*label, p); ¦ bufferlist t; ¦ t.substr_of(bl, 0, p.get_off()); ¦ crc = t.crc32c(-1); ¦ ::decode(expected_crc, p); } catch (buffer::error& e) { ¦ derr << __func__ << " unable to decode label at offset " << p.get_off() << ": " << e.what() << dendl; ¦ return -EINVAL; } if (crc != expected_crc) { ¦ derr << __func__ << " bad crc on label, expected " << expected_crc << " != actual " << crc << dendl; ¦ return -EIO; } dout(10) << __func__ << " got " << *label << dendl; return 0; }

FileStore 获取osd文件系统的OSD uuid

int FileStore::get_block_device_fsid(CephContext* cct, const string& path, uuid_d *fsid) { // make sure we don't try to use aio or direct_io (and get annoying // error messages from failing to do so); performance implications // should be irrelevant for this use FileJournal j(cct, *fsid, 0, 0, path.c_str(), false, false); return j.peek_fsid(*fsid); } // This can not be used on an active journal int FileJournal::peek_fsid(uuid_d& fsid) { assert(fd == -1); int r = _open(false, false); if (r) ¦ return r; r = read_header(&header); if (r < 0) ¦ goto out; fsid = header.fsid; out: close(); return r; } int FileJournal::_open(bool forwrite, bool create) { int flags, ret; if (forwrite) { ¦ flags = O_RDWR; ¦ if (directio) ¦ ¦ flags |= O_DIRECT | O_DSYNC; } else { ¦ flags = O_RDONLY; } if (create) ¦ flags |= O_CREAT; if (fd >= 0) { ¦ if (TEMP_FAILURE_RETRY(::close(fd))) { ¦ ¦ int err = errno; ¦ ¦ derr << "FileJournal::_open: error closing old fd: " ¦ ¦<< cpp_strerror(err) << dendl; ¦ } } //打开日志设备 fd = TEMP_FAILURE_RETRY(::open(fn.c_str(), flags, 0644)); if (fd < 0) { ¦ int err = errno; ¦ dout(2) << "FileJournal::_open unable to open journal " ¦ ¦ << fn << ": " << cpp_strerror(err) << dendl; ¦ return -err; } //获取指定文件的元信息,读取初始化日志文件(或设备)的相关数据(大小,块大小) struct stat st; ret = ::fstat(fd, &st); if (ret) { ¦ ret = errno; ¦ derr << "FileJournal::_open: unable to fstat journal: " << cpp_strerror(ret) << dendl; ¦ ret = -ret; ¦ goto out_fd; } //判断是常规文件还是裸块设备 if (S_ISBLK(st.st_mode)) { ¦ ret = _open_block_device(); } else if (S_ISREG(st.st_mode)) { ¦ if (aio && !force_aio) { ¦ ¦ derr << "FileJournal::_open: disabling aio for non-block journal. 
Use " ¦ ¦<< "journal_force_aio to force use of aio anyway" << dendl; ¦ ¦ aio = false; ¦ } ¦ ret = _open_file(st.st_size, st.st_blksize, create); } else { ¦ derr << "FileJournal::_open: wrong journal file type: " << st.st_mode ¦<< dendl; ¦ ret = -EINVAL; } if (ret) ¦ goto out_fd; //初始化libaio #ifdef HAVE_LIBAIO if (aio) { ¦ aio_ctx = 0; ¦ ret = io_setup(128, &aio_ctx); ¦ if (ret < 0) { ¦ ¦ switch (ret) { // Contrary to naive expectations -EAGIAN means ... case -EAGAIN: ¦ derr << "FileJournal::_open: user's limit of aio events exceeded. " ¦ ¦ ¦ ¦<< "Try increasing /proc/sys/fs/aio-max-nr" << dendl; ¦ break; default: ¦ derr << "FileJournal::_open: unable to setup io_context " << cpp_strerror(-ret) << dendl; ¦ break; ¦ ¦ } ¦ ¦ goto out_fd; ¦ } } #endif /* We really want max_size to be a multiple of block_size. */ max_size -= max_size % block_size; dout(1) << "_open " << fn << " fd " << fd ¦ << ": " << max_size ¦ << " bytes, block size " << block_size ¦ << " bytes, directio = " << directio ¦ << ", aio = " << aio ¦ << dendl; return 0; out_fd: VOID_TEMP_FAILURE_RETRY(::close(fd)); fd = -1; return ret; }
获取块设备的大小

获取块设备大小,检查是否大于最小日志大小要求。

int FileJournal::_open_block_device() { int64_t bdev_sz = 0; int ret = get_block_device_size(fd, &bdev_sz); if (ret) { ¦ dout(0) << __func__ << ": failed to read block device size." << dendl; ¦ return -EIO; } /* Check for bdev_sz too small */ if (bdev_sz < ONE_MEG) { ¦ dout(0) << __func__ << ": your block device must be at least " ¦ ¦ << ONE_MEG << " bytes to be used for a Ceph journal." << dendl; ¦ return -EINVAL; } dout(10) << __func__ << ": ignoring osd journal size. " ¦ ¦<< "We'll use the entire block device (size: " << bdev_sz << ")" ¦ ¦<< dendl; max_size = bdev_sz; block_size = cct->_conf->journal_block_size; if (cct->_conf->journal_discard) { //获取磁盘对discard的支持(/sys/block/sdb/queue/discard_granularity) ¦ discard = block_device_support_discard(fn.c_str()); ¦ dout(10) << fn << " support discard: " << (int)discard << dendl; } return 0; } //获取块设备的大小 int get_block_device_size(int fd, int64_t *psize) { #ifdef BLKGETSIZE64 int ret = ::ioctl(fd, BLKGETSIZE64, psize); #elif defined(BLKGETSIZE) unsigned long sectors = 0; int ret = ::ioctl(fd, BLKGETSIZE, §ors); *psize = sectors * 512ULL; #else // cppcheck-suppress preprocessorErrorDirective # error "Linux configuration error (get_block_device_size)" #endif if (ret < 0) ret = -errno; return ret; }
当记录 OSD 日志的是一个普通文件(而非块设备)时,会使用下面的方法来打开该日志文件。
int FileJournal::_open_file(int64_t oldsize, blksize_t blksize, bool create) { int ret; //配置日志文件的大小 int64_t conf_journal_sz(cct->_conf->osd_journal_size); conf_journal_sz <<= 20; if ((cct->_conf->osd_journal_size == 0) && (oldsize < ONE_MEG)) { ¦ derr << "I'm sorry, I don't know how large of a journal to create." ¦<< "Please specify a block device to use as the journal OR " ¦<< "set osd_journal_size in your ceph.conf" << dendl; ¦ return -EINVAL; } if (create && (oldsize < conf_journal_sz)) { ¦ uint64_t newsize(conf_journal_sz); ¦ dout(10) << __func__ << " _open extending to " << newsize << " bytes" << dendl; //扩展日志文件大小,但是该方法只分配了虚拟的空间,即没有实际的数据块 ¦ ret = ::ftruncate(fd, newsize); ¦ if (ret < 0) { ¦ ¦ int err = errno; ¦ ¦ derr << "FileJournal::_open_file : unable to extend journal to " ¦ ¦<< newsize << " bytes: " << cpp_strerror(err) << dendl; ¦ ¦ return -err; ¦ } #ifdef HAVE_POSIX_FALLOCATE //为文件分配实际的磁盘空间,以防止磁盘空间不足导致写入失败。 ¦ ret = ::posix_fallocate(fd, 0, newsize); ¦ if (ret) { ¦ ¦ derr << "FileJournal::_open_file : unable to preallocation journal to " ¦ ¦<< newsize << " bytes: " << cpp_strerror(ret) << dendl; ¦ ¦ return -ret; ¦ } ¦ max_size = newsize; #elif defined(__APPLE__) ¦ fstore_t store; ¦ store.fst_flags = F_ALLOCATECONTIG; ¦ store.fst_posmode = F_PEOFPOSMODE; ¦ store.fst_offset = 0; ¦ store.fst_length = newsize; //同上 ¦ ret = ::fcntl(fd, F_PREALLOCATE, &store); ¦ if (ret == -1) { ¦ ¦ ret = -errno; ¦ ¦ derr << "FileJournal::_open_file : unable to preallocation journal to " ¦ ¦<< newsize << " bytes: " << cpp_strerror(ret) << dendl; ¦ ¦ return ret; ¦ } ¦ max_size = newsize; #else # error "Journal pre-allocation not supported on platform." 
#endif } else { ¦ max_size = oldsize; } block_size = cct->_conf->journal_block_size; //初始化日志空间,通过填充‘0’ if (create && cct->_conf->journal_zero_on_create) { ¦ derr << "FileJournal::_open_file : zeroing journal" << dendl; ¦ uint64_t write_size = 1 << 20; ¦ char *buf; //申请一块block_size内存对其的write_size大小的内存空间。 ¦ ret = ::posix_memalign((void **)&buf, block_size, write_size); ¦ if (ret != 0) { ¦ ¦ return -ret; ¦ } ¦ memset(static_cast<void*>(buf), 0, write_size); ¦ uint64_t i = 0; ¦ for (; (i + write_size) <= (uint64_t)max_size; i += write_size) { ¦ ¦ ret = ::pwrite(fd, static_cast<void*>(buf), write_size, i); ¦ ¦ if (ret < 0) { free(buf); return -errno; ¦ ¦ } ¦ } ¦ if (i < (uint64_t)max_size) { ¦ ¦ ret = ::pwrite(fd, static_cast<void*>(buf), max_size - i, i); ¦ ¦ if (ret < 0) { free(buf); return -errno; ¦ ¦ } ¦ } ¦ free(buf); } dout(10) << "_open journal is not a block device, NOT checking disk " ¦ ¦ ¦ ¦ ¦<< "write cache on '" << fn << "'" << dendl; return 0; }
读取日志的头,该头在日志的第一个块中
int FileJournal::read_header(header_t *hdr) const { dout(10) << "read_header" << dendl; bufferlist bl; buffer::ptr bp = buffer::create_page_aligned(block_size); char* bpdata = bp.c_str(); int r = ::pread(fd, bpdata, bp.length(), 0); if (r < 0) { ¦ int err = errno; ¦ dout(0) << "read_header got " << cpp_strerror(err) << dendl; ¦ return -err; } // don't use bp.zero() here, because it also invalidates // crc cache (which is not yet populated anyway) if (bp.length() != (size_t)r) { ¦ ¦ // r will be always less or equal than bp.length ¦ ¦ bpdata += r; ¦ ¦ memset(bpdata, 0, bp.length() - r); } bl.push_back(std::move(bp)); try { ¦ bufferlist::iterator p = bl.begin(); ¦ ::decode(*hdr, p); } catch (buffer::error& e) { ¦ derr << "read_header error decoding journal header" << dendl; ¦ return -EINVAL; } /* ¦* Unfortunately we weren't initializing the flags field for new ¦* journals! Aie. This is safe(ish) now that we have only one ¦* flag. Probably around when we add the next flag we need to ¦* remove this or else this (eventually old) code will clobber newer ¦* code's flags. ¦*/ if (hdr->flags > 3) { ¦ derr << "read_header appears to have gibberish flags; assuming 0" << dendl; ¦ hdr->flags = 0; } print_header(*hdr); return 0; } void FileJournal::print_header(const header_t &header) const { dout(10) << "header: block_size " << header.block_size ¦ ¦<< " alignment " << header.alignment ¦ ¦<< " max_size " << header.max_size ¦ ¦<< dendl; dout(10) << "header: start " << header.start << dendl; dout(10) << " write_pos " << write_pos << dendl; }
转载请注明原文地址: https://www.6miu.com/read-36725.html

最新回复(0)