每做一次Compaction就会在当前版本的基础上生成新的版本,代表当前数据库内存数据和磁盘文件的存活状态,版本的变化以VersionEdit增量的形式写入MANIFEST文件,数据库恢复时也基于此进行重建。
先来看Version的定义:
class Version { public: // Append to *iters a sequence of iterators that will // yield the contents of this Version when merged together. // REQUIRES: This version has been saved (see VersionSet::SaveTo) void AddIterators(const ReadOptions&, std::vector<Iterator*>* iters); // Lookup the value for key. If found, store it in *val and // return OK. Else return a non-OK status. Fills *stats. // REQUIRES: lock is not held struct GetStats { FileMetaData* seek_file; int seek_file_level; }; Status Get(const ReadOptions&, const LookupKey& key, std::string* val, GetStats* stats); // Adds "stats" into the current state. Returns true if a new // compaction may need to be triggered, false otherwise. // REQUIRES: lock is held bool UpdateStats(const GetStats& stats); // Record a sample of bytes read at the specified internal key. // Samples are taken approximately once every config::kReadBytesPeriod // bytes. Returns true if a new compaction may need to be triggered. // REQUIRES: lock is held bool RecordReadSample(Slice key); // Reference count management (so Versions do not disappear out from // under live iterators) void Ref(); void Unref(); void GetOverlappingInputs( int level, const InternalKey* begin, // NULL means before all keys const InternalKey* end, // NULL means after all keys std::vector<FileMetaData*>* inputs); // Returns true iff some file in the specified level overlaps // some part of [*smallest_user_key,*largest_user_key]. // smallest_user_key==NULL represents a key smaller than all keys in the DB. // largest_user_key==NULL represents a key largest than all keys in the DB. bool OverlapInLevel(int level, const Slice* smallest_user_key, const Slice* largest_user_key); // Return the level at which we should place a new memtable compaction // result that covers the range [smallest_user_key,largest_user_key]. int PickLevelForMemTableOutput(const Slice& smallest_user_key, const Slice& largest_user_key); int NumFiles(int level) const { return files_[level].size(); } // Return a human readable string that describes this version's contents. std::string DebugString() const; private: friend class Compaction; friend class VersionSet; class LevelFileNumIterator; Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const; // Call func(arg, level, f) for every file that overlaps user_key in // order from newest to oldest. If an invocation of func returns // false, makes no more calls. // // REQUIRES: user portion of internal_key == user_key. void ForEachOverlapping(Slice user_key, Slice internal_key, void* arg, bool (*func)(void*, int, FileMetaData*)); VersionSet* vset_; // VersionSet to which this Version belongs Version* next_; // Next version in linked list Version* prev_; // Previous version in linked list int refs_; // Number of live refs to this version // List of files per level std::vector<FileMetaData*> files_[config::kNumLevels]; // Next file to compact based on seek stats. FileMetaData* file_to_compact_; int file_to_compact_level_; // Level that should be compacted next and its compaction score. // Score < 1 means compaction is not strictly needed. These fields // are initialized by Finalize(). double compaction_score_; int compaction_level_; explicit Version(VersionSet* vset) : vset_(vset), next_(this), prev_(this), refs_(0), file_to_compact_(NULL), file_to_compact_level_(-1), compaction_score_(-1), compaction_level_(-1) { } ~Version(); // No copying allowed Version(const Version&); void operator=(const Version&); };
不同版本Version在VersionSet内部以双向链表(含表头节点)的形式组织,current_始终指向当前最新版本。
class VersionSet { public: VersionSet(const std::string& dbname, const Options* options, TableCache* table_cache, const InternalKeyComparator*); ~VersionSet(); // Apply *edit to the current version to form a new descriptor that // is both saved to persistent state and installed as the new // current version. Will release *mu while actually writing to the file. // REQUIRES: *mu is held on entry. // REQUIRES: no other thread concurrently calls LogAndApply() Status LogAndApply(VersionEdit* edit, port::Mutex* mu) EXCLUSIVE_LOCKS_REQUIRED(mu); // Recover the last saved descriptor from persistent storage. Status Recover(bool *save_manifest); // Return the current version. Version* current() const { return current_; } // Return the current manifest file number uint64_t ManifestFileNumber() const { return manifest_file_number_; } // Allocate and return a new file number uint64_t NewFileNumber() { return next_file_number_++; } // Arrange to reuse "file_number" unless a newer file number has // already been allocated. // REQUIRES: "file_number" was returned by a call to NewFileNumber(). void ReuseFileNumber(uint64_t file_number) { if (next_file_number_ == file_number + 1) { next_file_number_ = file_number; } } // Return the number of Table files at the specified level. int NumLevelFiles(int level) const; // Return the combined file size of all files at the specified level. int64_t NumLevelBytes(int level) const; // Return the last sequence number. uint64_t LastSequence() const { return last_sequence_; } // Set the last sequence number to s. void SetLastSequence(uint64_t s) { assert(s >= last_sequence_); last_sequence_ = s; } // Mark the specified file number as used. void MarkFileNumberUsed(uint64_t number); // Return the current log file number. uint64_t LogNumber() const { return log_number_; } // Return the log file number for the log file that is currently // being compacted, or zero if there is no such log file. uint64_t PrevLogNumber() const { return prev_log_number_; } // Pick level and inputs for a new compaction. // Returns NULL if there is no compaction to be done. // Otherwise returns a pointer to a heap-allocated object that // describes the compaction. Caller should delete the result. Compaction* PickCompaction(); // Return a compaction object for compacting the range [begin,end] in // the specified level. Returns NULL if there is nothing in that // level that overlaps the specified range. Caller should delete // the result. Compaction* CompactRange( int level, const InternalKey* begin, const InternalKey* end); // Return the maximum overlapping data (in bytes) at next level for any // file at a level >= 1. int64_t MaxNextLevelOverlappingBytes(); // Create an iterator that reads over the compaction inputs for "*c". // The caller should delete the iterator when no longer needed. Iterator* MakeInputIterator(Compaction* c); // Returns true iff some level needs a compaction. bool NeedsCompaction() const { Version* v = current_; return (v->compaction_score_ >= 1) || (v->file_to_compact_ != NULL); } // Add all files listed in any live version to *live. // May also mutate some internal state. void AddLiveFiles(std::set* live); // Return the approximate offset in the database of the data for // "key" as of version "v". uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key); // Return a human-readable short (single-line) summary of the number // of files per level. Uses *scratch as backing store. struct LevelSummaryStorage { char buffer[100]; }; const char* LevelSummary(LevelSummaryStorage* scratch) const; private: class Builder; friend class Compaction; friend class Version; bool ReuseManifest(const std::string& dscname, const std::string& dscbase); void Finalize(Version* v); void GetRange(const std::vector<FileMetaData*>& inputs, InternalKey* smallest, InternalKey* largest); void GetRange2(const std::vector<FileMetaData*>& inputs1, const std::vector<FileMetaData*>& inputs2, InternalKey* smallest, InternalKey* largest); void SetupOtherInputs(Compaction* c); // Save current contents to *log Status WriteSnapshot(log::Writer* log); void AppendVersion(Version* v); Env* const env_; const std::string dbname_; const Options* const options_; TableCache* const table_cache_; const InternalKeyComparator icmp_; uint64_t next_file_number_; uint64_t manifest_file_number_; uint64_t last_sequence_; uint64_t log_number_; uint64_t prev_log_number_; // 0 or backing store for memtable being compacted // Opened lazily WritableFile* descriptor_file_; log::Writer* descriptor_log_; Version dummy_versions_; // Head of circular doubly-linked list of versions. Version* current_; // == dummy_versions_.prev_ // Per-level key at which the next compaction at that level should start. // Either an empty string, or a valid InternalKey. std::string compact_pointer_[config::kNumLevels]; // No copying allowed VersionSet(const VersionSet&); void operator=(const VersionSet&); };
VersionSet则记录DB的全部版本数据,Compaction状态,全局信息等。
class VersionEdit { public: VersionEdit() { Clear(); } ~VersionEdit() { } void Clear(); void SetComparatorName(const Slice& name) { has_comparator_ = true; comparator_ = name.ToString(); } void SetLogNumber(uint64_t num) { has_log_number_ = true; log_number_ = num; } void SetPrevLogNumber(uint64_t num) { has_prev_log_number_ = true; prev_log_number_ = num; } void SetNextFile(uint64_t num) { has_next_file_number_ = true; next_file_number_ = num; } void SetLastSequence(SequenceNumber seq) { has_last_sequence_ = true; last_sequence_ = seq; } void SetCompactPointer(int level, const InternalKey& key) { compact_pointers_.push_back(std::make_pair(level, key)); } // Add the specified file at the specified number. // REQUIRES: This version has not been saved (see VersionSet::SaveTo) // REQUIRES: "smallest" and "largest" are smallest and largest keys in file void AddFile(int level, uint64_t file, uint64_t file_size, const InternalKey& smallest, const InternalKey& largest) { FileMetaData f; f.number = file; f.file_size = file_size; f.smallest = smallest; f.largest = largest; new_files_.push_back(std::make_pair(level, f)); } // Delete the specified "file" from the specified "level". void DeleteFile(int level, uint64_t file) { deleted_files_.insert(std::make_pair(level, file)); } void EncodeTo(std::string* dst) const; Status DecodeFrom(const Slice& src); std::string DebugString() const; private: friend class VersionSet; typedef std::set< std::pair<int, uint64_t> > DeletedFileSet; std::string comparator_; uint64_t log_number_; uint64_t prev_log_number_; uint64_t next_file_number_; SequenceNumber last_sequence_; bool has_comparator_; bool has_log_number_; bool has_prev_log_number_; bool has_next_file_number_; bool has_last_sequence_; std::vector< std::pair<int, InternalKey> > compact_pointers_; DeletedFileSet deleted_files_; std::vector< std::pair<int, FileMetaData> > new_files_; };
VersionEdit记录SSTable文件的增删信息,版本变化,CompactPointer数据等,以序列化到MANIFEST文件的形式存在,序列化的格式比较简单,不再赘述。
Version可看作文件级别的快照,而Snapshot是记录级别。