前段时间跟同事一起联调某系统时,client发送thrift序列化后的数据,本地打log能正确读到某个optional字段,而server却收不到该字段的值,感到比较诡异。后来修改了一下该字段的赋值方式就ok了。花了一点时间踩了这个小坑,下面先从业务代码片段入手,再到源码分析,最后与protobuf的对应实现作简单比较,与大家分享下。
先看下thrift IDL在序列化读写时的用法,以某业务中简化版为例,定义及两种读写方式如下:
struct UrlItem { 1: required string url; 2: optional string referer; } //第一种用法 UrlItem url_item; url_item.url = "http://47.110.236.62/"; url_item.referer = "https://www.google.com.hk/"; log(LOG_NOTICE, "url:%s\treferer:%s\n", url_item.url.c_str(), url_item.referer.c_str()); //第二种用法 url_item.__set_url("http://47.110.236.62/"); url_item.__set_referer("https://www.google.com.hk/"); log(LOG_NOTICE, "url:%s\treferer:%s\n", url_item.url.c_str(), url_item.referer.c_str());
以上省略掉网络发送的过程。从log上看,两种用法的本地log均符合预期,但是第一种用法server端却没有收到url_item的referer数据,第二种用法则正常。UrlItem实际是一个成员均为public的类,用“.”直接给public成员赋值,与调用thrift生成的__set_XX方法看来是有差异的,具体差在哪呢?让我们从源码来看:
// Tracks which optional fields of UrlItem have been explicitly set.
// `referer` is the only flag declared here, and it starts out false.
typedef struct _UrlItem__isset {
  _UrlItem__isset() : referer(false) {}
  bool referer;
} _UrlItem__isset;

// Thrift-generated C++ class for the UrlItem struct (excerpt).
class UrlItem {
 public:

  static const char* ascii_fingerprint; // = "5B708A954C550ECA9C1A49D3C5CAFAB9";
  static const uint8_t binary_fingerprint[16]; // = {0x5B,0x70,0x8A,0x95,0x4C,0x55,0x0E,0xCA,0x9C,0x1A,0x49,0xD3,0xC5,0xCA,0xFA,0xB9};

  // Both strings start empty; __isset is default-constructed (referer flag = false).
  UrlItem() : url(""), referer("") {
  }

  virtual ~UrlItem() throw() {}

  std::string url;
  std::string referer;

  // "Has been set" flags for the optional members.
  _UrlItem__isset __isset;

  // Setter for `url`: plain assignment, no flag is involved.
  void __set_url(const std::string& val) {
    url = val;
  }

  // Setter for `referer`: stores the value AND marks the field as set.
  // Assigning `referer` directly with `.` does not update __isset.referer.
  void __set_referer(const std::string& val) {
    referer = val;
    __isset.referer = true;
  }

  // ... the remaining generated members are omitted from this excerpt ...
};
从上面thrift根据IDL生成的代码可以看到,它用了一个额外的结构体_UrlItem__isset来标记optional成员的写(赋值)状态:使用__set_XX方法会把__isset中对应的字段置为true,而用“.”直接给成员赋值则没有该作用。那这对网络发送有神马影响呢?让我们继续看序列化读写的源码:
// Deserializes a UrlItem from `iprot`.
//
// Reads fields one by one until a T_STOP field type is seen. Field id 1 is
// stored into `url`, field id 2 into `referer`; any other id, or a known id
// with an unexpected type, is skipped.
//
// Returns the sum of the values returned by the protocol calls.
// Throws TProtocolException(INVALID_DATA) if field 1 (`url`) was never read.
uint32_t UrlItem::read(::apache::thrift::protocol::TProtocol* iprot) {
  uint32_t xfer = 0;
  std::string fname;
  ::apache::thrift::protocol::TType ftype;
  int16_t fid;

  xfer += iprot->readStructBegin(fname);

  using ::apache::thrift::protocol::TProtocolException;

  // `url` has no __isset flag, so its presence is tracked locally.
  bool isset_url = false;

  while (true) {
    xfer += iprot->readFieldBegin(fname, ftype, fid);
    if (ftype == ::apache::thrift::protocol::T_STOP) {
      break;
    }
    switch (fid) {
      case 1:
        if (ftype == ::apache::thrift::protocol::T_STRING) {
          xfer += iprot->readString(this->url);
          isset_url = true;
        } else {
          xfer += iprot->skip(ftype);
        }
        break;
      case 2:
        if (ftype == ::apache::thrift::protocol::T_STRING) {
          xfer += iprot->readString(this->referer);
          // Reading the field marks it as set on the receiving side.
          this->__isset.referer = true;
        } else {
          xfer += iprot->skip(ftype);
        }
        break;
      default:
        xfer += iprot->skip(ftype);
        break;
    }
    xfer += iprot->readFieldEnd();
  }

  xfer += iprot->readStructEnd();

  // `url` must have been read; otherwise the data is rejected.
  if (!isset_url)
    throw TProtocolException(TProtocolException::INVALID_DATA);
  return xfer;
}

// Serializes this UrlItem to `oprot`.
//
// `url` (field 1) is always written. `referer` (field 2) is written only when
// __isset.referer is true, so a value stored in `referer` without that flag
// being set is not emitted.
//
// Returns the sum of the values returned by the protocol calls.
uint32_t UrlItem::write(::apache::thrift::protocol::TProtocol* oprot) const {
  uint32_t xfer = 0;
  xfer += oprot->writeStructBegin("UrlItem");

  xfer += oprot->writeFieldBegin("url", ::apache::thrift::protocol::T_STRING, 1);
  xfer += oprot->writeString(this->url);
  xfer += oprot->writeFieldEnd();

  // Written only if flagged as set.
  if (this->__isset.referer) {
    xfer += oprot->writeFieldBegin("referer", ::apache::thrift::protocol::T_STRING, 2);
    xfer += oprot->writeString(this->referer);
    xfer += oprot->writeFieldEnd();
  }

  xfer += oprot->writeFieldStop();
  xfer += oprot->writeStructEnd();
  return xfer;
}
显然,序列化写操作会有对optional成员的写标记进行判断,当通过网络对外发送时会调用write方法,当__isset对应成员没有置写标记为true时该字段也就不会对外发送了,而对于required成员必选字段则不会有此问题,哼,问题就在这里,这样做有神马好处呢?这样能减少网络发送的开销,对于未使用的optional可选字段就没有必要发送了,thrift IDL的实现这里使用结构体对于每个可选成员都用bool变量来标记,这里还是相对比较浪费的,一个字段一个字节呢;而对于反序列化进行读操作会通过字段类型和字段ID去判断,无需担心。因此,当写optional字段时一定要采用__set_XX成员方法的方式!
google的protobuf又是怎么处理的呢?笔者把相关代码片断放一块了:
message UrlItem { required string url = 1; optional string referer = 2; } class UrlItem : public ::google::protobuf::Message { public: UrlItem(); virtual ~UrlItem(); UrlItem(const UrlItem& from); // required string url = 1; inline bool has_url() const; inline void clear_url(); static const int kUrlFieldNumber = 1; inline const ::std::string& url() const; inline void set_url(const ::std::string& value); inline void set_url(const char* value); inline void set_url(const char* value, size_t size); inline ::std::string* mutable_url(); inline ::std::string* release_url(); // optional string referer = 2; inline bool has_referer() const; inline void clear_referer(); static const int kRefererFieldNumber = 2; inline const ::std::string& referer() const; inline void set_referer(const ::std::string& value); inline void set_referer(const char* value); inline void set_referer(const char* value, size_t size); inline ::std::string* mutable_referer(); inline ::std::string* release_referer(); private: inline void set_has_url(); inline void clear_has_url(); inline void set_has_referer(); inline void clear_has_referer(); ::google::protobuf::UnknownFieldSet _unknown_fields_; ::std::string* url_; ::std::string* referer_; mutable int _cached_size_; ::google::protobuf::uint32 _has_bits_[(2 + 31) / 32]; }; // required string url = 1; inline bool UrlItem::has_url() const { return (_has_bits_[0] & 0x00000001u) != 0; } inline void UrlItem::set_has_url() { _has_bits_[0] |= 0x00000001u; } inline void UrlItem::clear_has_url() { _has_bits_[0] &= ~0x00000001u; } inline void UrlItem::clear_url() { if (url_ != &::google::protobuf::internal::kEmptyString) { url_->clear(); } clear_has_url(); } inline const ::std::string& UrlItem::url() const { return *url_; } inline void UrlItem::set_url(const ::std::string& value) { set_has_url(); if (url_ == &::google::protobuf::internal::kEmptyString) { url_ = new ::std::string; } url_->assign(value); } inline void UrlItem::set_url(const char* value) { 
set_has_url(); if (url_ == &::google::protobuf::internal::kEmptyString) { url_ = new ::std::string; } url_->assign(value); } inline void UrlItem::set_url(const char* value, size_t size) { set_has_url(); if (url_ == &::google::protobuf::internal::kEmptyString) { url_ = new ::std::string; } url_->assign(reinterpret_cast(value), size); } inline ::std::string* UrlItem::mutable_url() { set_has_url(); if (url_ == &::google::protobuf::internal::kEmptyString) { url_ = new ::std::string; } return url_; } inline ::std::string* UrlItem::release_url() { clear_has_url(); if (url_ == &::google::protobuf::internal::kEmptyString) { return NULL; } else { ::std::string* temp = url_; url_ = const_cast< ::std::string*>(&::google::protobuf::internal::kEmptyString); return temp; } } // optional string referer = 2; inline bool UrlItem::has_referer() const { return (_has_bits_[0] & 0x00000002u) != 0; } inline void UrlItem::set_has_referer() { _has_bits_[0] |= 0x00000002u; } inline void UrlItem::clear_has_referer() { _has_bits_[0] &= ~0x00000002u; } inline void UrlItem::clear_referer() { if (referer_ != &::google::protobuf::internal::kEmptyString) { referer_->clear(); } clear_has_referer(); } inline const ::std::string& UrlItem::referer() const { return *referer_; } inline void UrlItem::set_referer(const ::std::string& value) { set_has_referer(); if (referer_ == &::google::protobuf::internal::kEmptyString) { referer_ = new ::std::string; } referer_->assign(value); } inline void UrlItem::set_referer(const char* value) { set_has_referer(); if (referer_ == &::google::protobuf::internal::kEmptyString) { referer_ = new ::std::string; } referer_->assign(value); } inline void UrlItem::set_referer(const char* value, size_t size) { set_has_referer(); if (referer_ == &::google::protobuf::internal::kEmptyString) { referer_ = new ::std::string; } referer_->assign(reinterpret_cast(value), size); } inline ::std::string* UrlItem::mutable_referer() { set_has_referer(); if (referer_ == 
&::google::protobuf::internal::kEmptyString) { referer_ = new ::std::string; } return referer_; } inline ::std::string* UrlItem::release_referer() { clear_has_referer(); if (referer_ == &::google::protobuf::internal::kEmptyString) { return NULL; } else { ::std::string* temp = referer_; referer_ = const_cast< ::std::string*>(&::google::protobuf::internal::kEmptyString); return temp; } }
google protobuf的实现这里用到了标志位比特_has_bits_,每个字段只占1个bit,相对thrift省却不少空间哦,而且使用了varint可变字节压缩编码,当数据值相对较小时是很节省的(thrift的TCompactProtocol同样使用了varint,并配合zigzag编码,对于负数值更省空间),再者protobuf序列化、反序列化等实现上也更高效(对比一下与thrift的代码就可看出),如果我有选型的决定权,肯定推崇google protobuf啦~
谢谢你,困扰了一整个下午了…
非常感谢