protozero
Minimalistic protocol buffer decoder and encoder in C++.
pbf_reader.hpp
Go to the documentation of this file.
1 #ifndef PROTOZERO_PBF_READER_HPP
2 #define PROTOZERO_PBF_READER_HPP
3 
4 /*****************************************************************************
5 
6 protozero - Minimalistic protocol buffer decoder and encoder in C++.
7 
8 This file is from https://github.com/mapbox/protozero where you can find more
9 documentation.
10 
11 *****************************************************************************/
12 
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>
#include <utility>
23 
24 #include <protozero/config.hpp>
25 #include <protozero/exception.hpp>
26 #include <protozero/iterators.hpp>
27 #include <protozero/types.hpp>
28 #include <protozero/varint.hpp>
29 
30 #if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN
31 # include <protozero/byteswap.hpp>
32 #endif
33 
34 namespace protozero {
35 
60 class pbf_reader {
61 
62  // A pointer to the next unread data.
63  const char* m_data = nullptr;
64 
65  // A pointer to one past the end of data.
66  const char* m_end = nullptr;
67 
68  // The wire type of the current field.
69  pbf_wire_type m_wire_type = pbf_wire_type::unknown;
70 
71  // The tag of the current field.
72  pbf_tag_type m_tag = 0;
73 
74  template <typename T>
75  T get_fixed() {
76  T result;
77  skip_bytes(sizeof(T));
78  std::memcpy(&result, m_data - sizeof(T), sizeof(T));
79 #if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN
80  detail::byteswap_inplace(&result);
81 #endif
82  return result;
83  }
84 
85  template <typename T>
87  protozero_assert(tag() != 0 && "call next() before accessing field value");
88  const auto len = get_len_and_skip();
89  protozero_assert(len % sizeof(T) == 0);
91  const_fixed_iterator<T>(m_data, m_data)};
92  }
93 
94  template <typename T>
95  T get_varint() {
96  return static_cast<T>(decode_varint(&m_data, m_end));
97  }
98 
99  template <typename T>
100  T get_svarint() {
101  protozero_assert((has_wire_type(pbf_wire_type::varint) || has_wire_type(pbf_wire_type::length_delimited)) && "not a varint");
102  return static_cast<T>(decode_zigzag64(decode_varint(&m_data, m_end)));
103  }
104 
105  pbf_length_type get_length() {
106  return get_varint<pbf_length_type>();
107  }
108 
109  void skip_bytes(pbf_length_type len) {
110  if (m_data + len > m_end) {
111  throw end_of_buffer_exception();
112  }
113  m_data += len;
114 
115  // In debug builds reset the tag to zero so that we can detect (some)
116  // wrong code.
117 #ifndef NDEBUG
118  m_tag = 0;
119 #endif
120  }
121 
122  pbf_length_type get_len_and_skip() {
123  const auto len = get_length();
124  skip_bytes(len);
125  return len;
126  }
127 
128  template <typename T>
129  iterator_range<T> get_packed() {
130  protozero_assert(tag() != 0 && "call next() before accessing field value");
131  const auto len = get_len_and_skip();
132  return iterator_range<T>{T{m_data - len, m_data},
133  T{m_data, m_data}};
134  }
135 
136 public:
137 
148  explicit pbf_reader(const data_view& view) noexcept
149  : m_data(view.data()),
150  m_end(view.data() + view.size()),
151  m_wire_type(pbf_wire_type::unknown),
152  m_tag(0) {
153  }
154 
165  pbf_reader(const char* data, std::size_t size) noexcept
166  : m_data(data),
167  m_end(data + size),
168  m_wire_type(pbf_wire_type::unknown),
169  m_tag(0) {
170  }
171 
182  pbf_reader(const std::pair<const char*, std::size_t>& data) noexcept
183  : m_data(data.first),
184  m_end(data.first + data.second),
185  m_wire_type(pbf_wire_type::unknown),
186  m_tag(0) {
187  }
188 
199  pbf_reader(const std::string& data) noexcept
200  : m_data(data.data()),
201  m_end(data.data() + data.size()),
202  m_wire_type(pbf_wire_type::unknown),
203  m_tag(0) {
204  }
205 
210  pbf_reader() noexcept = default;
211 
213  pbf_reader(const pbf_reader&) noexcept = default;
214 
216  pbf_reader(pbf_reader&&) noexcept = default;
217 
219  pbf_reader& operator=(const pbf_reader& other) noexcept = default;
220 
222  pbf_reader& operator=(pbf_reader&& other) noexcept = default;
223 
224  ~pbf_reader() = default;
225 
231  void swap(pbf_reader& other) noexcept {
232  using std::swap;
233  swap(m_data, other.m_data);
234  swap(m_end, other.m_end);
235  swap(m_wire_type, other.m_wire_type);
236  swap(m_tag, other.m_tag);
237  }
238 
244  operator bool() const noexcept {
245  return m_data < m_end;
246  }
247 
257  std::size_t length() const noexcept {
258  return std::size_t(m_end - m_data);
259  }
260 
276  bool next() {
277  if (m_data == m_end) {
278  return false;
279  }
280 
281  const auto value = get_varint<uint32_t>();
282  m_tag = pbf_tag_type(value >> 3);
283 
284  // tags 0 and 19000 to 19999 are not allowed as per
285  // https://developers.google.com/protocol-buffers/docs/proto
286  protozero_assert(((m_tag > 0 && m_tag < 19000) ||
287  (m_tag > 19999 && m_tag <= ((1 << 29) - 1))) && "tag out of range");
288 
289  m_wire_type = pbf_wire_type(value & 0x07);
290  switch (m_wire_type) {
291  case pbf_wire_type::varint:
292  case pbf_wire_type::fixed64:
293  case pbf_wire_type::length_delimited:
294  case pbf_wire_type::fixed32:
295  break;
296  default:
298  }
299 
300  return true;
301  }
302 
331  bool next(pbf_tag_type next_tag) {
332  while (next()) {
333  if (m_tag == next_tag) {
334  return true;
335  } else {
336  skip();
337  }
338  }
339  return false;
340  }
341 
371  while (next()) {
372  if (m_tag == next_tag && m_wire_type == wire_type) {
373  return true;
374  } else {
375  skip();
376  }
377  }
378  return false;
379  }
380 
390  pbf_tag_type tag() const noexcept {
391  return m_tag;
392  }
393 
409  pbf_wire_type wire_type() const noexcept {
410  return m_wire_type;
411  }
412 
435  uint32_t tag_and_type() const noexcept {
437  }
438 
445  bool has_wire_type(pbf_wire_type type) const noexcept {
446  return wire_type() == type;
447  }
448 
455  void skip() {
456  protozero_assert(tag() != 0 && "call next() before calling skip()");
457  switch (wire_type()) {
458  case pbf_wire_type::varint:
459  skip_varint(&m_data, m_end);
460  break;
461  case pbf_wire_type::fixed64:
462  skip_bytes(8);
463  break;
464  case pbf_wire_type::length_delimited:
465  skip_bytes(get_length());
466  break;
467  case pbf_wire_type::fixed32:
468  skip_bytes(4);
469  break;
470  default:
471  protozero_assert(false && "can not be here because next() should have thrown already");
472  }
473  }
474 
476 
487  bool get_bool() {
488  protozero_assert(tag() != 0 && "call next() before accessing field value");
489  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
490  protozero_assert((*m_data & 0x80) == 0 && "not a 1 byte varint");
491  skip_bytes(1);
492  return m_data[-1] != 0; // -1 okay because we incremented m_data the line before
493  }
494 
502  int32_t get_enum() {
503  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
504  return get_varint<int32_t>();
505  }
506 
514  int32_t get_int32() {
515  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
516  return get_varint<int32_t>();
517  }
518 
526  int32_t get_sint32() {
527  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
528  return get_svarint<int32_t>();
529  }
530 
538  uint32_t get_uint32() {
539  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
540  return get_varint<uint32_t>();
541  }
542 
550  int64_t get_int64() {
551  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
552  return get_varint<int64_t>();
553  }
554 
562  int64_t get_sint64() {
563  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
564  return get_svarint<int64_t>();
565  }
566 
574  uint64_t get_uint64() {
575  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
576  return get_varint<uint64_t>();
577  }
578 
586  uint32_t get_fixed32() {
587  protozero_assert(tag() != 0 && "call next() before accessing field value");
588  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
589  return get_fixed<uint32_t>();
590  }
591 
599  int32_t get_sfixed32() {
600  protozero_assert(tag() != 0 && "call next() before accessing field value");
601  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
602  return get_fixed<int32_t>();
603  }
604 
612  uint64_t get_fixed64() {
613  protozero_assert(tag() != 0 && "call next() before accessing field value");
614  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
615  return get_fixed<uint64_t>();
616  }
617 
625  int64_t get_sfixed64() {
626  protozero_assert(tag() != 0 && "call next() before accessing field value");
627  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
628  return get_fixed<int64_t>();
629  }
630 
638  float get_float() {
639  protozero_assert(tag() != 0 && "call next() before accessing field value");
640  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
641  return get_fixed<float>();
642  }
643 
651  double get_double() {
652  protozero_assert(tag() != 0 && "call next() before accessing field value");
653  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
654  return get_fixed<double>();
655  }
656 
667  protozero_assert(tag() != 0 && "call next() before accessing field value");
668  protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message");
669  const auto len = get_len_and_skip();
670  return data_view{m_data - len, len};
671  }
672 
673 #ifndef PROTOZERO_STRICT_API
674 
682  std::pair<const char*, pbf_length_type> get_data() {
683  protozero_assert(tag() != 0 && "call next() before accessing field value");
684  protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message");
685  const auto len = get_len_and_skip();
686  return std::make_pair(m_data - len, len);
687  }
688 #endif
689 
697  std::string get_bytes() {
698  return std::string(get_view());
699  }
700 
708  std::string get_string() {
709  return std::string(get_view());
710  }
711 
720  return pbf_reader(get_view());
721  }
722 
724 
727 
730 
733 
736 
739 
742 
745 
748 
750 
764  return get_packed<pbf_reader::const_bool_iterator>();
765  }
766 
777  return get_packed<pbf_reader::const_enum_iterator>();
778  }
779 
790  return get_packed<pbf_reader::const_int32_iterator>();
791  }
792 
803  return get_packed<pbf_reader::const_sint32_iterator>();
804  }
805 
816  return get_packed<pbf_reader::const_uint32_iterator>();
817  }
818 
829  return get_packed<pbf_reader::const_int64_iterator>();
830  }
831 
842  return get_packed<pbf_reader::const_sint64_iterator>();
843  }
844 
855  return get_packed<pbf_reader::const_uint64_iterator>();
856  }
857 
867  auto get_packed_fixed32() -> decltype(packed_fixed<uint32_t>()) {
868  return packed_fixed<uint32_t>();
869  }
870 
880  auto get_packed_sfixed32() -> decltype(packed_fixed<int32_t>()) {
881  return packed_fixed<int32_t>();
882  }
883