// Copyright 2017 Daniel Parker // Distributed under the Boost license, Version 1.0. // (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // See https://github.com/danielaparker/jsoncons for latest version #ifndef JSONCONS_BSON_BSON_PARSER_HPP #define JSONCONS_BSON_BSON_PARSER_HPP #include #include #include #include // std::move #include #include #include #include #include #include #include namespace jsoncons { namespace bson { enum class parse_mode {root,before_done,document,array,value}; struct parse_state { parse_mode mode; std::size_t length; uint8_t type; std::size_t index; parse_state(parse_mode mode, std::size_t length, uint8_t type = 0) : mode(mode), length(length), type(type), index(0) { } parse_state(const parse_state&) = default; parse_state(parse_state&&) = default; }; template > class basic_bson_parser : public ser_context { using char_type = char; using char_traits_type = std::char_traits; using temp_allocator_type = Allocator; using char_allocator_type = typename std::allocator_traits:: template rebind_alloc; using byte_allocator_type = typename std::allocator_traits:: template rebind_alloc; using parse_state_allocator_type = typename std::allocator_traits:: template rebind_alloc; Src source_; bson_decode_options options_; bool more_; bool done_; std::basic_string,char_allocator_type> text_buffer_; std::vector state_stack_; int nesting_depth_; public: template basic_bson_parser(Source&& source, const bson_decode_options& options = bson_decode_options(), const Allocator alloc = Allocator()) : source_(std::forward(source)), options_(options), more_(true), done_(false), text_buffer_(alloc), state_stack_(alloc), nesting_depth_(0) { state_stack_.emplace_back(parse_mode::root,0); } void restart() { more_ = true; } void reset() { state_stack_.clear(); state_stack_.emplace_back(parse_mode::root,0); more_ = true; done_ = false; } bool done() const { return done_; } bool stopped() const { return !more_; } std::size_t line() const override { return 0; } std::size_t column() const override { return source_.position(); } void parse(json_visitor& visitor, std::error_code& ec) { if (source_.is_error()) { ec = bson_errc::source_error; return; } while (!done_ && more_) { switch (state_stack_.back().mode) { case parse_mode::root: state_stack_.back().mode = parse_mode::before_done; begin_document(visitor, ec); break; case parse_mode::document: { uint8_t t{}; if (source_.get(t) == 0) { ec = bson_errc::unexpected_eof; return; } if (t != 0x00) { read_e_name(visitor,jsoncons::bson::detail::bson_container_type::document,ec); state_stack_.back().mode = parse_mode::value; state_stack_.back().type = t; } else { end_document(visitor,ec); } break; } case parse_mode::array: { uint8_t t{}; if (source_.get(t) == 0) { ec = bson_errc::unexpected_eof; return; } if (t != 0x00) { read_e_name(visitor,jsoncons::bson::detail::bson_container_type::array,ec); read_value(visitor,t,ec); } else { end_array(visitor,ec); } break; } case parse_mode::value: state_stack_.back().mode = parse_mode::document; read_value(visitor,state_stack_.back().type,ec); break; case parse_mode::before_done: { JSONCONS_ASSERT(state_stack_.size() == 1); state_stack_.clear(); more_ = false; done_ = true; visitor.flush(); break; } } } } private: void begin_document(json_visitor& visitor, std::error_code& ec) { if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) { ec = bson_errc::max_nesting_depth_exceeded; return; } uint8_t buf[sizeof(int32_t)]; if (source_.read(buf, sizeof(int32_t)) != sizeof(int32_t)) { ec = bson_errc::unexpected_eof; return; } const uint8_t* endp; auto length = jsoncons::detail::little_to_native(buf, buf+sizeof(int32_t),&endp); more_ = visitor.begin_object(semantic_tag::none, *this, ec); state_stack_.emplace_back(parse_mode::document,length); } void end_document(json_visitor& visitor, std::error_code& ec) { --nesting_depth_; more_ = visitor.end_object(*this,ec); state_stack_.pop_back(); } void begin_array(json_visitor& visitor, std::error_code& ec) { if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) { ec = bson_errc::max_nesting_depth_exceeded; return; } uint8_t buf[sizeof(int32_t)]; if (source_.read(buf, sizeof(int32_t)) != sizeof(int32_t)) { ec = bson_errc::unexpected_eof; return; } const uint8_t* endp; /* auto len = */ jsoncons::detail::little_to_native(buf, buf+sizeof(int32_t),&endp); more_ = visitor.begin_array(semantic_tag::none, *this, ec); state_stack_.emplace_back(parse_mode::array,0); } void end_array(json_visitor& visitor, std::error_code& ec) { --nesting_depth_; more_ = visitor.end_array(*this, ec); state_stack_.pop_back(); } void read_e_name(json_visitor& visitor, jsoncons::bson::detail::bson_container_type type, std::error_code& ec) { text_buffer_.clear(); uint8_t c{}; while (source_.get(c) > 0 && c != 0) { text_buffer_.push_back(c); } if (type == jsoncons::bson::detail::bson_container_type::document) { auto result = unicons::validate(text_buffer_.begin(),text_buffer_.end()); if (result.ec != unicons::conv_errc()) { ec = bson_errc::invalid_utf8_text_string; return; } more_ = visitor.key(basic_string_view(text_buffer_.data(),text_buffer_.length()), *this, ec); } } void read_value(json_visitor& visitor, uint8_t type, std::error_code& ec) { switch (type) { case jsoncons::bson::detail::bson_format::double_cd: { uint8_t buf[sizeof(double)]; if (source_.read(buf, sizeof(double)) != sizeof(double)) { ec = bson_errc::unexpected_eof; return; } const uint8_t* endp; double res = jsoncons::detail::little_to_native(buf,buf+sizeof(buf),&endp); more_ = visitor.double_value(res, semantic_tag::none, *this, ec); break; } case jsoncons::bson::detail::bson_format::string_cd: { uint8_t buf[sizeof(int32_t)]; if (source_.read(buf, sizeof(int32_t)) != sizeof(int32_t)) { ec = bson_errc::unexpected_eof; return; } const uint8_t* endp; auto len = jsoncons::detail::little_to_native(buf, buf+sizeof(buf),&endp); if (len < 1) { ec = bson_errc::string_length_is_non_positive; return; } std::vector s; std::size_t size = static_cast(len-1); if (source_reader::read(source_,s,size) != size) { ec = bson_errc::unexpected_eof; return; } uint8_t c{}; if (source_.get(c) == 0) // discard 0 { ec = bson_errc::unexpected_eof; return; } auto result = unicons::validate(s.begin(),s.end()); if (result.ec != unicons::conv_errc()) { ec = bson_errc::invalid_utf8_text_string; return; } more_ = visitor.string_value(basic_string_view(s.data(),s.size()), semantic_tag::none, *this, ec); break; } case jsoncons::bson::detail::bson_format::document_cd: { begin_document(visitor,ec); break; } case jsoncons::bson::detail::bson_format::array_cd: { begin_array(visitor,ec); break; } case jsoncons::bson::detail::bson_format::null_cd: { more_ = visitor.null_value(semantic_tag::none, *this, ec); break; } case jsoncons::bson::detail::bson_format::bool_cd: { uint8_t val{}; if (source_.get(val) == 0) { ec = bson_errc::unexpected_eof; return; } more_ = visitor.bool_value(val != 0, semantic_tag::none, *this, ec); break; } case jsoncons::bson::detail::bson_format::int32_cd: { uint8_t buf[sizeof(int32_t)]; if (source_.read(buf, sizeof(int32_t)) != sizeof(int32_t)) { ec = bson_errc::unexpected_eof; return; } const uint8_t* endp; auto val = jsoncons::detail::little_to_native(buf, buf+sizeof(int32_t),&endp); more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); break; } case jsoncons::bson::detail::bson_format::timestamp_cd: { uint8_t buf[sizeof(uint64_t)]; if (source_.read(buf, sizeof(uint64_t)) != sizeof(uint64_t)) { ec = bson_errc::unexpected_eof; return; } const uint8_t* endp; auto val = jsoncons::detail::little_to_native(buf, buf+sizeof(uint64_t),&endp); more_ = visitor.uint64_value(val, semantic_tag::timestamp, *this, ec); break; } case jsoncons::bson::detail::bson_format::int64_cd: { uint8_t buf[sizeof(int64_t)]; if (source_.read(buf, sizeof(int64_t)) != sizeof(int64_t)) { ec = bson_errc::unexpected_eof; return; } const uint8_t* endp; auto val = jsoncons::detail::little_to_native(buf, buf+sizeof(int64_t),&endp); more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); break; } case jsoncons::bson::detail::bson_format::datetime_cd: { uint8_t buf[sizeof(int64_t)]; if (source_.read(buf, sizeof(int64_t)) != sizeof(int64_t)) { ec = bson_errc::unexpected_eof; return; } const uint8_t* endp; auto val = jsoncons::detail::little_to_native(buf, buf+sizeof(int64_t),&endp); more_ = visitor.int64_value(val, semantic_tag::timestamp, *this, ec); break; } case jsoncons::bson::detail::bson_format::binary_cd: { uint8_t buf[sizeof(int32_t)]; if (source_.read(buf, sizeof(int32_t)) != sizeof(int32_t)) { ec = bson_errc::unexpected_eof; return; } const uint8_t* endp; const auto len = jsoncons::detail::little_to_native(buf, buf+sizeof(int32_t),&endp); if (len < 0) { ec = bson_errc::length_is_negative; return; } std::vector v; if (source_reader::read(source_, v, len) != static_cast(len)) { ec = bson_errc::unexpected_eof; return; } more_ = visitor.byte_string_value(byte_string_view(v.data(),v.size()), semantic_tag::none, *this, ec); break; } default: { ec = bson_errc::unknown_type; return; } } } }; }} #endif