diff --git a/include/fkYAML/detail/conversions/from_node.hpp b/include/fkYAML/detail/conversions/from_node.hpp index d4555b60..8de5b41b 100644 --- a/include/fkYAML/detail/conversions/from_node.hpp +++ b/include/fkYAML/detail/conversions/from_node.hpp @@ -362,31 +362,39 @@ struct from_node_int_helper { } }; -/// @brief Helper struct for node-to-int conversion if IntType is not the node's integer value type. +/// @brief Partial specialization for uint64_t when integer_type != uint64_t (the common int64_t case). +/// It is more specialized than the generic IntType specialization below, so partial ordering selects it +/// for uint64_t regardless of declaration order. A hardcoded 'false' (not a value-dependent expression) avoids +/// the MSVC ambiguity that arises when std::is_same<...>::value is used as a template argument. /// @tparam BasicNodeType A basic_node template instance type. -/// @tparam IntType Target integer value type (different from BasicNodeType::integer_type) +template +struct from_node_int_helper { + /// @brief Convert node's integer value to uint64_t via as_uint(). + /// @param n A node object. + /// @return The node value as uint64_t. + static uint64_t convert(const BasicNodeType& n) { + return n.as_uint(); + } +}; + +/// @brief Helper struct for node-to-int conversion if IntType is not the node's integer value type +/// and IntType is not uint64_t (covered by the partial specialization above). +/// @tparam BasicNodeType A basic_node template instance type. +/// @tparam IntType Target integer value type (different from BasicNodeType::integer_type, not uint64_t) template struct from_node_int_helper { - /// @brief Convert node's integer value to non-uint64_t integer types. + /// @brief Convert node's integer value to a narrower signed/unsigned integer type. /// @param n A node object. /// @return An integer value converted from the node's integer value. 
static IntType convert(const BasicNodeType& n) { using node_int_type = typename BasicNodeType::integer_type; const node_int_type tmp_int = n.as_int(); - // under/overflow check. - if (std::is_same::value) { - if FK_YAML_UNLIKELY (tmp_int < 0) { - throw exception("Integer value underflow detected."); - } + if FK_YAML_UNLIKELY (tmp_int < static_cast(std::numeric_limits::min())) { + throw exception("Integer value underflow detected."); } - else { - if FK_YAML_UNLIKELY (tmp_int < static_cast(std::numeric_limits::min())) { - throw exception("Integer value underflow detected."); - } - if FK_YAML_UNLIKELY (static_cast(std::numeric_limits::max()) < tmp_int) { - throw exception("Integer value overflow detected."); - } + if FK_YAML_UNLIKELY (static_cast(std::numeric_limits::max()) < tmp_int) { + throw exception("Integer value overflow detected."); } return static_cast(tmp_int); diff --git a/include/fkYAML/detail/conversions/to_node.hpp b/include/fkYAML/detail/conversions/to_node.hpp index bdc06c1b..5af0ef21 100644 --- a/include/fkYAML/detail/conversions/to_node.hpp +++ b/include/fkYAML/detail/conversions/to_node.hpp @@ -65,6 +65,16 @@ struct external_node_constructor { n.m_value.integer = i; } + /// @brief Constructs an INTEGER node from a uint64_t value that exceeds the signed range. + /// The raw bit pattern is stored in the integer field and the uint_bit flag is set so that + /// get_value() / as_uint() can recover the original unsigned value. 
+ static void unsigned_integer_scalar(BasicNodeType& n, const typename BasicNodeType::integer_type i) { + destroy(n); + n.m_attrs |= node_attr_bits::int_bit; + n.m_attrs |= node_attr_bits::uint_bit; + n.m_value.integer = i; + } + static void float_scalar(BasicNodeType& n, const typename BasicNodeType::float_number_type f) { destroy(n); n.m_attrs |= node_attr_bits::float_bit; @@ -81,7 +91,9 @@ struct external_node_constructor { private: static void destroy(BasicNodeType& n) { n.m_value.destroy(n.m_attrs & node_attr_mask::value); - n.m_attrs &= ~node_attr_mask::value; + // Clear both the value-type bits and the uint_bit style flag so that any + // subsequent reassignment starts from a clean state. + n.m_attrs &= ~(node_attr_mask::value | node_attr_bits::uint_bit); } }; diff --git a/include/fkYAML/detail/input/scalar_parser.hpp b/include/fkYAML/detail/input/scalar_parser.hpp index 06e761b4..adac874d 100644 --- a/include/fkYAML/detail/input/scalar_parser.hpp +++ b/include/fkYAML/detail/input/scalar_parser.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -534,6 +535,24 @@ class scalar_parser { if FK_YAML_LIKELY (converted) { return basic_node_type(integer); } + + // For untagged plain integer scalars, attempt a uint64_t parse to handle large + // positive values that exceed int64_t max (e.g. xxHash/UUID results like + // 15745692345339290292). This only applies when integer_type is a signed 64-bit + // type; any other width would not be able to represent the value anyway. + if (tag_type != tag_t::INTEGER && std::is_signed::value && + sizeof(integer_type) == sizeof(uint64_t)) { + uint64_t u64 = 0; + if (detail::atoi(token.begin(), token.end(), u64)) { + basic_node_type node; + // Store the bit pattern in the signed field and set uint_bit so that + // as_uint() / get_value() can recover the correct value. 
+ detail::external_node_constructor::unsigned_integer_scalar( + node, static_cast(u64)); + return node; + } + } + if FK_YAML_UNLIKELY (tag_type == tag_t::INTEGER) { throw parse_error("Failed to convert a scalar to an integer.", m_line, m_indent); } diff --git a/include/fkYAML/detail/node_attrs.hpp b/include/fkYAML/detail/node_attrs.hpp index ddf84ed2..ecc70aba 100644 --- a/include/fkYAML/detail/node_attrs.hpp +++ b/include/fkYAML/detail/node_attrs.hpp @@ -59,6 +59,11 @@ constexpr node_attr_t string_bit = 1u << 6; /// A utility bit set to filter scalar node bits. constexpr node_attr_t scalar_bits = null_bit | bool_bit | int_bit | float_bit | string_bit; +/// The unsigned integer flag bit. +/// Set on INTEGER nodes whose stored int64_t value represents a uint64_t that exceeds INT64_MAX. +/// This allows values such as xxHash/UUID results to round-trip correctly through get_value(). +constexpr node_attr_t uint_bit = 1u << 16; // lives in the style bits area (0x00FF0000) + /// The anchor node bit. constexpr node_attr_t anchor_bit = 0x01000000u; /// The alias node bit. diff --git a/include/fkYAML/node.hpp b/include/fkYAML/node.hpp index c5e7a3d5..43d70034 100644 --- a/include/fkYAML/node.hpp +++ b/include/fkYAML/node.hpp @@ -1472,6 +1472,36 @@ class basic_node { throw fkyaml::type_error("The node value is not a boolean.", get_type()); } + /// @brief Checks if the node is an integer that was parsed from a uint64_t value exceeding INT64_MAX. + /// @return true if the node is an integer marked with the uint_bit flag, false otherwise. + bool is_uint() const noexcept { + return resolve_reference().is_uint_impl(); + } + + /// @brief Returns the integer node value as an unsigned 64-bit integer. + /// This is valid for nodes where integer_type is unsigned, for non-negative signed values, and for nodes where + /// a large positive decimal scalar (> INT64_MAX) was stored with the uint_bit flag set. + /// @throw fkyaml::type_error The node value is not an integer, or it is a negative signed integer. + /// @return The node value as uint64_t. 
+ uint64_t as_uint() const { + const basic_node& act_node = resolve_reference(); + if FK_YAML_LIKELY (act_node.is_integer_impl()) { + // When integer_type is unsigned the stored value IS the uint64_t directly. + if (std::is_unsigned::value) { + return static_cast(act_node.m_value.integer); + } + // When integer_type is signed, only uint_bit-marked nodes carry a uint64_t. + if (act_node.m_attrs & detail::node_attr_bits::uint_bit) { + return static_cast(act_node.m_value.integer); + } + // Signed values in the non-negative range can be returned safely. + if (act_node.m_value.integer >= static_cast(0)) { + return static_cast(act_node.m_value.integer); + } + } + throw fkyaml::type_error("The node value cannot be represented as an unsigned integer.", get_type()); + } + /// @brief Returns reference to the integer node value. /// @throw fkyaml::type_error The node value is not an integer. /// @return Reference to the integer node value. @@ -1479,18 +1509,30 @@ class basic_node { integer_type& as_int() { basic_node& act_node = resolve_reference(); if FK_YAML_LIKELY (act_node.is_integer_impl()) { + if FK_YAML_UNLIKELY (act_node.is_uint_impl()) { + throw fkyaml::type_error( + "The integer value exceeds INT64_MAX and cannot be returned as a signed integer. " + "Use as_uint() instead.", + get_type()); + } return act_node.m_value.integer; } throw fkyaml::type_error("The node value is not an integer.", get_type()); } /// @brief Returns reference to the integer node value. - /// @throw fkyaml::type_error The node value is not an integer. + /// @throw fkyaml::type_error The node value is not an integer, or exceeds INT64_MAX. /// @return Constant reference to the integer node value. 
/// @sa https://fktn-k.github.io/fkYAML/api/basic_node/as_int/ const integer_type& as_int() const { const basic_node& act_node = resolve_reference(); if FK_YAML_LIKELY (act_node.is_integer_impl()) { + if FK_YAML_UNLIKELY (act_node.is_uint_impl()) { + throw fkyaml::type_error( + "The integer value exceeds INT64_MAX and cannot be returned as a signed integer. " + "Use as_uint() instead.", + get_type()); + } return act_node.m_value.integer; } throw fkyaml::type_error("The node value is not an integer.", get_type()); @@ -1768,6 +1810,12 @@ class basic_node { return m_attrs & detail::node_attr_bits::int_bit; } + bool is_uint_impl() const noexcept { + // Both int_bit and uint_bit must be set: this node stores a uint64_t value + // whose bit pattern was placed into the signed integer_type field. + return (m_attrs & detail::node_attr_bits::int_bit) && (m_attrs & detail::node_attr_bits::uint_bit); + } + bool is_float_number_impl() const noexcept { return m_attrs & detail::node_attr_bits::float_bit; } diff --git a/single_include/fkYAML/node.hpp b/single_include/fkYAML/node.hpp index 05e79572..7753c6fd 100644 --- a/single_include/fkYAML/node.hpp +++ b/single_include/fkYAML/node.hpp @@ -5356,7 +5356,7 @@ FK_YAML_DETAIL_NAMESPACE_END #endif /* FK_YAML_CONVERSIONS_SCALAR_CONV_HPP */ -// #include +// #include // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library // | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 @@ -5365,1295 +5365,830 @@ FK_YAML_DETAIL_NAMESPACE_END // SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani // SPDX-License-Identifier: MIT -#ifndef FK_YAML_DETAIL_ENCODINGS_YAML_ESCAPER_HPP -#define FK_YAML_DETAIL_ENCODINGS_YAML_ESCAPER_HPP +#ifndef FK_YAML_DETAIL_CONVERSIONS_TO_NODE_HPP +#define FK_YAML_DETAIL_CONVERSIONS_TO_NODE_HPP -#include +#include // #include -// #include +// #include -// #include +// #include -// #include +// #include + +// #include + +// #include +// _______ __ __ __ _____ __ __ __ +// | 
__| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library +// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 +// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML +// +// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani +// SPDX-License-Identifier: MIT + +#ifndef FK_YAML_DETAIL_NODE_ATTRS_HPP +#define FK_YAML_DETAIL_NODE_ATTRS_HPP + +#include +#include + +// #include + +// #include FK_YAML_DETAIL_NAMESPACE_BEGIN -class yaml_escaper { - using iterator = ::std::string::const_iterator; +/// @brief The type for node attribute bits. +using node_attr_t = uint32_t; -public: - static bool unescape(const char*& begin, const char* end, std::string& buff) { - FK_YAML_ASSERT(*begin == '\\' && std::distance(begin, end) > 0); - bool ret = true; +/// @brief The namespace to define bit masks for node attribute bits. +namespace node_attr_mask { - switch (*++begin) { - case 'a': - buff.push_back('\a'); - break; - case 'b': - buff.push_back('\b'); - break; - case 't': - case '\t': - buff.push_back('\t'); - break; - case 'n': - buff.push_back('\n'); - break; - case 'v': - buff.push_back('\v'); - break; - case 'f': - buff.push_back('\f'); - break; - case 'r': - buff.push_back('\r'); - break; - case 'e': - buff.push_back(static_cast(0x1B)); - break; - case ' ': - buff.push_back(' '); - break; - case '\"': - buff.push_back('\"'); - break; - case '/': - buff.push_back('/'); - break; - case '\\': - buff.push_back('\\'); - break; - case 'N': // next line - unescape_escaped_unicode(0x85u, buff); - break; - case '_': // non-breaking space - unescape_escaped_unicode(0xA0u, buff); - break; - case 'L': // line separator - unescape_escaped_unicode(0x2028u, buff); - break; - case 'P': // paragraph separator - unescape_escaped_unicode(0x2029u, buff); - break; - case 'x': { - char32_t codepoint {0}; - ret = extract_codepoint(begin, end, 1, codepoint); - if FK_YAML_LIKELY (ret) { - unescape_escaped_unicode(codepoint, buff); - } - break; - } - case 'u': { - 
char32_t codepoint {0}; - ret = extract_codepoint(begin, end, 2, codepoint); - if FK_YAML_LIKELY (ret) { - unescape_escaped_unicode(codepoint, buff); - } - break; - } - case 'U': { - char32_t codepoint {0}; - ret = extract_codepoint(begin, end, 4, codepoint); - if FK_YAML_LIKELY (ret) { - unescape_escaped_unicode(codepoint, buff); - } - break; - } - default: - // Unsupported escape sequence is found in a string token. - ret = false; - break; - } +/// The bit mask for node value type bits. +constexpr node_attr_t value = 0x0000FFFFu; +/// The bit mask for node style type bits. (bits are not yet defined.) +constexpr node_attr_t style = 0x00FF0000u; +/// The bit mask for node property related bits. +constexpr node_attr_t props = 0xFF000000u; +/// The bit mask for anchor/alias node type bits. +constexpr node_attr_t anchoring = 0x03000000u; +/// The bit mask for anchor offset value bits. +constexpr node_attr_t anchor_offset = 0xFC000000u; +/// The bit mask for all the bits for node attributes. +constexpr node_attr_t all = std::numeric_limits::max(); - return ret; +} // namespace node_attr_mask + +/// @brief The namespace to define bits for node attributes. +namespace node_attr_bits { + +/// The sequence node bit. +constexpr node_attr_t seq_bit = 1u << 0; +/// The mapping node bit. +constexpr node_attr_t map_bit = 1u << 1; +/// The null scalar node bit. +constexpr node_attr_t null_bit = 1u << 2; +/// The boolean scalar node bit. +constexpr node_attr_t bool_bit = 1u << 3; +/// The integer scalar node bit. +constexpr node_attr_t int_bit = 1u << 4; +/// The floating point scalar node bit. +constexpr node_attr_t float_bit = 1u << 5; +/// The string scalar node bit. +constexpr node_attr_t string_bit = 1u << 6; + +/// A utility bit set to filter scalar node bits. +constexpr node_attr_t scalar_bits = null_bit | bool_bit | int_bit | float_bit | string_bit; + +/// The unsigned integer flag bit. 
+/// Set on INTEGER nodes whose stored int64_t value represents a uint64_t that exceeds INT64_MAX. +/// This allows values such as xxHash/UUID results to round-trip correctly through get_value(). +constexpr node_attr_t uint_bit = 1u << 16; // lives in the style bits area (0x00FF0000) + +/// The anchor node bit. +constexpr node_attr_t anchor_bit = 0x01000000u; +/// The alias node bit. +constexpr node_attr_t alias_bit = 0x02000000u; + +/// A utility bit set for initialization. +constexpr node_attr_t default_bits = null_bit; + +/// @brief Converts a node_type value to a node_attr_t value. +/// @param t A type of node value. +/// @return The associated node value bit. +inline node_attr_t from_node_type(node_type t) noexcept { + switch (t) { + case node_type::SEQUENCE: + return seq_bit; + case node_type::MAPPING: + return map_bit; + case node_type::NULL_OBJECT: + return null_bit; + case node_type::BOOLEAN: + return bool_bit; + case node_type::INTEGER: + return int_bit; + case node_type::FLOAT: + return float_bit; + case node_type::STRING: + return string_bit; + default: // LCOV_EXCL_LINE + return node_attr_mask::all; // LCOV_EXCL_LINE } +} - static ::std::string escape(const char* begin, const char* end, bool& is_escaped) { - ::std::string escaped {}; - escaped.reserve(std::distance(begin, end)); - for (; begin != end; ++begin) { - switch (*begin) { - case 0x01: - escaped += "\\u0001"; - is_escaped = true; - break; - case 0x02: - escaped += "\\u0002"; - is_escaped = true; - break; - case 0x03: - escaped += "\\u0003"; - is_escaped = true; - break; - case 0x04: - escaped += "\\u0004"; - is_escaped = true; - break; - case 0x05: - escaped += "\\u0005"; - is_escaped = true; - break; - case 0x06: - escaped += "\\u0006"; - is_escaped = true; - break; - case '\a': - escaped += "\\a"; - is_escaped = true; - break; - case '\b': - escaped += "\\b"; - is_escaped = true; - break; - case '\t': - escaped += "\\t"; - is_escaped = true; - break; - case '\n': - escaped += "\\n"; - 
is_escaped = true; - break; - case '\v': - escaped += "\\v"; - is_escaped = true; - break; - case '\f': - escaped += "\\f"; - is_escaped = true; - break; - case '\r': - escaped += "\\r"; - is_escaped = true; - break; - case 0x0E: - escaped += "\\u000E"; - is_escaped = true; - break; - case 0x0F: - escaped += "\\u000F"; - is_escaped = true; - break; - case 0x10: - escaped += "\\u0010"; - is_escaped = true; - break; - case 0x11: - escaped += "\\u0011"; - is_escaped = true; - break; - case 0x12: - escaped += "\\u0012"; - is_escaped = true; - break; - case 0x13: - escaped += "\\u0013"; - is_escaped = true; - break; - case 0x14: - escaped += "\\u0014"; - is_escaped = true; - break; - case 0x15: - escaped += "\\u0015"; - is_escaped = true; - break; - case 0x16: - escaped += "\\u0016"; - is_escaped = true; - break; - case 0x17: - escaped += "\\u0017"; - is_escaped = true; - break; - case 0x18: - escaped += "\\u0018"; - is_escaped = true; - break; - case 0x19: - escaped += "\\u0019"; - is_escaped = true; - break; - case 0x1A: - escaped += "\\u001A"; - is_escaped = true; - break; - case 0x1B: - escaped += "\\e"; - is_escaped = true; - break; - case 0x1C: - escaped += "\\u001C"; - is_escaped = true; - break; - case 0x1D: - escaped += "\\u001D"; - is_escaped = true; - break; - case 0x1E: - escaped += "\\u001E"; - is_escaped = true; - break; - case 0x1F: - escaped += "\\u001F"; - is_escaped = true; - break; - case '\"': - escaped += "\\\""; - is_escaped = true; - break; - case '\\': - escaped += "\\\\"; - is_escaped = true; - break; - default: - const std::ptrdiff_t diff = static_cast(std::distance(begin, end)); - if (diff > 1) { - if (*begin == static_cast(0xC2u) && *(begin + 1) == static_cast(0x85u)) { - escaped += "\\N"; - std::advance(begin, 1); - is_escaped = true; - break; - } - if (*begin == static_cast(0xC2u) && *(begin + 1) == static_cast(0xA0u)) { - escaped += "\\_"; - std::advance(begin, 1); - is_escaped = true; - break; - } +/// @brief Converts a node_attr_t value 
to a node_type value. +/// @param bits node attribute bits +/// @return An associated node value type with the given node value bit. +inline node_type to_node_type(node_attr_t bits) noexcept { + switch (bits & node_attr_mask::value) { + case seq_bit: + return node_type::SEQUENCE; + case map_bit: + return node_type::MAPPING; + case null_bit: + return node_type::NULL_OBJECT; + case bool_bit: + return node_type::BOOLEAN; + case int_bit: + return node_type::INTEGER; + case float_bit: + return node_type::FLOAT; + case string_bit: + return node_type::STRING; + default: // LCOV_EXCL_LINE + detail::unreachable(); // LCOV_EXCL_LINE + } +} - if (diff > 2) { - if (*begin == static_cast(0xE2u) && *(begin + 1) == static_cast(0x80u) && - *(begin + 2) == static_cast(0xA8u)) { - escaped += "\\L"; - std::advance(begin, 2); - is_escaped = true; - break; - } - if (*begin == static_cast(0xE2u) && *(begin + 1) == static_cast(0x80u) && - *(begin + 2) == static_cast(0xA9u)) { - escaped += "\\P"; - std::advance(begin, 2); - is_escaped = true; - break; - } - } - } - escaped += *begin; - break; - } - } - return escaped; - } // LCOV_EXCL_LINE +/// @brief Get an anchor offset used to reference an anchor node from the given attribute bits. +/// @param attrs node attribute bits +/// @return An anchor offset value. +inline uint32_t get_anchor_offset(node_attr_t attrs) noexcept { + return (attrs & node_attr_mask::anchor_offset) >> 26; +} -private: - static bool convert_hexchar_to_byte(char source, uint8_t& byte) { - if ('0' <= source && source <= '9') { - // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) - byte = static_cast(source - '0'); - return true; - } +/// @brief Set an anchor offset value to the appropriate bits. +/// @param offset An anchor offset value. +/// @param attrs node attribute bit set into which the offset value is written. 
+inline void set_anchor_offset(uint32_t offset, node_attr_t& attrs) noexcept { + attrs &= ~node_attr_mask::anchor_offset; + attrs |= (offset & 0x3Fu) << 26; +} - if ('A' <= source && source <= 'F') { - // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) - byte = static_cast(source - 'A' + 10); - return true; - } +} // namespace node_attr_bits - if ('a' <= source && source <= 'f') { - // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) - byte = static_cast(source - 'a' + 10); - return true; - } +FK_YAML_DETAIL_NAMESPACE_END - // The given character is not hexadecimal. - return false; - } +#endif /* FK_YAML_DETAIL_NODE_ATTRS_HPP */ - static bool extract_codepoint(const char*& begin, const char* end, int bytes_to_read, char32_t& codepoint) { - const bool has_enough_room = static_cast(std::distance(begin, end)) >= (bytes_to_read - 1); - if (!has_enough_room) { - return false; - } +// #include - const int read_size = bytes_to_read * 2; - uint8_t byte {0}; - codepoint = 0; - for (int i = read_size - 1; i >= 0; i--) { - const bool is_valid = convert_hexchar_to_byte(*++begin, byte); - if (!is_valid) { - return false; - } - // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) - codepoint |= static_cast(byte << (4 * i)); - } +FK_YAML_DETAIL_NAMESPACE_BEGIN - return true; +/////////////////////////////////// +// external_node_constructor // +/////////////////////////////////// + +/// @brief The external constructor template for basic_node objects. +/// @note All the non-specialized instantiations results in compilation error since such instantiations are not +/// supported. +/// @warning All the specialization must call n.m_value.destroy() first in the construct function to avoid +/// memory leak. +/// @tparam node_type The resulting YAML node value type. +template +struct external_node_constructor { + template + static void sequence(BasicNodeType& n, Args&&... 
args) { + destroy(n); + n.m_attrs |= node_attr_bits::seq_bit; + n.m_value.p_seq = create_object(std::forward(args)...); } - static void unescape_escaped_unicode(char32_t codepoint, std::string& buff) { - // the inner curly braces are necessary to build with older compilers. - std::array encode_buff {{}}; - uint32_t encoded_size {0}; - utf8::from_utf32(codepoint, encode_buff, encoded_size); - buff.append(reinterpret_cast(encode_buff.data()), encoded_size); + template + static void mapping(BasicNodeType& n, Args&&... args) { + destroy(n); + n.m_attrs |= node_attr_bits::map_bit; + n.m_value.p_map = create_object(std::forward(args)...); } -}; -FK_YAML_DETAIL_NAMESPACE_END + static void null_scalar(BasicNodeType& n, std::nullptr_t) { + destroy(n); + n.m_attrs |= node_attr_bits::null_bit; + n.m_value.p_map = nullptr; + } -#endif /* FK_YAML_DETAIL_ENCODINGS_YAML_ESCAPER_HPP */ + static void boolean_scalar(BasicNodeType& n, const typename BasicNodeType::boolean_type b) { + destroy(n); + n.m_attrs |= node_attr_bits::bool_bit; + n.m_value.boolean = b; + } -// #include + static void integer_scalar(BasicNodeType& n, const typename BasicNodeType::integer_type i) { + destroy(n); + n.m_attrs |= node_attr_bits::int_bit; + n.m_value.integer = i; + } -// #include -// _______ __ __ __ _____ __ __ __ -// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 -// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML -// -// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani -// SPDX-License-Identifier: MIT + /// @brief Constructs an INTEGER node from a uint64_t value that exceeds the signed range. + /// The raw bit pattern is stored in the integer field and the uint_bit flag is set so that + /// get_value() / as_uint() can recover the original unsigned value. 
+ static void unsigned_integer_scalar(BasicNodeType& n, const typename BasicNodeType::integer_type i) { + destroy(n); + n.m_attrs |= node_attr_bits::int_bit; + n.m_attrs |= node_attr_bits::uint_bit; + n.m_value.integer = i; + } -#ifndef FK_YAML_DETAIL_INPUT_SCALAR_SCANNER_HPP -#define FK_YAML_DETAIL_INPUT_SCALAR_SCANNER_HPP + static void float_scalar(BasicNodeType& n, const typename BasicNodeType::float_number_type f) { + destroy(n); + n.m_attrs |= node_attr_bits::float_bit; + n.m_value.float_val = f; + } -#include -#include + template + static void string_scalar(BasicNodeType& n, Args&&... args) { + destroy(n); + n.m_attrs |= node_attr_bits::string_bit; + n.m_value.p_str = create_object(std::forward(args)...); + } -// #include +private: + static void destroy(BasicNodeType& n) { + n.m_value.destroy(n.m_attrs & node_attr_mask::value); + // Clear both the value-type bits and the uint_bit style flag so that any + // subsequent reassignment starts from a clean state. + n.m_attrs &= ~(node_attr_mask::value | node_attr_bits::uint_bit); + } +}; -// #include +///////////////// +// to_node // +///////////////// -// #include +/// @brief to_node function for BasicNodeType::sequence_type objects. +/// @tparam BasicNodeType A basic_node template instance type. +/// @tparam T A sequence node value type. +/// @param n A basic_node object. +/// @param s A sequence node value object. +template < + typename BasicNodeType, typename T, + enable_if_t< + conjunction< + is_basic_node, + std::is_same>>::value, + int> = 0> +inline void to_node(BasicNodeType& n, T&& s) noexcept { + external_node_constructor::sequence(n, std::forward(s)); +} +/// @brief to_node function for compatible sequence types. 
+/// @note This overload is enabled when +/// * both begin()/end() functions are callable on a `CompatSeqType` object +/// * CompatSeqType doesn't have `mapped_type` (mapping-like type) +/// * BasicNodeType::string_type cannot be constructed from a CompatSeqType object (string-like type) +/// @tparam BasicNodeType A basic_node template instance type. +/// @tparam CompatSeqType A container type. +/// @param n A basic_node object. +/// @param s A container object. +template < + typename BasicNodeType, typename CompatSeqType, + enable_if_t< + conjunction< + is_basic_node, + negation>>, + negation>, detect::has_begin_end, + negation, detect::has_mapped_type>>, + negation>>::value, + int> = 0> +// NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward) +inline void to_node(BasicNodeType& n, CompatSeqType&& s) { + using std::begin; + using std::end; + external_node_constructor::sequence(n, begin(s), end(s)); +} -FK_YAML_DETAIL_NAMESPACE_BEGIN +/// @brief to_node function for std::pair objects. +/// @tparam BasicNodeType A basic_node template instance type. +/// @tparam T The first type of std::pair. +/// @tparam U The second type of std::pair. +/// @param n A basic_node object. +/// @param p A std::pair object. +template +inline void to_node(BasicNodeType& n, const std::pair& p) { + n = {p.first, p.second}; +} -/// @brief The class which detects a scalar value type by scanning contents. -class scalar_scanner { -public: - /// @brief Detects a scalar value type by scanning the contents ranged by the given iterators. - /// @param begin The iterator to the first element of the scalar. - /// @param end The iterator to the past-the-end element of the scalar. - /// @return A detected scalar value type. - static node_type scan(const char* begin, const char* end) noexcept { - if (begin == end) { - return node_type::STRING; - } +/// @brief concrete implementation of to_node function for std::tuple objects. +/// @tparam BasicNodeType A basic_node template instance type. 
+/// @tparam ...Types The value types of std::tuple. +/// @tparam ...Idx Index sequence values for std::tuple value types. +/// @param n A basic_node object. +/// @param t A std::tuple object. +/// @param _ An index sequence. (unused) +template +inline void to_node_tuple_impl(BasicNodeType& n, const std::tuple& t, index_sequence /*unused*/) { + n = {std::get(t)...}; +} - const auto len = static_cast(std::distance(begin, end)); - if (len > 5) { - return scan_possible_number_token(begin, len); - } +/// @brief to_node function for std::tuple objects with no value types. +/// @note This implementation is needed since calling `to_node_tuple_impl()` with an empty tuple creates a null node. +/// @tparam BasicNodeType A basic_node template instance type. +/// @param n A basic_node object. +/// @param _ A std::tuple object. (unused) +template +inline void to_node(BasicNodeType& n, const std::tuple<>& /*unused*/) { + n = BasicNodeType::sequence(); +} - const char* p_begin = &*begin; +/// @brief to_node function for std::tuple objects with at least one value type. +/// @tparam BasicNodeType A basic_node template instance type. +/// @tparam ...FirstType The first value types of std::tuple. +/// @tparam ...RestTypes The rest value types of std::tuple. (maybe empty) +/// @param n A basic_node object. +/// @param t A std::tuple object. +template +inline void to_node(BasicNodeType& n, const std::tuple& t) { + to_node_tuple_impl(n, t, index_sequence_for {}); +} - switch (len) { - case 1: - if (*p_begin == '~') { - return node_type::NULL_OBJECT; - } - break; - case 4: - switch (*p_begin) { - case 'n': - // no possible case of begin a number otherwise. - return (std::strncmp(p_begin + 1, "ull", 3) == 0) ? node_type::NULL_OBJECT : node_type::STRING; - case 'N': - // no possible case of begin a number otherwise. - return ((std::strncmp(p_begin + 1, "ull", 3) == 0) || (std::strncmp(p_begin + 1, "ULL", 3) == 0)) - ? 
node_type::NULL_OBJECT - : node_type::STRING; - case 't': - // no possible case of being a number otherwise. - return (std::strncmp(p_begin + 1, "rue", 3) == 0) ? node_type::BOOLEAN : node_type::STRING; - case 'T': - // no possible case of being a number otherwise. - return ((std::strncmp(p_begin + 1, "rue", 3) == 0) || (std::strncmp(p_begin + 1, "RUE", 3) == 0)) - ? node_type::BOOLEAN - : node_type::STRING; - case '.': { - const char* p_from_second = p_begin + 1; - const bool is_inf_or_nan_scalar = - (std::strncmp(p_from_second, "inf", 3) == 0) || (std::strncmp(p_from_second, "Inf", 3) == 0) || - (std::strncmp(p_from_second, "INF", 3) == 0) || (std::strncmp(p_from_second, "nan", 3) == 0) || - (std::strncmp(p_from_second, "NaN", 3) == 0) || (std::strncmp(p_from_second, "NAN", 3) == 0); - if (is_inf_or_nan_scalar) { - return node_type::FLOAT; - } - // maybe a number. - break; - } - default: - break; - } - break; - case 5: - switch (*p_begin) { - case 'f': - // no possible case of being a number otherwise. - return (std::strncmp(p_begin + 1, "alse", 4) == 0) ? node_type::BOOLEAN : node_type::STRING; - case 'F': - // no possible case of being a number otherwise. - return ((std::strncmp(p_begin + 1, "alse", 4) == 0) || (std::strncmp(p_begin + 1, "ALSE", 4) == 0)) - ? node_type::BOOLEAN - : node_type::STRING; - case '+': - case '-': - if (*(p_begin + 1) == '.') { - const char* p_from_third = p_begin + 2; - const bool is_min_inf = (std::strncmp(p_from_third, "inf", 3) == 0) || - (std::strncmp(p_from_third, "Inf", 3) == 0) || - (std::strncmp(p_from_third, "INF", 3) == 0); - if (is_min_inf) { - return node_type::FLOAT; - } - } - // maybe a number. - break; - default: - break; - } - break; - default: - break; - } +/// @brief to_node function for BasicNodeType::mapping_type objects. +/// @tparam BasicNodeType A basic_node template instance type. +/// @tparam T A mapping node value type. +/// @param n A basic_node object. +/// @param m A mapping node value object. 
+template < + typename BasicNodeType, typename T, + enable_if_t< + conjunction< + is_basic_node, std::is_same>>::value, + int> = 0> +inline void to_node(BasicNodeType& n, T&& m) noexcept { + external_node_constructor::mapping(n, std::forward(m)); +} - return scan_possible_number_token(begin, len); +/// @brief to_node function for compatible mapping types. +/// @note This overload is enabled when +/// * both begin()/end() functions are callable on a `CompatMapType` object +/// * CompatMapType has both `key_type` and `mapped_type` +/// @tparam BasicNodeType A basic_node template instance type. +/// @tparam CompatMapType A container type. +/// @param n A basic_node object. +/// @param m A container object. +template < + typename BasicNodeType, typename CompatMapType, + enable_if_t< + conjunction< + is_basic_node, negation>, + negation>>, + detect::has_begin_end, detect::has_key_type, + detect::has_mapped_type>::value, + int> = 0> +inline void to_node(BasicNodeType& n, CompatMapType&& m) { + external_node_constructor::mapping(n); + auto& map = n.as_map(); + for (const auto& pair : std::forward(m)) { + map.emplace(pair.first, pair.second); } +} -private: - /// @brief Detects a scalar value type from the contents (possibly an integer or a floating-point value). - /// @param itr The iterator to the first element of the scalar. - /// @param len The length of the scalar contents. - /// @return A detected scalar value type. - static node_type scan_possible_number_token(const char* itr, uint32_t len) noexcept { - FK_YAML_ASSERT(len > 0); +/// @brief to_node function for null objects. +/// @tparam BasicNodeType A mapping node value type. +/// @tparam NullType This must be std::nullptr_t type +template ::value, int> = 0> +inline void to_node(BasicNodeType& n, std::nullptr_t /*unused*/) { + external_node_constructor::null_scalar(n, nullptr); +} - switch (*itr) { - case '-': - return (len > 1) ? 
scan_negative_number(++itr, --len) : node_type::STRING; - case '+': - return (len > 1) ? scan_decimal_number(++itr, --len) : node_type::STRING; - case '.': - // some integer(s) required after the decimal point as a floating point value. - return (len > 1) ? scan_after_decimal_point(++itr, --len) : node_type::STRING; - case '0': - return (len > 1) ? scan_after_zero_at_first(++itr, --len) : node_type::INTEGER; - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - return (len > 1) ? scan_decimal_number(++itr, --len) : node_type::INTEGER; - default: - return node_type::STRING; - } - } - - /// @brief Detects a scalar value type by scanning the contents right after the negative sign. - /// @param itr The iterator to the past-the-negative-sign element of the scalar. - /// @param len The length of the scalar contents left unscanned. - /// @return A detected scalar value type. - static node_type scan_negative_number(const char* itr, uint32_t len) noexcept { - FK_YAML_ASSERT(len > 0); - - if (is_digit(*itr)) { - return (len > 1) ? scan_decimal_number(++itr, --len) : node_type::INTEGER; - } - - if (*itr == '.') { - // some integer(s) required after "-." as a floating point value. - return (len > 1) ? scan_after_decimal_point(++itr, --len) : node_type::STRING; - } +/// @brief to_node function for BasicNodeType::boolean_type objects. +/// @tparam BasicNodeType A basic_node template instance type. +/// @tparam T A boolean scalar node value type. +/// @param n A basic_node object. +/// @param b A boolean scalar node value object. +template ::value, int> = 0> +inline void to_node(BasicNodeType& n, typename BasicNodeType::boolean_type b) noexcept { + external_node_constructor::boolean_scalar(n, b); +} - return node_type::STRING; - } +/// @brief to_node function for integers. +/// @tparam BasicNodeType A basic_node template instance type. +/// @tparam T An integer type. +/// @param n A basic_node object. 
+/// @param i An integer object. +template < + typename BasicNodeType, typename T, + enable_if_t, is_non_bool_integral>::value, int> = 0> +inline void to_node(BasicNodeType& n, T i) noexcept { + external_node_constructor::integer_scalar(n, i); +} - /// @brief Detects a scalar value type by scanning the contents right after the beginning 0. - /// @param itr The iterator to the past-the-zero element of the scalar. - /// @param len The length of the scalar left unscanned. - /// @return A detected scalar value type. - static node_type scan_after_zero_at_first(const char* itr, uint32_t len) noexcept { - FK_YAML_ASSERT(len > 0); +/// @brief to_node function for floating point numbers. +/// @tparam BasicNodeType A basic_node template instance type. +/// @tparam T A floating point number type. +/// @param n A basic_node object. +/// @param f A floating point number object. +template < + typename BasicNodeType, typename T, + enable_if_t, std::is_floating_point>::value, int> = 0> +inline void to_node(BasicNodeType& n, T f) noexcept { + external_node_constructor::float_scalar(n, f); +} - if (is_digit(*itr)) { - // a token consisting of the beginning '0' and some following numbers, e.g., `0123`, is not an integer - // according to https://yaml.org/spec/1.2.2/#10213-integer. - return node_type::STRING; - } +/// @brief to_node function for compatible strings. +/// @tparam BasicNodeType A basic_node template instance type. +/// @tparam T A compatible string type. +/// @param n A basic_node object. +/// @param s A compatible string object. +template < + typename BasicNodeType, typename T, + enable_if_t< + conjunction< + is_basic_node, negation>, + std::is_constructible>::value, + int> = 0> +inline void to_node(BasicNodeType& n, T&& s) { + external_node_constructor::string_scalar(n, std::forward(s)); +} - switch (*itr) { - case '.': - // 0 can be omitted after `0.`. - return (len > 1) ? 
scan_after_decimal_point(++itr, --len) : node_type::FLOAT; - case 'e': - case 'E': - // some integer(s) required after the exponent sign as a floating point value. - return (len > 1) ? scan_after_exponent(++itr, --len) : node_type::STRING; - case 'o': - return (len > 1) ? scan_octal_number(++itr, --len) : node_type::STRING; - case 'x': - return (len > 1) ? scan_hexadecimal_number(++itr, --len) : node_type::STRING; - default: - return node_type::STRING; - } +/// @brief A function object to call to_node functions. +/// @note User-defined specialization is available by providing implementation **OUTSIDE** fkyaml namespace. +struct to_node_fn { + /// @brief Call to_node function suitable for the given T type. + /// @tparam BasicNodeType A basic_node template instance type. + /// @tparam T A target value type assigned to the basic_node object. + /// @param n A basic_node object. + /// @param val A target object assigned to the basic_node object. + /// @return decltype(to_node(n, std::forward(val))) void by default. User can set it to some other type. + template + auto operator()(BasicNodeType& n, T&& val) const + noexcept(noexcept(to_node(n, std::forward(val)))) -> decltype(to_node(n, std::forward(val))) { + return to_node(n, std::forward(val)); } +}; - /// @brief Detects a scalar value type by scanning the contents part starting with a decimal. - /// @param itr The iterator to the beginning decimal element of the scalar. - /// @param len The length of the scalar left unscanned. - /// @return A detected scalar value type. - static node_type scan_decimal_number(const char* itr, uint32_t len) noexcept { - FK_YAML_ASSERT(len > 0); +FK_YAML_DETAIL_NAMESPACE_END - if (is_digit(*itr)) { - return (len > 1) ? scan_decimal_number(++itr, --len) : node_type::INTEGER; - } +FK_YAML_NAMESPACE_BEGIN - switch (*itr) { - case '.': { - // 0 can be omitted after the decimal point - return (len > 1) ? 
scan_after_decimal_point(++itr, --len) : node_type::FLOAT; - } - case 'e': - case 'E': - // some integer(s) required after the exponent - return (len > 1) ? scan_after_exponent(++itr, --len) : node_type::STRING; - default: - return node_type::STRING; - } - } +#ifndef FK_YAML_HAS_CXX_17 +// anonymous namespace to hold `to_node` functor. +// see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4381.html for why it's needed. +namespace // NOLINT(cert-dcl59-cpp,fuchsia-header-anon-namespaces,google-build-namespaces) +{ +#endif - /// @brief Detects a scalar value type by scanning the contents right after a decimal point. - /// @param itr The iterator to the past-the-decimal-point element of the scalar. - /// @param len The length of the scalar left unscanned. - /// @return A detected scalar value type. - static node_type scan_after_decimal_point(const char* itr, uint32_t len) noexcept { - FK_YAML_ASSERT(len > 0); +/// @brief A global object to represent ADL friendly to_node functor. +// NOLINTNEXTLINE(misc-definitions-in-headers) +FK_YAML_INLINE_VAR constexpr const auto& to_node = detail::static_const::value; - for (uint32_t i = 0; i < len; i++) { - const char c = *itr++; +#ifndef FK_YAML_HAS_CXX_17 +} // namespace +#endif - if (is_digit(c)) { - continue; - } +FK_YAML_NAMESPACE_END - if (c == 'e' || c == 'E') { - if (i == len - 1) { - // some integer(s) required after the exponent - return node_type::STRING; - } - return scan_after_exponent(itr, len - i - 1); - } +#endif /* FK_YAML_DETAIL_CONVERSIONS_TO_NODE_HPP */ - return node_type::STRING; - } +// #include +// _______ __ __ __ _____ __ __ __ +// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library +// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 +// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML +// +// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani +// SPDX-License-Identifier: MIT - return node_type::FLOAT; - } +#ifndef 
FK_YAML_DETAIL_ENCODINGS_YAML_ESCAPER_HPP +#define FK_YAML_DETAIL_ENCODINGS_YAML_ESCAPER_HPP - /// @brief Detects a scalar value type by scanning the contents right after the exponent prefix ("e" or "E"). - /// @param itr The iterator to the past-the-exponent-prefix element of the scalar. - /// @param len The length of the scalar left unscanned. - /// @return A detected scalar value type. - static node_type scan_after_exponent(const char* itr, uint32_t len) noexcept { - FK_YAML_ASSERT(len > 0); +#include - const char c = *itr; - if (c == '+' || c == '-') { - if (len == 1) { - // some integer(s) required after the sign. - return node_type::STRING; - } - ++itr; - --len; - } +// #include - for (uint32_t i = 0; i < len; i++) { - if (!is_digit(*itr++)) { - return node_type::STRING; - } - } +// #include - return node_type::FLOAT; - } +// #include - /// @brief Detects a scalar value type by scanning the contents assuming octal numbers. - /// @param itr The iterator to the octal-number element of the scalar. - /// @param len The length of the scalar left unscanned. - /// @return A detected scalar value type. - static node_type scan_octal_number(const char* itr, uint32_t len) noexcept { - FK_YAML_ASSERT(len > 0); +// #include - switch (*itr) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - return (len > 1) ? scan_octal_number(++itr, --len) : node_type::INTEGER; - default: - return node_type::STRING; - } - } - /// @brief Detects a scalar value type by scanning the contents assuming hexadecimal numbers. - /// @param itr The iterator to the hexadecimal-number element of the scalar. - /// @param len The length of the scalar left unscanned. - /// @return A detected scalar value type. - static node_type scan_hexadecimal_number(const char* itr, uint32_t len) noexcept { - FK_YAML_ASSERT(len > 0); +FK_YAML_DETAIL_NAMESPACE_BEGIN - if (is_xdigit(*itr)) { - return (len > 1) ? 
scan_hexadecimal_number(++itr, --len) : node_type::INTEGER; - } - return node_type::STRING; - } +class yaml_escaper { + using iterator = ::std::string::const_iterator; - /// @brief Check if the given character is a digit. - /// @note This function is needed to avoid assertion failures in `std::isdigit()` especially when compiled with - /// MSVC. - /// @param c A character to be checked. - /// @return true if the given character is a digit, false otherwise. - static bool is_digit(char c) { - return ('0' <= c && c <= '9'); - } +public: + static bool unescape(const char*& begin, const char* end, std::string& buff) { + FK_YAML_ASSERT(*begin == '\\' && std::distance(begin, end) > 0); + bool ret = true; - /// @brief Check if the given character is a hex-digit. - /// @note This function is needed to avoid assertion failures in `std::isxdigit()` especially when compiled with - /// MSVC. - /// @param c A character to be checked. - /// @return true if the given character is a hex-digit, false otherwise. - static bool is_xdigit(char c) { - return (('0' <= c && c <= '9') || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f')); - } -}; - -FK_YAML_DETAIL_NAMESPACE_END - -#endif /* FK_YAML_DETAIL_INPUT_SCALAR_SCANNER_HPP */ - -// #include -// _______ __ __ __ _____ __ __ __ -// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 -// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML -// -// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani -// SPDX-License-Identifier: MIT - -#ifndef FK_YAML_DETAIL_INPUT_TAG_T_HPP -#define FK_YAML_DETAIL_INPUT_TAG_T_HPP - -#include - -// #include - - -FK_YAML_DETAIL_NAMESPACE_BEGIN - -/// @brief Definition of YAML tag types. -enum class tag_t : std::uint8_t { - NONE, //!< Represents a non-specific tag "?". - NON_SPECIFIC, //!< Represents a non-specific tag "!". - CUSTOM_TAG, //!< Represents a custom tag - SEQUENCE, //!< Represents a sequence tag. 
- MAPPING, //!< Represents a mapping tag. - NULL_VALUE, //!< Represents a null value tag. - BOOLEAN, //!< Represents a boolean tag. - INTEGER, //!< Represents an integer type - FLOATING_NUMBER, //!< Represents a floating point number tag. - STRING, //!< Represents a string tag. -}; - -FK_YAML_DETAIL_NAMESPACE_END - -#endif /* FK_YAML_DETAIL_INPUT_TAG_T_HPP */ - -// #include - -// #include - -// #include - -// #include - -// #include - - -FK_YAML_DETAIL_NAMESPACE_BEGIN - -/// @brief A parser for YAML scalars. -/// @tparam BasicNodeType A type of the container for parsed YAML scalars. -template -class scalar_parser { - static_assert(is_basic_node::value, "scalar_parser only accepts basic_node<...>"); - -public: - using basic_node_type = BasicNodeType; - -private: - /** A type for boolean node values. */ - using boolean_type = typename basic_node_type::boolean_type; - /** A type for integer node values. */ - using integer_type = typename basic_node_type::integer_type; - /** A type for floating point node values. */ - using float_number_type = typename basic_node_type::float_number_type; - /** A type for string node values. */ - using string_type = typename basic_node_type::string_type; - -public: - /// @brief Constructs a new scalar_parser object. - /// @param line Current line. - /// @param indent Current indentation. - scalar_parser(uint32_t line, uint32_t indent) noexcept - : m_line(line), - m_indent(indent) { - } - - /// @brief Destroys a scalar_parser object. - ~scalar_parser() noexcept = default; - - // std::string's copy constructor/assignment operator may throw a exception. 
- scalar_parser(const scalar_parser&) = default; - scalar_parser& operator=(const scalar_parser&) = default; - - scalar_parser(scalar_parser&&) noexcept = default; - scalar_parser& operator=(scalar_parser&&) noexcept(std::is_nothrow_move_assignable::value) = default; - - /// @brief Parses a token into a flow scalar (either plain, single quoted or double quoted) - /// @param lex_type Lexical token type for the scalar. - /// @param tag_type Tag type for the scalar. - /// @param token Scalar contents. - /// @return Parsed YAML flow scalar object. - basic_node_type parse_flow(lexical_token_t lex_type, tag_t tag_type, str_view token) { - FK_YAML_ASSERT( - lex_type == lexical_token_t::PLAIN_SCALAR || lex_type == lexical_token_t::SINGLE_QUOTED_SCALAR || - lex_type == lexical_token_t::DOUBLE_QUOTED_SCALAR); - FK_YAML_ASSERT(tag_type != tag_t::SEQUENCE && tag_type != tag_t::MAPPING); - - token = parse_flow_scalar_token(lex_type, token); - const node_type value_type = decide_value_type(lex_type, tag_type, token); - return create_scalar_node(value_type, tag_type, token); - } - - /// @brief Parses a token into a block scalar (either literal or folded) - /// @param lex_type Lexical token type for the scalar. - /// @param tag_type Tag type for the scalar. - /// @param token Scalar contents. - /// @param header Block scalar header information. - /// @return Parsed YAML block scalar object. 
- basic_node_type parse_block( - lexical_token_t lex_type, tag_t tag_type, str_view token, const block_scalar_header& header) { - FK_YAML_ASSERT( - lex_type == lexical_token_t::BLOCK_LITERAL_SCALAR || lex_type == lexical_token_t::BLOCK_FOLDED_SCALAR); - FK_YAML_ASSERT(tag_type != tag_t::SEQUENCE && tag_type != tag_t::MAPPING); - - if (lex_type == lexical_token_t::BLOCK_LITERAL_SCALAR) { - token = parse_block_literal_scalar(token, header); - } - else { - token = parse_block_folded_scalar(token, header); - } - - const node_type value_type = decide_value_type(lex_type, tag_type, token); - return create_scalar_node(value_type, tag_type, token); - } - -private: - /// @brief Parses a token into a flow scalar contents. - /// @param lex_type Lexical token type for the scalar. - /// @param token Scalar contents. - /// @return View into the parsed scalar contents. - str_view parse_flow_scalar_token(lexical_token_t lex_type, str_view token) { - switch (lex_type) { - case lexical_token_t::PLAIN_SCALAR: - token = parse_plain_scalar(token); + switch (*++begin) { + case 'a': + buff.push_back('\a'); break; - case lexical_token_t::SINGLE_QUOTED_SCALAR: - token = parse_single_quoted_scalar(token); + case 'b': + buff.push_back('\b'); break; - case lexical_token_t::DOUBLE_QUOTED_SCALAR: - token = parse_double_quoted_scalar(token); + case 't': + case '\t': + buff.push_back('\t'); break; - default: // LCOV_EXCL_LINE - unreachable(); // LCOV_EXCL_LINE - } - - return token; - } - - /// @brief Parses plain scalar contents. - /// @param token Scalar contents. - /// @return View into the parsed scalar contents. - str_view parse_plain_scalar(str_view token) noexcept { - // plain scalars cannot be empty. 
- FK_YAML_ASSERT(!token.empty()); - - std::size_t newline_pos = token.find('\n'); - if (newline_pos == str_view::npos) { - return token; - } - - m_use_owned_buffer = true; - - if (m_buffer.capacity() < token.size()) { - m_buffer.reserve(token.size()); - } - - do { - process_line_folding(token, newline_pos); - newline_pos = token.find('\n'); - } while (newline_pos != str_view::npos); - - m_buffer.append(token.begin(), token.size()); - - return {m_buffer}; - } - - /// @brief Parses single quoted scalar contents. - /// @param token Scalar contents. - /// @return View into the parsed scalar contents. - str_view parse_single_quoted_scalar(str_view token) noexcept { - if (token.empty()) { - return token; - } - - constexpr str_view filter {"\'\n"}; - std::size_t pos = token.find_first_of(filter); - if (pos == str_view::npos) { - return token; - } - - m_use_owned_buffer = true; - - if (m_buffer.capacity() < token.size()) { - m_buffer.reserve(token.size()); - } - - do { - FK_YAML_ASSERT(pos < token.size()); - FK_YAML_ASSERT(token[pos] == '\'' || token[pos] == '\n'); - - if (token[pos] == '\'') { - // unescape escaped single quote. ('' -> ') - FK_YAML_ASSERT(pos + 1 < token.size()); - m_buffer.append(token.begin(), token.begin() + (pos + 1)); - token.remove_prefix(pos + 2); // move next to the escaped single quote. - } - else { - process_line_folding(token, pos); - } - - pos = token.find_first_of(filter); - } while (pos != str_view::npos); - - if (!token.empty()) { - m_buffer.append(token.begin(), token.size()); - } - - return {m_buffer}; - } - - /// @brief Parses double quoted scalar contents. - /// @param token Scalar contents. - /// @return View into the parsed scalar contents. 
- str_view parse_double_quoted_scalar(str_view token) { - if (token.empty()) { - return token; - } - - constexpr str_view filter {"\\\n"}; - std::size_t pos = token.find_first_of(filter); - if (pos == str_view::npos) { - return token; - } - - m_use_owned_buffer = true; - - if (m_buffer.capacity() < token.size()) { - m_buffer.reserve(token.size()); - } - - do { - FK_YAML_ASSERT(pos < token.size()); - FK_YAML_ASSERT(token[pos] == '\\' || token[pos] == '\n'); - - if (token[pos] == '\\') { - FK_YAML_ASSERT(pos + 1 < token.size()); - m_buffer.append(token.begin(), token.begin() + pos); - - if (token[pos + 1] != '\n') { - token.remove_prefix(pos); - const char* p_escape_begin = token.begin(); - const bool is_valid_escaping = yaml_escaper::unescape(p_escape_begin, token.end(), m_buffer); - if FK_YAML_UNLIKELY (!is_valid_escaping) { - throw parse_error( - "Unsupported escape sequence is found in a double quoted scalar.", m_line, m_indent); - } - - // `p_escape_begin` points to the last element of the escape sequence. - token.remove_prefix((p_escape_begin - token.begin()) + 1); - } - else { - std::size_t non_space_pos = token.find_first_not_of(" \t", pos + 2); - if (non_space_pos == str_view::npos) { - non_space_pos = token.size(); - } - token.remove_prefix(non_space_pos); - } - } - else { - process_line_folding(token, pos); - } - - pos = token.find_first_of(filter); - } while (pos != str_view::npos); - - if (!token.empty()) { - m_buffer.append(token.begin(), token.size()); - } - - return {m_buffer}; - } - - /// @brief Parses block literal scalar contents. - /// @param token Scalar contents. - /// @param header Block scalar header information. - /// @return View into the parsed scalar contents. 
- str_view parse_block_literal_scalar(str_view token, const block_scalar_header& header) { - if FK_YAML_UNLIKELY (token.empty()) { - return token; - } - - m_use_owned_buffer = true; - m_buffer.reserve(token.size()); - - std::size_t cur_line_begin_pos = 0; - do { - bool has_newline_at_end = true; - std::size_t cur_line_end_pos = token.find('\n', cur_line_begin_pos); - if (cur_line_end_pos == str_view::npos) { - has_newline_at_end = false; - cur_line_end_pos = token.size(); - } - - const std::size_t line_size = cur_line_end_pos - cur_line_begin_pos; - const str_view line = token.substr(cur_line_begin_pos, line_size); - - if (line.size() > header.indent) { - m_buffer.append(line.begin() + header.indent, line.end()); - } - - if (!has_newline_at_end) { - break; - } - - m_buffer.push_back('\n'); - cur_line_begin_pos = cur_line_end_pos + 1; - } while (cur_line_begin_pos < token.size()); - - process_chomping(header.chomp); - - return {m_buffer}; - } - - /// @brief Parses block folded scalar contents. - /// @param token Scalar contents. - /// @param header Block scalar header information. - /// @return View into the parsed scalar contents. 
- str_view parse_block_folded_scalar(str_view token, const block_scalar_header& header) { - if FK_YAML_UNLIKELY (token.empty()) { - return token; - } - - m_use_owned_buffer = true; - m_buffer.reserve(token.size()); - - constexpr str_view white_space_filter {" \t"}; - - std::size_t cur_line_begin_pos = 0; - bool has_newline_at_end = true; - bool can_be_folded = false; - do { - std::size_t cur_line_end_pos = token.find('\n', cur_line_begin_pos); - if (cur_line_end_pos == str_view::npos) { - has_newline_at_end = false; - cur_line_end_pos = token.size(); - } - - const std::size_t line_size = cur_line_end_pos - cur_line_begin_pos; - const str_view line = token.substr(cur_line_begin_pos, line_size); - const bool is_empty = line.find_first_not_of(white_space_filter) == str_view::npos; - - if (line.size() <= header.indent) { - // A less-indented line is turned into a newline. - m_buffer.push_back('\n'); - can_be_folded = false; - } - else if (is_empty) { - // more-indented empty lines are not folded. - m_buffer.push_back('\n'); - m_buffer.append(line.begin() + header.indent, line.end()); - m_buffer.push_back('\n'); - } - else { - const std::size_t non_space_pos = line.find_first_not_of(white_space_filter); - const bool is_more_indented = (non_space_pos != str_view::npos) && (non_space_pos > header.indent); - - if (can_be_folded) { - if (is_more_indented) { - // The content line right before more-indented lines is not folded. - m_buffer.push_back('\n'); - } - else { - m_buffer.push_back(' '); - } - - can_be_folded = false; - } - - m_buffer.append(line.begin() + header.indent, line.end()); - - if (is_more_indented && has_newline_at_end) { - // more-indented lines are not folded. 
- m_buffer.push_back('\n'); - } - else { - can_be_folded = true; - } - } - - if (!has_newline_at_end) { - break; - } - - cur_line_begin_pos = cur_line_end_pos + 1; - } while (cur_line_begin_pos < token.size()); - - if (has_newline_at_end && can_be_folded) { - // The final content line break are not folded. - m_buffer.push_back('\n'); - } - - process_chomping(header.chomp); - - return {m_buffer}; - } - - /// @brief Discards final content line break and trailing empty lines depending on the given chomping type. - /// @param chomp Chomping method type. - void process_chomping(chomping_indicator_t chomp) { - switch (chomp) { - case chomping_indicator_t::STRIP: { - const std::size_t content_end_pos = m_buffer.find_last_not_of('\n'); - if (content_end_pos == std::string::npos) { - // if the scalar has no content line, all lines are considered as trailing empty lines. - m_buffer.clear(); - break; - } - - if (content_end_pos == m_buffer.size() - 1) { - // no last content line break nor trailing empty lines. 
- break; + case 'n': + buff.push_back('\n'); + break; + case 'v': + buff.push_back('\v'); + break; + case 'f': + buff.push_back('\f'); + break; + case 'r': + buff.push_back('\r'); + break; + case 'e': + buff.push_back(static_cast(0x1B)); + break; + case ' ': + buff.push_back(' '); + break; + case '\"': + buff.push_back('\"'); + break; + case '/': + buff.push_back('/'); + break; + case '\\': + buff.push_back('\\'); + break; + case 'N': // next line + unescape_escaped_unicode(0x85u, buff); + break; + case '_': // non-breaking space + unescape_escaped_unicode(0xA0u, buff); + break; + case 'L': // line separator + unescape_escaped_unicode(0x2028u, buff); + break; + case 'P': // paragraph separator + unescape_escaped_unicode(0x2029u, buff); + break; + case 'x': { + char32_t codepoint {0}; + ret = extract_codepoint(begin, end, 1, codepoint); + if FK_YAML_LIKELY (ret) { + unescape_escaped_unicode(codepoint, buff); } - - // remove the last content line break and all trailing empty lines. - m_buffer.erase(content_end_pos + 1); - break; } - case chomping_indicator_t::CLIP: { - const std::size_t content_end_pos = m_buffer.find_last_not_of('\n'); - if (content_end_pos == std::string::npos) { - // if the scalar has no content line, all lines are considered as trailing empty lines. - m_buffer.clear(); - break; + case 'u': { + char32_t codepoint {0}; + ret = extract_codepoint(begin, end, 2, codepoint); + if FK_YAML_LIKELY (ret) { + unescape_escaped_unicode(codepoint, buff); } - - if (content_end_pos == m_buffer.size() - 1) { - // no trailing empty lines - break; + break; + } + case 'U': { + char32_t codepoint {0}; + ret = extract_codepoint(begin, end, 4, codepoint); + if FK_YAML_LIKELY (ret) { + unescape_escaped_unicode(codepoint, buff); } - - // remove all trailing empty lines. - m_buffer.erase(content_end_pos + 2); - break; } - case chomping_indicator_t::KEEP: + default: + // Unsupported escape sequence is found in a string token. 
+ ret = false; break; } - } - /// @brief Applies line folding to flow scalar contents. - /// @param token Flow scalar contents. - /// @param newline_pos Position of the target newline code. - void process_line_folding(str_view& token, std::size_t newline_pos) noexcept { - // discard trailing white spaces which precedes the line break in the current line. - const std::size_t last_non_space_pos = token.substr(0, newline_pos + 1).find_last_not_of(" \t"); - if (last_non_space_pos == str_view::npos) { - m_buffer.append(token.begin(), newline_pos); - } - else { - m_buffer.append(token.begin(), last_non_space_pos + 1); - } - token.remove_prefix(newline_pos + 1); // move next to the LF + return ret; + } - uint32_t empty_line_counts = 0; - do { - const std::size_t non_space_pos = token.find_first_not_of(" \t"); - if (non_space_pos == str_view::npos) { - // Line folding ignores trailing spaces. - token.remove_prefix(token.size()); + static ::std::string escape(const char* begin, const char* end, bool& is_escaped) { + ::std::string escaped {}; + escaped.reserve(std::distance(begin, end)); + for (; begin != end; ++begin) { + switch (*begin) { + case 0x01: + escaped += "\\u0001"; + is_escaped = true; break; - } - if (token[non_space_pos] != '\n') { - token.remove_prefix(non_space_pos); + case 0x02: + escaped += "\\u0002"; + is_escaped = true; break; - } - - token.remove_prefix(non_space_pos + 1); - ++empty_line_counts; - } while (true); - - if (empty_line_counts > 0) { - m_buffer.append(empty_line_counts, '\n'); - } - else { - m_buffer.push_back(' '); - } - } - - /// @brief Decides scalar value type based on the lexical/tag types and scalar contents. - /// @param lex_type Lexical token type for the scalar. - /// @param tag_type Tag type for the scalar. - /// @param token Scalar contents. - /// @return Scalar value type. 
- node_type decide_value_type(lexical_token_t lex_type, tag_t tag_type, str_view token) const noexcept { - node_type value_type {node_type::STRING}; - if (lex_type == lexical_token_t::PLAIN_SCALAR) { - value_type = scalar_scanner::scan(token.begin(), token.end()); - } + case 0x03: + escaped += "\\u0003"; + is_escaped = true; + break; + case 0x04: + escaped += "\\u0004"; + is_escaped = true; + break; + case 0x05: + escaped += "\\u0005"; + is_escaped = true; + break; + case 0x06: + escaped += "\\u0006"; + is_escaped = true; + break; + case '\a': + escaped += "\\a"; + is_escaped = true; + break; + case '\b': + escaped += "\\b"; + is_escaped = true; + break; + case '\t': + escaped += "\\t"; + is_escaped = true; + break; + case '\n': + escaped += "\\n"; + is_escaped = true; + break; + case '\v': + escaped += "\\v"; + is_escaped = true; + break; + case '\f': + escaped += "\\f"; + is_escaped = true; + break; + case '\r': + escaped += "\\r"; + is_escaped = true; + break; + case 0x0E: + escaped += "\\u000E"; + is_escaped = true; + break; + case 0x0F: + escaped += "\\u000F"; + is_escaped = true; + break; + case 0x10: + escaped += "\\u0010"; + is_escaped = true; + break; + case 0x11: + escaped += "\\u0011"; + is_escaped = true; + break; + case 0x12: + escaped += "\\u0012"; + is_escaped = true; + break; + case 0x13: + escaped += "\\u0013"; + is_escaped = true; + break; + case 0x14: + escaped += "\\u0014"; + is_escaped = true; + break; + case 0x15: + escaped += "\\u0015"; + is_escaped = true; + break; + case 0x16: + escaped += "\\u0016"; + is_escaped = true; + break; + case 0x17: + escaped += "\\u0017"; + is_escaped = true; + break; + case 0x18: + escaped += "\\u0018"; + is_escaped = true; + break; + case 0x19: + escaped += "\\u0019"; + is_escaped = true; + break; + case 0x1A: + escaped += "\\u001A"; + is_escaped = true; + break; + case 0x1B: + escaped += "\\e"; + is_escaped = true; + break; + case 0x1C: + escaped += "\\u001C"; + is_escaped = true; + break; + case 0x1D: + 
escaped += "\\u001D"; + is_escaped = true; + break; + case 0x1E: + escaped += "\\u001E"; + is_escaped = true; + break; + case 0x1F: + escaped += "\\u001F"; + is_escaped = true; + break; + case '\"': + escaped += "\\\""; + is_escaped = true; + break; + case '\\': + escaped += "\\\\"; + is_escaped = true; + break; + default: + const std::ptrdiff_t diff = static_cast(std::distance(begin, end)); + if (diff > 1) { + if (*begin == static_cast(0xC2u) && *(begin + 1) == static_cast(0x85u)) { + escaped += "\\N"; + std::advance(begin, 1); + is_escaped = true; + break; + } + if (*begin == static_cast(0xC2u) && *(begin + 1) == static_cast(0xA0u)) { + escaped += "\\_"; + std::advance(begin, 1); + is_escaped = true; + break; + } - switch (tag_type) { - case tag_t::NULL_VALUE: - value_type = node_type::NULL_OBJECT; - break; - case tag_t::BOOLEAN: - value_type = node_type::BOOLEAN; - break; - case tag_t::INTEGER: - value_type = node_type::INTEGER; - break; - case tag_t::FLOATING_NUMBER: - value_type = node_type::FLOAT; - break; - case tag_t::STRING: - case tag_t::NON_SPECIFIC: - // scalars with the non-specific tag is resolved to a string tag. - // See the "Non-Specific Tags" section in https://yaml.org/spec/1.2.2/#691-node-tags. - value_type = node_type::STRING; - break; - case tag_t::NONE: - case tag_t::CUSTOM_TAG: - default: - break; + if (diff > 2) { + if (*begin == static_cast(0xE2u) && *(begin + 1) == static_cast(0x80u) && + *(begin + 2) == static_cast(0xA8u)) { + escaped += "\\L"; + std::advance(begin, 2); + is_escaped = true; + break; + } + if (*begin == static_cast(0xE2u) && *(begin + 1) == static_cast(0x80u) && + *(begin + 2) == static_cast(0xA9u)) { + escaped += "\\P"; + std::advance(begin, 2); + is_escaped = true; + break; + } + } + } + escaped += *begin; + break; + } } + return escaped; + } // LCOV_EXCL_LINE - return value_type; - } - - /// @brief Creates YAML scalar object based on the value type and contents. - /// @param type Scalar value type. 
- /// @param token Scalar contents. - /// @return A YAML scalar object. - basic_node_type create_scalar_node(node_type val_type, tag_t tag_type, str_view token) { - switch (val_type) { - case node_type::NULL_OBJECT: { - std::nullptr_t null = nullptr; - const bool converted = detail::aton(token.begin(), token.end(), null); - if FK_YAML_UNLIKELY (!converted) { - throw parse_error("Failed to convert a scalar to a null.", m_line, m_indent); - } - // The default basic_node object is a null scalar node. - return basic_node_type {}; +private: + static bool convert_hexchar_to_byte(char source, uint8_t& byte) { + if ('0' <= source && source <= '9') { + // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) + byte = static_cast(source - '0'); + return true; } - case node_type::BOOLEAN: { - auto boolean = static_cast(false); - const bool converted = detail::atob(token.begin(), token.end(), boolean); - if FK_YAML_UNLIKELY (!converted) { - throw parse_error("Failed to convert a scalar to a boolean.", m_line, m_indent); - } - return basic_node_type(boolean); + + if ('A' <= source && source <= 'F') { + // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) + byte = static_cast(source - 'A' + 10); + return true; } - case node_type::INTEGER: { - integer_type integer = 0; - const bool converted = detail::atoi(token.begin(), token.end(), integer); - if FK_YAML_LIKELY (converted) { - return basic_node_type(integer); - } - if FK_YAML_UNLIKELY (tag_type == tag_t::INTEGER) { - throw parse_error("Failed to convert a scalar to an integer.", m_line, m_indent); - } - // conversion error from a scalar which is not tagged with !!int is recovered by treating it as a string - // scalar. See https://github.com/fktn-k/fkYAML/issues/428. 
- return basic_node_type(string_type(token.begin(), token.end())); + if ('a' <= source && source <= 'f') { + // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) + byte = static_cast(source - 'a' + 10); + return true; } - case node_type::FLOAT: { - float_number_type float_val = 0; - const bool converted = detail::atof(token.begin(), token.end(), float_val); - if FK_YAML_LIKELY (converted) { - return basic_node_type(float_val); - } - if FK_YAML_UNLIKELY (tag_type == tag_t::FLOATING_NUMBER) { - throw parse_error("Failed to convert a scalar to a floating point value", m_line, m_indent); - } - // conversion error from a scalar which is not tagged with !!float is recovered by treating it as a string - // scalar. See https://github.com/fktn-k/fkYAML/issues/428. - return basic_node_type(string_type(token.begin(), token.end())); + // The given character is not hexadecimal. + return false; + } + + static bool extract_codepoint(const char*& begin, const char* end, int bytes_to_read, char32_t& codepoint) { + const bool has_enough_room = static_cast(std::distance(begin, end)) >= (bytes_to_read - 1); + if (!has_enough_room) { + return false; } - case node_type::STRING: - if (!m_use_owned_buffer) { - return basic_node_type(string_type(token.begin(), token.end())); + + const int read_size = bytes_to_read * 2; + uint8_t byte {0}; + codepoint = 0; + + for (int i = read_size - 1; i >= 0; i--) { + const bool is_valid = convert_hexchar_to_byte(*++begin, byte); + if (!is_valid) { + return false; } - m_use_owned_buffer = false; - return basic_node_type(std::move(m_buffer)); - default: // LCOV_EXCL_LINE - detail::unreachable(); // LCOV_EXCL_LINE + // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) + codepoint |= static_cast(byte << (4 * i)); } + + return true; } - /// Current line - uint32_t m_line {0}; - /// Current indentation for the scalar - uint32_t m_indent {0}; - /// Whether the parsed contents are stored 
in an owned buffer. - bool m_use_owned_buffer {false}; - /// Owned buffer storage for parsing. This buffer is used when scalar contents need mutation. - std::string m_buffer; + static void unescape_escaped_unicode(char32_t codepoint, std::string& buff) { + // the inner curly braces are necessary to build with older compilers. + std::array encode_buff {{}}; + uint32_t encoded_size {0}; + utf8::from_utf32(codepoint, encode_buff, encoded_size); + buff.append(reinterpret_cast(encode_buff.data()), encoded_size); + } }; FK_YAML_DETAIL_NAMESPACE_END -#endif /* FK_YAML_DETAIL_INPUT_SCALAR_PARSER_HPP */ +#endif /* FK_YAML_DETAIL_ENCODINGS_YAML_ESCAPER_HPP */ -// #include +// #include + +// #include // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library // | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 @@ -6662,193 +6197,334 @@ FK_YAML_DETAIL_NAMESPACE_END // SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani // SPDX-License-Identifier: MIT -#ifndef FK_YAML_DETAIL_INPUT_TAG_RESOLVER_HPP -#define FK_YAML_DETAIL_INPUT_TAG_RESOLVER_HPP +#ifndef FK_YAML_DETAIL_INPUT_SCALAR_SCANNER_HPP +#define FK_YAML_DETAIL_INPUT_SCALAR_SCANNER_HPP -#include +#include #include // #include // #include -// #include +// #include + + +FK_YAML_DETAIL_NAMESPACE_BEGIN + +/// @brief The class which detects a scalar value type by scanning contents. +class scalar_scanner { +public: + /// @brief Detects a scalar value type by scanning the contents ranged by the given iterators. + /// @param begin The iterator to the first element of the scalar. + /// @param end The iterator to the past-the-end element of the scalar. + /// @return A detected scalar value type. 
+ static node_type scan(const char* begin, const char* end) noexcept { + if (begin == end) { + return node_type::STRING; + } + + const auto len = static_cast(std::distance(begin, end)); + if (len > 5) { + return scan_possible_number_token(begin, len); + } + + const char* p_begin = &*begin; + + switch (len) { + case 1: + if (*p_begin == '~') { + return node_type::NULL_OBJECT; + } + break; + case 4: + switch (*p_begin) { + case 'n': + // no possible case of begin a number otherwise. + return (std::strncmp(p_begin + 1, "ull", 3) == 0) ? node_type::NULL_OBJECT : node_type::STRING; + case 'N': + // no possible case of begin a number otherwise. + return ((std::strncmp(p_begin + 1, "ull", 3) == 0) || (std::strncmp(p_begin + 1, "ULL", 3) == 0)) + ? node_type::NULL_OBJECT + : node_type::STRING; + case 't': + // no possible case of being a number otherwise. + return (std::strncmp(p_begin + 1, "rue", 3) == 0) ? node_type::BOOLEAN : node_type::STRING; + case 'T': + // no possible case of being a number otherwise. + return ((std::strncmp(p_begin + 1, "rue", 3) == 0) || (std::strncmp(p_begin + 1, "RUE", 3) == 0)) + ? node_type::BOOLEAN + : node_type::STRING; + case '.': { + const char* p_from_second = p_begin + 1; + const bool is_inf_or_nan_scalar = + (std::strncmp(p_from_second, "inf", 3) == 0) || (std::strncmp(p_from_second, "Inf", 3) == 0) || + (std::strncmp(p_from_second, "INF", 3) == 0) || (std::strncmp(p_from_second, "nan", 3) == 0) || + (std::strncmp(p_from_second, "NaN", 3) == 0) || (std::strncmp(p_from_second, "NAN", 3) == 0); + if (is_inf_or_nan_scalar) { + return node_type::FLOAT; + } + // maybe a number. + break; + } + default: + break; + } + break; + case 5: + switch (*p_begin) { + case 'f': + // no possible case of being a number otherwise. + return (std::strncmp(p_begin + 1, "alse", 4) == 0) ? node_type::BOOLEAN : node_type::STRING; + case 'F': + // no possible case of being a number otherwise. 
+ return ((std::strncmp(p_begin + 1, "alse", 4) == 0) || (std::strncmp(p_begin + 1, "ALSE", 4) == 0)) + ? node_type::BOOLEAN + : node_type::STRING; + case '+': + case '-': + if (*(p_begin + 1) == '.') { + const char* p_from_third = p_begin + 2; + const bool is_min_inf = (std::strncmp(p_from_third, "inf", 3) == 0) || + (std::strncmp(p_from_third, "Inf", 3) == 0) || + (std::strncmp(p_from_third, "INF", 3) == 0); + if (is_min_inf) { + return node_type::FLOAT; + } + } + // maybe a number. + break; + default: + break; + } + break; + default: + break; + } + + return scan_possible_number_token(begin, len); + } + +private: + /// @brief Detects a scalar value type from the contents (possibly an integer or a floating-point value). + /// @param itr The iterator to the first element of the scalar. + /// @param len The length of the scalar contents. + /// @return A detected scalar value type. + static node_type scan_possible_number_token(const char* itr, uint32_t len) noexcept { + FK_YAML_ASSERT(len > 0); + + switch (*itr) { + case '-': + return (len > 1) ? scan_negative_number(++itr, --len) : node_type::STRING; + case '+': + return (len > 1) ? scan_decimal_number(++itr, --len) : node_type::STRING; + case '.': + // some integer(s) required after the decimal point as a floating point value. + return (len > 1) ? scan_after_decimal_point(++itr, --len) : node_type::STRING; + case '0': + return (len > 1) ? scan_after_zero_at_first(++itr, --len) : node_type::INTEGER; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return (len > 1) ? scan_decimal_number(++itr, --len) : node_type::INTEGER; + default: + return node_type::STRING; + } + } -// #include + /// @brief Detects a scalar value type by scanning the contents right after the negative sign. + /// @param itr The iterator to the past-the-negative-sign element of the scalar. + /// @param len The length of the scalar contents left unscanned. 
+ /// @return A detected scalar value type. + static node_type scan_negative_number(const char* itr, uint32_t len) noexcept { + FK_YAML_ASSERT(len > 0); -// #include + if (is_digit(*itr)) { + return (len > 1) ? scan_decimal_number(++itr, --len) : node_type::INTEGER; + } -// #include + if (*itr == '.') { + // some integer(s) required after "-." as a floating point value. + return (len > 1) ? scan_after_decimal_point(++itr, --len) : node_type::STRING; + } -// #include + return node_type::STRING; + } + /// @brief Detects a scalar value type by scanning the contents right after the beginning 0. + /// @param itr The iterator to the past-the-zero element of the scalar. + /// @param len The length of the scalar left unscanned. + /// @return A detected scalar value type. + static node_type scan_after_zero_at_first(const char* itr, uint32_t len) noexcept { + FK_YAML_ASSERT(len > 0); -FK_YAML_DETAIL_NAMESPACE_BEGIN + if (is_digit(*itr)) { + // a token consisting of the beginning '0' and some following numbers, e.g., `0123`, is not an integer + // according to https://yaml.org/spec/1.2.2/#10213-integer. + return node_type::STRING; + } -static constexpr str_view default_primary_handle_prefix {"!"}; -static constexpr str_view default_secondary_handle_prefix {"tag:yaml.org,2002:"}; + switch (*itr) { + case '.': + // 0 can be omitted after `0.`. + return (len > 1) ? scan_after_decimal_point(++itr, --len) : node_type::FLOAT; + case 'e': + case 'E': + // some integer(s) required after the exponent sign as a floating point value. + return (len > 1) ? scan_after_exponent(++itr, --len) : node_type::STRING; + case 'o': + return (len > 1) ? scan_octal_number(++itr, --len) : node_type::STRING; + case 'x': + return (len > 1) ? 
scan_hexadecimal_number(++itr, --len) : node_type::STRING; + default: + return node_type::STRING; + } + } -template -class tag_resolver { - static_assert(is_basic_node::value, "tag_resolver only accepts basic_node<...>."); - using doc_metainfo_type = document_metainfo; + /// @brief Detects a scalar value type by scanning the contents part starting with a decimal. + /// @param itr The iterator to the beginning decimal element of the scalar. + /// @param len The length of the scalar left unscanned. + /// @return A detected scalar value type. + static node_type scan_decimal_number(const char* itr, uint32_t len) noexcept { + FK_YAML_ASSERT(len > 0); -public: - /// @brief Resolve the input tag name into an expanded tag name prepended with a registered prefix. - /// @param tag The input tag name. - /// @return The type of a node deduced from the given tag name. - static tag_t resolve_tag(const str_view tag, const std::shared_ptr& directives) { - const std::string normalized = normalize_tag_name(tag, directives); - return convert_to_tag_type(normalized); - } + if (is_digit(*itr)) { + return (len > 1) ? scan_decimal_number(++itr, --len) : node_type::INTEGER; + } -private: - static std::string normalize_tag_name(const str_view tag, const std::shared_ptr& directives) { - if FK_YAML_UNLIKELY (tag.empty()) { - throw invalid_tag("tag must not be empty.", ""); + switch (*itr) { + case '.': { + // 0 can be omitted after the decimal point + return (len > 1) ? scan_after_decimal_point(++itr, --len) : node_type::FLOAT; } - if FK_YAML_UNLIKELY (tag[0] != '!') { - throw invalid_tag("tag must start with \'!\'", std::string(tag.begin(), tag.end()).c_str()); + case 'e': + case 'E': + // some integer(s) required after the exponent + return (len > 1) ? 
scan_after_exponent(++itr, --len) : node_type::STRING; + default: + return node_type::STRING; } + } - if (tag.size() == 1) { - // Non-specific tag ("!") will be interpreted as one of the following: - // * tag:yaml.org,2002:seq - // * tag:yaml.org,2002:map - // * tag:yaml.org,2002:str - // See the "Non-Specific Tags" section in https://yaml.org/spec/1.2.2/#691-node-tags. - // The interpretation cannot take place here because the input lacks the corresponding value. - return {tag.begin(), tag.end()}; - } + /// @brief Detects a scalar value type by scanning the contents right after a decimal point. + /// @param itr The iterator to the past-the-decimal-point element of the scalar. + /// @param len The length of the scalar left unscanned. + /// @return A detected scalar value type. + static node_type scan_after_decimal_point(const char* itr, uint32_t len) noexcept { + FK_YAML_ASSERT(len > 0); - std::string normalized {"!<"}; - switch (tag[1]) { - case '!': { - // handle a secondary tag handle (!!suffix -> !<[secondary][suffix]>) - const bool is_null_or_empty = !directives || directives->secondary_handle_prefix.empty(); - if (is_null_or_empty) { - normalized.append(default_secondary_handle_prefix.begin(), default_secondary_handle_prefix.end()); - } - else { - normalized += directives->secondary_handle_prefix; + for (uint32_t i = 0; i < len; i++) { + const char c = *itr++; + + if (is_digit(c)) { + continue; } - const str_view body = tag.substr(2); - normalized.append(body.begin(), body.end()); - break; - } - case '<': - if (tag[2] == '!') { - const bool is_null_or_empty = !directives || directives->primary_handle_prefix.empty(); - if (is_null_or_empty) { - normalized.append(default_primary_handle_prefix.begin(), default_primary_handle_prefix.end()); - } - else { - normalized += directives->primary_handle_prefix; + if (c == 'e' || c == 'E') { + if (i == len - 1) { + // some integer(s) required after the exponent + return node_type::STRING; } - - const str_view body = 
tag.substr(3); - return normalized.append(body.begin(), body.end()); + return scan_after_exponent(itr, len - i - 1); } - // verbatim tags must be delivered as-is to the application. - // See https://yaml.org/spec/1.2.2/#691-node-tags for more details. - return {tag.begin(), tag.end()}; - default: { - const std::size_t tag_end_pos = tag.find_first_of('!', 1); - - // handle a named handle (!tag!suffix -> !<[tag][suffix]>) - if (tag_end_pos != std::string::npos) { - // there must be a non-empty suffix. (already checked by the lexer.) - FK_YAML_ASSERT(tag_end_pos < tag.size() - 1); - - const bool is_null_or_empty = !directives || directives->named_handle_map.empty(); - if FK_YAML_UNLIKELY (is_null_or_empty) { - throw invalid_tag( - "named handle has not been registered.", std::string(tag.begin(), tag.end()).c_str()); - } + return node_type::STRING; + } - // find the extracted named handle in the map. - const str_view named_handle = tag.substr(0, tag_end_pos + 1); - auto named_handle_itr = directives->named_handle_map.find({named_handle.begin(), named_handle.end()}); - auto end_itr = directives->named_handle_map.end(); - if FK_YAML_UNLIKELY (named_handle_itr == end_itr) { - throw invalid_tag( - "named handle has not been registered.", std::string(tag.begin(), tag.end()).c_str()); - } + return node_type::FLOAT; + } - // The YAML spec prohibits expanding the percent-encoded characters (%xx -> a UTF-8 byte). - // So no conversion takes place. - // See https://yaml.org/spec/1.2.2/#56-miscellaneous-characters for more details. + /// @brief Detects a scalar value type by scanning the contents right after the exponent prefix ("e" or "E"). + /// @param itr The iterator to the past-the-exponent-prefix element of the scalar. + /// @param len The length of the scalar left unscanned. + /// @return A detected scalar value type. 
+ static node_type scan_after_exponent(const char* itr, uint32_t len) noexcept { + FK_YAML_ASSERT(len > 0); - normalized += named_handle_itr->second; - const str_view body = tag.substr(tag_end_pos + 1); - normalized.append(body.begin(), body.end()); - break; + const char c = *itr; + if (c == '+' || c == '-') { + if (len == 1) { + // some integer(s) required after the sign. + return node_type::STRING; } + ++itr; + --len; + } - // handle a primary tag handle (!suffix -> !<[primary][suffix]>) - const bool is_null_or_empty = !directives || directives->primary_handle_prefix.empty(); - if (is_null_or_empty) { - normalized.append(default_primary_handle_prefix.begin(), default_primary_handle_prefix.end()); - } - else { - normalized += directives->primary_handle_prefix; + for (uint32_t i = 0; i < len; i++) { + if (!is_digit(*itr++)) { + return node_type::STRING; } - - const str_view body = tag.substr(1); - normalized.append(body.begin(), body.end()); - break; - } } - normalized += ">"; - return normalized; + return node_type::FLOAT; } - static tag_t convert_to_tag_type(const std::string& normalized) { - if (normalized == "!") { - return tag_t::NON_SPECIFIC; - } + /// @brief Detects a scalar value type by scanning the contents assuming octal numbers. + /// @param itr The iterator to the octal-number element of the scalar. + /// @param len The length of the scalar left unscanned. + /// @return A detected scalar value type. + static node_type scan_octal_number(const char* itr, uint32_t len) noexcept { + FK_YAML_ASSERT(len > 0); - if (normalized.size() < 24 /* size of ! 1) ? 
scan_octal_number(++itr, --len) : node_type::INTEGER; + default: + return node_type::STRING; } + } - if (normalized == "!") { - return tag_t::SEQUENCE; - } - if (normalized == "!") { - return tag_t::MAPPING; - } - if (normalized == "!") { - return tag_t::NULL_VALUE; - } - if (normalized == "!") { - return tag_t::BOOLEAN; - } - if (normalized == "!") { - return tag_t::INTEGER; - } - if (normalized == "!") { - return tag_t::FLOATING_NUMBER; - } - if (normalized == "!") { - return tag_t::STRING; + /// @brief Detects a scalar value type by scanning the contents assuming hexadecimal numbers. + /// @param itr The iterator to the hexadecimal-number element of the scalar. + /// @param len The length of the scalar left unscanned. + /// @return A detected scalar value type. + static node_type scan_hexadecimal_number(const char* itr, uint32_t len) noexcept { + FK_YAML_ASSERT(len > 0); + + if (is_xdigit(*itr)) { + return (len > 1) ? scan_hexadecimal_number(++itr, --len) : node_type::INTEGER; } + return node_type::STRING; + } - return tag_t::CUSTOM_TAG; + /// @brief Check if the given character is a digit. + /// @note This function is needed to avoid assertion failures in `std::isdigit()` especially when compiled with + /// MSVC. + /// @param c A character to be checked. + /// @return true if the given character is a digit, false otherwise. + static bool is_digit(char c) { + return ('0' <= c && c <= '9'); + } + + /// @brief Check if the given character is a hex-digit. + /// @note This function is needed to avoid assertion failures in `std::isxdigit()` especially when compiled with + /// MSVC. + /// @param c A character to be checked. + /// @return true if the given character is a hex-digit, false otherwise. 
+ static bool is_xdigit(char c) { + return (('0' <= c && c <= '9') || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f')); } }; FK_YAML_DETAIL_NAMESPACE_END -#endif /* FK_YAML_DETAIL_INPUT_TAG_RESOLVER_HPP */ +#endif /* FK_YAML_DETAIL_INPUT_SCALAR_SCANNER_HPP */ -// #include +// #include // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library // | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 @@ -6857,2537 +6533,2718 @@ FK_YAML_DETAIL_NAMESPACE_END // SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani // SPDX-License-Identifier: MIT -#ifndef FK_YAML_DETAIL_META_INPUT_ADAPTER_TRAITS_HPP -#define FK_YAML_DETAIL_META_INPUT_ADAPTER_TRAITS_HPP +#ifndef FK_YAML_DETAIL_INPUT_TAG_T_HPP +#define FK_YAML_DETAIL_INPUT_TAG_T_HPP -#include +#include // #include -// #include -// #include +FK_YAML_DETAIL_NAMESPACE_BEGIN + +/// @brief Definition of YAML tag types. +enum class tag_t : std::uint8_t { + NONE, //!< Represents a non-specific tag "?". + NON_SPECIFIC, //!< Represents a non-specific tag "!". + CUSTOM_TAG, //!< Represents a custom tag + SEQUENCE, //!< Represents a sequence tag. + MAPPING, //!< Represents a mapping tag. + NULL_VALUE, //!< Represents a null value tag. + BOOLEAN, //!< Represents a boolean tag. + INTEGER, //!< Represents an integer type + FLOATING_NUMBER, //!< Represents a floating point number tag. + STRING, //!< Represents a string tag. +}; + +FK_YAML_DETAIL_NAMESPACE_END + +#endif /* FK_YAML_DETAIL_INPUT_TAG_T_HPP */ + +// #include + +// #include + +// #include + +// #include + +// #include FK_YAML_DETAIL_NAMESPACE_BEGIN -/////////////////////////////////////////// -// Input Adapter API detection traits -/////////////////////////////////////////// +/// @brief A parser for YAML scalars. +/// @tparam BasicNodeType A type of the container for parsed YAML scalars. 
+template +class scalar_parser { + static_assert(is_basic_node::value, "scalar_parser only accepts basic_node<...>"); -/// @brief A type which represents get_buffer_view function. -/// @tparam T A target type. -template -using get_buffer_view_fn_t = decltype(std::declval().get_buffer_view()); +public: + using basic_node_type = BasicNodeType; -/// @brief Type traits to check if InputAdapterType has get_buffer_view member function. -/// @tparam InputAdapterType An input adapter type to check if it has get_buffer_view function. -/// @tparam typename N/A -template -using has_get_buffer_view = is_detected; +private: + /** A type for boolean node values. */ + using boolean_type = typename basic_node_type::boolean_type; + /** A type for integer node values. */ + using integer_type = typename basic_node_type::integer_type; + /** A type for floating point node values. */ + using float_number_type = typename basic_node_type::float_number_type; + /** A type for string node values. */ + using string_type = typename basic_node_type::string_type; -//////////////////////////////// -// is_input_adapter traits -//////////////////////////////// +public: + /// @brief Constructs a new scalar_parser object. + /// @param line Current line. + /// @param indent Current indentation. + scalar_parser(uint32_t line, uint32_t indent) noexcept + : m_line(line), + m_indent(indent) { + } -/// @brief Type traits to check if T is an input adapter type. -/// @tparam T A target type. -/// @tparam typename N/A -template -struct is_input_adapter : std::false_type {}; + /// @brief Destroys a scalar_parser object. + ~scalar_parser() noexcept = default; -/// @brief A partial specialization of is_input_adapter if T is an input adapter type. -/// @tparam InputAdapterType -template -struct is_input_adapter::value>> : std::true_type { -}; + // std::string's copy constructor/assignment operator may throw a exception. 
+ scalar_parser(const scalar_parser&) = default; + scalar_parser& operator=(const scalar_parser&) = default; -FK_YAML_DETAIL_NAMESPACE_END + scalar_parser(scalar_parser&&) noexcept = default; + scalar_parser& operator=(scalar_parser&&) noexcept(std::is_nothrow_move_assignable::value) = default; -#endif /* FK_YAML_DETAIL_META_INPUT_ADAPTER_TRAITS_HPP */ + /// @brief Parses a token into a flow scalar (either plain, single quoted or double quoted) + /// @param lex_type Lexical token type for the scalar. + /// @param tag_type Tag type for the scalar. + /// @param token Scalar contents. + /// @return Parsed YAML flow scalar object. + basic_node_type parse_flow(lexical_token_t lex_type, tag_t tag_type, str_view token) { + FK_YAML_ASSERT( + lex_type == lexical_token_t::PLAIN_SCALAR || lex_type == lexical_token_t::SINGLE_QUOTED_SCALAR || + lex_type == lexical_token_t::DOUBLE_QUOTED_SCALAR); + FK_YAML_ASSERT(tag_type != tag_t::SEQUENCE && tag_type != tag_t::MAPPING); -// #include + token = parse_flow_scalar_token(lex_type, token); + const node_type value_type = decide_value_type(lex_type, tag_type, token); + return create_scalar_node(value_type, tag_type, token); + } -// #include + /// @brief Parses a token into a block scalar (either literal or folded) + /// @param lex_type Lexical token type for the scalar. + /// @param tag_type Tag type for the scalar. + /// @param token Scalar contents. + /// @param header Block scalar header information. + /// @return Parsed YAML block scalar object. 
+ basic_node_type parse_block( + lexical_token_t lex_type, tag_t tag_type, str_view token, const block_scalar_header& header) { + FK_YAML_ASSERT( + lex_type == lexical_token_t::BLOCK_LITERAL_SCALAR || lex_type == lexical_token_t::BLOCK_FOLDED_SCALAR); + FK_YAML_ASSERT(tag_type != tag_t::SEQUENCE && tag_type != tag_t::MAPPING); -// #include -// _______ __ __ __ _____ __ __ __ -// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 -// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML -// -// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani -// SPDX-License-Identifier: MIT + if (lex_type == lexical_token_t::BLOCK_LITERAL_SCALAR) { + token = parse_block_literal_scalar(token, header); + } + else { + token = parse_block_folded_scalar(token, header); + } -#ifndef FK_YAML_DETAIL_NODE_ATTRS_HPP -#define FK_YAML_DETAIL_NODE_ATTRS_HPP + const node_type value_type = decide_value_type(lex_type, tag_type, token); + return create_scalar_node(value_type, tag_type, token); + } -#include -#include +private: + /// @brief Parses a token into a flow scalar contents. + /// @param lex_type Lexical token type for the scalar. + /// @param token Scalar contents. + /// @return View into the parsed scalar contents. + str_view parse_flow_scalar_token(lexical_token_t lex_type, str_view token) { + switch (lex_type) { + case lexical_token_t::PLAIN_SCALAR: + token = parse_plain_scalar(token); + break; + case lexical_token_t::SINGLE_QUOTED_SCALAR: + token = parse_single_quoted_scalar(token); + break; + case lexical_token_t::DOUBLE_QUOTED_SCALAR: + token = parse_double_quoted_scalar(token); + break; + default: // LCOV_EXCL_LINE + unreachable(); // LCOV_EXCL_LINE + } -// #include + return token; + } -// #include + /// @brief Parses plain scalar contents. + /// @param token Scalar contents. + /// @return View into the parsed scalar contents. 
+ str_view parse_plain_scalar(str_view token) noexcept { + // plain scalars cannot be empty. + FK_YAML_ASSERT(!token.empty()); + std::size_t newline_pos = token.find('\n'); + if (newline_pos == str_view::npos) { + return token; + } -FK_YAML_DETAIL_NAMESPACE_BEGIN + m_use_owned_buffer = true; -/// @brief The type for node attribute bits. -using node_attr_t = uint32_t; + if (m_buffer.capacity() < token.size()) { + m_buffer.reserve(token.size()); + } -/// @brief The namespace to define bit masks for node attribute bits. -namespace node_attr_mask { + do { + process_line_folding(token, newline_pos); + newline_pos = token.find('\n'); + } while (newline_pos != str_view::npos); + + m_buffer.append(token.begin(), token.size()); + + return {m_buffer}; + } + + /// @brief Parses single quoted scalar contents. + /// @param token Scalar contents. + /// @return View into the parsed scalar contents. + str_view parse_single_quoted_scalar(str_view token) noexcept { + if (token.empty()) { + return token; + } + + constexpr str_view filter {"\'\n"}; + std::size_t pos = token.find_first_of(filter); + if (pos == str_view::npos) { + return token; + } + + m_use_owned_buffer = true; + + if (m_buffer.capacity() < token.size()) { + m_buffer.reserve(token.size()); + } + + do { + FK_YAML_ASSERT(pos < token.size()); + FK_YAML_ASSERT(token[pos] == '\'' || token[pos] == '\n'); + + if (token[pos] == '\'') { + // unescape escaped single quote. ('' -> ') + FK_YAML_ASSERT(pos + 1 < token.size()); + m_buffer.append(token.begin(), token.begin() + (pos + 1)); + token.remove_prefix(pos + 2); // move next to the escaped single quote. + } + else { + process_line_folding(token, pos); + } -/// The bit mask for node value type bits. -constexpr node_attr_t value = 0x0000FFFFu; -/// The bit mask for node style type bits. (bits are not yet defined.) -constexpr node_attr_t style = 0x00FF0000u; -/// The bit mask for node property related bits. 
-constexpr node_attr_t props = 0xFF000000u; -/// The bit mask for anchor/alias node type bits. -constexpr node_attr_t anchoring = 0x03000000u; -/// The bit mask for anchor offset value bits. -constexpr node_attr_t anchor_offset = 0xFC000000u; -/// The bit mask for all the bits for node attributes. -constexpr node_attr_t all = std::numeric_limits::max(); + pos = token.find_first_of(filter); + } while (pos != str_view::npos); -} // namespace node_attr_mask + if (!token.empty()) { + m_buffer.append(token.begin(), token.size()); + } -/// @brief The namespace to define bits for node attributes. -namespace node_attr_bits { + return {m_buffer}; + } -/// The sequence node bit. -constexpr node_attr_t seq_bit = 1u << 0; -/// The mapping node bit. -constexpr node_attr_t map_bit = 1u << 1; -/// The null scalar node bit. -constexpr node_attr_t null_bit = 1u << 2; -/// The boolean scalar node bit. -constexpr node_attr_t bool_bit = 1u << 3; -/// The integer scalar node bit. -constexpr node_attr_t int_bit = 1u << 4; -/// The floating point scalar node bit. -constexpr node_attr_t float_bit = 1u << 5; -/// The string scalar node bit. -constexpr node_attr_t string_bit = 1u << 6; + /// @brief Parses double quoted scalar contents. + /// @param token Scalar contents. + /// @return View into the parsed scalar contents. + str_view parse_double_quoted_scalar(str_view token) { + if (token.empty()) { + return token; + } -/// A utility bit set to filter scalar node bits. -constexpr node_attr_t scalar_bits = null_bit | bool_bit | int_bit | float_bit | string_bit; + constexpr str_view filter {"\\\n"}; + std::size_t pos = token.find_first_of(filter); + if (pos == str_view::npos) { + return token; + } -/// The anchor node bit. -constexpr node_attr_t anchor_bit = 0x01000000u; -/// The alias node bit. -constexpr node_attr_t alias_bit = 0x02000000u; + m_use_owned_buffer = true; -/// A utility bit set for initialization. 
-constexpr node_attr_t default_bits = null_bit; + if (m_buffer.capacity() < token.size()) { + m_buffer.reserve(token.size()); + } -/// @brief Converts a node_type value to a node_attr_t value. -/// @param t A type of node value. -/// @return The associated node value bit. -inline node_attr_t from_node_type(node_type t) noexcept { - switch (t) { - case node_type::SEQUENCE: - return seq_bit; - case node_type::MAPPING: - return map_bit; - case node_type::NULL_OBJECT: - return null_bit; - case node_type::BOOLEAN: - return bool_bit; - case node_type::INTEGER: - return int_bit; - case node_type::FLOAT: - return float_bit; - case node_type::STRING: - return string_bit; - default: // LCOV_EXCL_LINE - return node_attr_mask::all; // LCOV_EXCL_LINE - } -} + do { + FK_YAML_ASSERT(pos < token.size()); + FK_YAML_ASSERT(token[pos] == '\\' || token[pos] == '\n'); -/// @brief Converts a node_attr_t value to a node_type value. -/// @param bits node attribute bits -/// @return An associated node value type with the given node value bit. -inline node_type to_node_type(node_attr_t bits) noexcept { - switch (bits & node_attr_mask::value) { - case seq_bit: - return node_type::SEQUENCE; - case map_bit: - return node_type::MAPPING; - case null_bit: - return node_type::NULL_OBJECT; - case bool_bit: - return node_type::BOOLEAN; - case int_bit: - return node_type::INTEGER; - case float_bit: - return node_type::FLOAT; - case string_bit: - return node_type::STRING; - default: // LCOV_EXCL_LINE - detail::unreachable(); // LCOV_EXCL_LINE - } -} + if (token[pos] == '\\') { + FK_YAML_ASSERT(pos + 1 < token.size()); + m_buffer.append(token.begin(), token.begin() + pos); -/// @brief Get an anchor offset used to reference an anchor node from the given attribute bits. -/// @param attrs node attribute bits -/// @return An anchor offset value. 
-inline uint32_t get_anchor_offset(node_attr_t attrs) noexcept { - return (attrs & node_attr_mask::anchor_offset) >> 26; -} + if (token[pos + 1] != '\n') { + token.remove_prefix(pos); + const char* p_escape_begin = token.begin(); + const bool is_valid_escaping = yaml_escaper::unescape(p_escape_begin, token.end(), m_buffer); + if FK_YAML_UNLIKELY (!is_valid_escaping) { + throw parse_error( + "Unsupported escape sequence is found in a double quoted scalar.", m_line, m_indent); + } -/// @brief Set an anchor offset value to the appropriate bits. -/// @param offset An anchor offset value. -/// @param attrs node attribute bit set into which the offset value is written. -inline void set_anchor_offset(uint32_t offset, node_attr_t& attrs) noexcept { - attrs &= ~node_attr_mask::anchor_offset; - attrs |= (offset & 0x3Fu) << 26; -} + // `p_escape_begin` points to the last element of the escape sequence. + token.remove_prefix((p_escape_begin - token.begin()) + 1); + } + else { + std::size_t non_space_pos = token.find_first_not_of(" \t", pos + 2); + if (non_space_pos == str_view::npos) { + non_space_pos = token.size(); + } + token.remove_prefix(non_space_pos); + } + } + else { + process_line_folding(token, pos); + } -} // namespace node_attr_bits + pos = token.find_first_of(filter); + } while (pos != str_view::npos); -FK_YAML_DETAIL_NAMESPACE_END + if (!token.empty()) { + m_buffer.append(token.begin(), token.size()); + } -#endif /* FK_YAML_DETAIL_NODE_ATTRS_HPP */ + return {m_buffer}; + } -// #include -// _______ __ __ __ _____ __ __ __ -// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 -// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML -// -// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani -// SPDX-License-Identifier: MIT + /// @brief Parses block literal scalar contents. + /// @param token Scalar contents. + /// @param header Block scalar header information. 
+ /// @return View into the parsed scalar contents. + str_view parse_block_literal_scalar(str_view token, const block_scalar_header& header) { + if FK_YAML_UNLIKELY (token.empty()) { + return token; + } -#ifndef FK_YAML_DETAIL_NODE_PROPERTY_HPP -#define FK_YAML_DETAIL_NODE_PROPERTY_HPP + m_use_owned_buffer = true; + m_buffer.reserve(token.size()); -#include + std::size_t cur_line_begin_pos = 0; + do { + bool has_newline_at_end = true; + std::size_t cur_line_end_pos = token.find('\n', cur_line_begin_pos); + if (cur_line_end_pos == str_view::npos) { + has_newline_at_end = false; + cur_line_end_pos = token.size(); + } -// #include + const std::size_t line_size = cur_line_end_pos - cur_line_begin_pos; + const str_view line = token.substr(cur_line_begin_pos, line_size); + if (line.size() > header.indent) { + m_buffer.append(line.begin() + header.indent, line.end()); + } -FK_YAML_DETAIL_NAMESPACE_BEGIN + if (!has_newline_at_end) { + break; + } -struct node_property { - /// The tag name property. - std::string tag {}; // NOLINT(readability-redundant-member-init) necessary for older compilers - /// The anchor name property. - std::string anchor {}; // NOLINT(readability-redundant-member-init) necessary for older compilers -}; + m_buffer.push_back('\n'); + cur_line_begin_pos = cur_line_end_pos + 1; + } while (cur_line_begin_pos < token.size()); -FK_YAML_DETAIL_NAMESPACE_END + process_chomping(header.chomp); -#endif /* FK_YAML_DETAIL_NODE_PROPERTY_HPP */ + return {m_buffer}; + } -// #include + /// @brief Parses block folded scalar contents. + /// @param token Scalar contents. + /// @param header Block scalar header information. + /// @return View into the parsed scalar contents. 
+ str_view parse_block_folded_scalar(str_view token, const block_scalar_header& header) { + if FK_YAML_UNLIKELY (token.empty()) { + return token; + } -// #include + m_use_owned_buffer = true; + m_buffer.reserve(token.size()); + constexpr str_view white_space_filter {" \t"}; -FK_YAML_DETAIL_NAMESPACE_BEGIN + std::size_t cur_line_begin_pos = 0; + bool has_newline_at_end = true; + bool can_be_folded = false; + do { + std::size_t cur_line_end_pos = token.find('\n', cur_line_begin_pos); + if (cur_line_end_pos == str_view::npos) { + has_newline_at_end = false; + cur_line_end_pos = token.size(); + } -/// @brief A class which provides the feature of deserializing YAML documents. -/// @tparam BasicNodeType A type of the container for deserialized YAML values. -template -class basic_deserializer { - static_assert(is_basic_node::value, "basic_deserializer only accepts basic_node<...>"); + const std::size_t line_size = cur_line_end_pos - cur_line_begin_pos; + const str_view line = token.substr(cur_line_begin_pos, line_size); + const bool is_empty = line.find_first_not_of(white_space_filter) == str_view::npos; - /** A type for the target basic_node. */ - using basic_node_type = BasicNodeType; - /** A type for the lexical analyzer. */ - using lexer_type = lexical_analyzer; - /** A type for the document metainfo. */ - using doc_metainfo_type = document_metainfo; - /** A type for the tag resolver. */ - using tag_resolver_type = tag_resolver; - /** A type for the scalar parser. */ - using scalar_parser_type = scalar_parser; - /** A type for sequence node value containers. */ - using sequence_type = typename basic_node_type::sequence_type; - /** A type for mapping node value containers. */ - using mapping_type = typename basic_node_type::mapping_type; + if (line.size() <= header.indent) { + // A less-indented line is turned into a newline. + m_buffer.push_back('\n'); + can_be_folded = false; + } + else if (is_empty) { + // more-indented empty lines are not folded. 
+ m_buffer.push_back('\n'); + m_buffer.append(line.begin() + header.indent, line.end()); + m_buffer.push_back('\n'); + } + else { + const std::size_t non_space_pos = line.find_first_not_of(white_space_filter); + const bool is_more_indented = (non_space_pos != str_view::npos) && (non_space_pos > header.indent); - /// @brief Definition of state types of parse contexts. - enum class context_state_t : std::uint8_t { - BLOCK_MAPPING, //!< The underlying node is a block mapping. - BLOCK_MAPPING_EXPLICIT_KEY, //!< The underlying node is an explicit block mapping key. - BLOCK_MAPPING_EXPLICIT_VALUE, //!< The underlying node is an explicit block mapping value. - MAPPING_VALUE, //!< The underlying node is a block mapping value. - BLOCK_SEQUENCE, //!< The underlying node is a block sequence. - BLOCK_SEQUENCE_ENTRY, //!< The underlying node is a block sequence entry. - FLOW_SEQUENCE, //!< The underlying node is a flow sequence. - FLOW_SEQUENCE_KEY, //!< The underlying node is a flow sequence as a key. - FLOW_MAPPING, //!< The underlying node is a flow mapping. - FLOW_MAPPING_KEY, //!< The underlying node is a flow mapping as a key. - }; + if (can_be_folded) { + if (is_more_indented) { + // The content line right before more-indented lines is not folded. + m_buffer.push_back('\n'); + } + else { + m_buffer.push_back(' '); + } - /// @brief Context information set for parsing. - struct parse_context { - /// @brief Construct a new parse_context object. - parse_context() = default; + can_be_folded = false; + } - /// @brief Construct a new parse_context object with non-default values for each parameter. - /// @param line The current line. (count from zero) - /// @param indent The indentation width in the current line. (count from zero) - /// @param state The parse context type. - /// @param p_node The underlying node associated to this context. 
- parse_context(uint32_t line, uint32_t indent, context_state_t state, basic_node_type* p_node) noexcept - : line(line), - indent(indent), - state(state), - p_node(p_node) { - } + m_buffer.append(line.begin() + header.indent, line.end()); - parse_context(const parse_context&) noexcept = default; - parse_context& operator=(const parse_context&) noexcept = default; - parse_context(parse_context&&) noexcept = default; - parse_context& operator=(parse_context&&) noexcept = default; + if (is_more_indented && has_newline_at_end) { + // more-indented lines are not folded. + m_buffer.push_back('\n'); + } + else { + can_be_folded = true; + } + } - ~parse_context() { - switch (state) { - case context_state_t::BLOCK_MAPPING_EXPLICIT_KEY: - case context_state_t::FLOW_SEQUENCE_KEY: - case context_state_t::FLOW_MAPPING_KEY: - delete p_node; - p_node = nullptr; - break; - default: + if (!has_newline_at_end) { break; } - } - - /// The current line. (count from zero) - uint32_t line {0}; - /// The indentation width in the current line. (count from zero) - uint32_t indent {0}; - /// The parse context type. - context_state_t state {context_state_t::BLOCK_MAPPING}; - /// The pointer to the associated node to this context. - basic_node_type* p_node {nullptr}; - }; - /// @brief Definitions of state types for expected flow token hints. - enum class flow_token_state_t : std::uint8_t { - NEEDS_VALUE_OR_SUFFIX, //!< Either value or flow suffix (`]` or `}`) - NEEDS_SEPARATOR_OR_SUFFIX, //!< Either separator (`,`) or flow suffix (`]` or `}`) - }; + cur_line_begin_pos = cur_line_end_pos + 1; + } while (cur_line_begin_pos < token.size()); -public: - /// @brief Construct a new basic_deserializer object. - basic_deserializer() = default; + if (has_newline_at_end && can_be_folded) { + // The final content line break are not folded. + m_buffer.push_back('\n'); + } -public: - /// @brief Deserialize a single YAML document into a YAML node. 
- /// @note - /// If the input consists of multiple YAML documents, this function only parses the first. - /// If the input may have multiple YAML documents all of which must be parsed into nodes, - /// prefer the `deserialize_docs()` function. - /// @tparam InputAdapterType The type of an input adapter object. - /// @param input_adapter An input adapter object for the input source buffer. - /// @return basic_node_type A root YAML node deserialized from the source string. - template ::value, int> = 0> - basic_node_type deserialize(InputAdapterType&& input_adapter) { // NOLINT(cppcoreguidelines-missing-std-forward) - const str_view input_view = input_adapter.get_buffer_view(); - lexer_type lexer(input_view); + process_chomping(header.chomp); - lexical_token_t type {lexical_token_t::END_OF_BUFFER}; - return deserialize_document(lexer, type); + return {m_buffer}; } - /// @brief Deserialize multiple YAML documents into YAML nodes. - /// @tparam InputAdapterType The type of an adapter object. - /// @param input_adapter An input adapter object for the input source buffer. - /// @return std::vector Root YAML nodes for deserialized YAML documents. - template ::value, int> = 0> - // NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward) - std::vector deserialize_docs(InputAdapterType&& input_adapter) { - const str_view input_view = input_adapter.get_buffer_view(); - lexer_type lexer(input_view); + /// @brief Discards final content line break and trailing empty lines depending on the given chomping type. + /// @param chomp Chomping method type. + void process_chomping(chomping_indicator_t chomp) { + switch (chomp) { + case chomping_indicator_t::STRIP: { + const std::size_t content_end_pos = m_buffer.find_last_not_of('\n'); + if (content_end_pos == std::string::npos) { + // if the scalar has no content line, all lines are considered as trailing empty lines. 
+ m_buffer.clear(); + break; + } - std::vector nodes {}; - lexical_token_t type {lexical_token_t::END_OF_BUFFER}; + if (content_end_pos == m_buffer.size() - 1) { + // no last content line break nor trailing empty lines. + break; + } - do { - nodes.emplace_back(deserialize_document(lexer, type)); - } while (type != lexical_token_t::END_OF_BUFFER); + // remove the last content line break and all trailing empty lines. + m_buffer.erase(content_end_pos + 1); - return nodes; - } // LCOV_EXCL_LINE + break; + } + case chomping_indicator_t::CLIP: { + const std::size_t content_end_pos = m_buffer.find_last_not_of('\n'); + if (content_end_pos == std::string::npos) { + // if the scalar has no content line, all lines are considered as trailing empty lines. + m_buffer.clear(); + break; + } -private: - /// @brief Deserialize a YAML document into a YAML node. - /// @param lexer The lexical analyzer to be used. - /// @param last_type The variable to store the last lexical token type. - /// @return basic_node_type A root YAML node deserialized from the YAML document. - basic_node_type deserialize_document(lexer_type& lexer, lexical_token_t& last_type) { - lexical_token token {}; + if (content_end_pos == m_buffer.size() - 1) { + // no trailing empty lines + break; + } - basic_node_type root; - mp_current_node = &root; - mp_meta = root.mp_meta; + // remove all trailing empty lines. + m_buffer.erase(content_end_pos + 2); - // parse directives first. - deserialize_directives(lexer, token); + break; + } + case chomping_indicator_t::KEEP: + break; + } + } - // parse node properties for root node if any - uint32_t line = lexer.get_lines_processed(); - uint32_t indent = lexer.get_last_token_begin_pos(); - const bool found_props = deserialize_node_properties(lexer, token, line, indent); + /// @brief Applies line folding to flow scalar contents. + /// @param token Flow scalar contents. + /// @param newline_pos Position of the target newline code. 
+ void process_line_folding(str_view& token, std::size_t newline_pos) noexcept { + // discard trailing white spaces which precedes the line break in the current line. + const std::size_t last_non_space_pos = token.substr(0, newline_pos + 1).find_last_not_of(" \t"); + if (last_non_space_pos == str_view::npos) { + m_buffer.append(token.begin(), newline_pos); + } + else { + m_buffer.append(token.begin(), last_non_space_pos + 1); + } + token.remove_prefix(newline_pos + 1); // move next to the LF - switch (token.type) { - case lexical_token_t::SEQUENCE_BLOCK_PREFIX: { - root = basic_node_type::sequence({basic_node_type()}); - apply_directive_set(root); - if (found_props) { - // If node properties are found before the block sequence entry prefix, the properties belong to the - // root sequence node. - apply_node_properties(root); + uint32_t empty_line_counts = 0; + do { + const std::size_t non_space_pos = token.find_first_not_of(" \t"); + if (non_space_pos == str_view::npos) { + // Line folding ignores trailing spaces. 
+ token.remove_prefix(token.size()); + break; + } + if (token[non_space_pos] != '\n') { + token.remove_prefix(non_space_pos); + break; } - parse_context context( - lexer.get_lines_processed(), lexer.get_last_token_begin_pos(), context_state_t::BLOCK_SEQUENCE, &root); - m_context_stack.emplace_back(context); - - mp_current_node = &(root.as_seq().back()); - apply_directive_set(*mp_current_node); - context.state = context_state_t::BLOCK_SEQUENCE_ENTRY; - context.p_node = mp_current_node; - m_context_stack.emplace_back(std::move(context)); + token.remove_prefix(non_space_pos + 1); + ++empty_line_counts; + } while (true); - token = lexer.get_next_token(); - line = lexer.get_lines_processed(); - indent = lexer.get_last_token_begin_pos(); - break; + if (empty_line_counts > 0) { + m_buffer.append(empty_line_counts, '\n'); } - case lexical_token_t::SEQUENCE_FLOW_BEGIN: - ++m_flow_context_depth; - lexer.set_context_state(true); - root = basic_node_type::sequence(); - apply_directive_set(root); - apply_node_properties(root); - m_context_stack.emplace_back( - lexer.get_lines_processed(), lexer.get_last_token_begin_pos(), context_state_t::FLOW_SEQUENCE, &root); - token = lexer.get_next_token(); - line = lexer.get_lines_processed(); - indent = lexer.get_last_token_begin_pos(); + else { + m_buffer.push_back(' '); + } + } + + /// @brief Decides scalar value type based on the lexical/tag types and scalar contents. + /// @param lex_type Lexical token type for the scalar. + /// @param tag_type Tag type for the scalar. + /// @param token Scalar contents. + /// @return Scalar value type. 
+ node_type decide_value_type(lexical_token_t lex_type, tag_t tag_type, str_view token) const noexcept { + node_type value_type {node_type::STRING}; + if (lex_type == lexical_token_t::PLAIN_SCALAR) { + value_type = scalar_scanner::scan(token.begin(), token.end()); + } + + switch (tag_type) { + case tag_t::NULL_VALUE: + value_type = node_type::NULL_OBJECT; break; - case lexical_token_t::MAPPING_FLOW_BEGIN: - ++m_flow_context_depth; - lexer.set_context_state(true); - root = basic_node_type::mapping(); - apply_directive_set(root); - apply_node_properties(root); - m_context_stack.emplace_back( - lexer.get_lines_processed(), lexer.get_last_token_begin_pos(), context_state_t::FLOW_MAPPING, &root); - token = lexer.get_next_token(); - line = lexer.get_lines_processed(); - indent = lexer.get_last_token_begin_pos(); + case tag_t::BOOLEAN: + value_type = node_type::BOOLEAN; break; - case lexical_token_t::EXPLICIT_KEY_PREFIX: { - // If the explicit key prefix (? ) is detected here, the root node of current document must be a mapping. - // Also, tag and anchor if any are associated to the root mapping node. - // No get_next_token() call here to handle the token event in the deserialize_node() function. - root = basic_node_type::mapping(); - apply_directive_set(root); - apply_node_properties(root); - parse_context context( - lexer.get_lines_processed(), lexer.get_last_token_begin_pos(), context_state_t::BLOCK_MAPPING, &root); - m_context_stack.emplace_back(std::move(context)); - line = lexer.get_lines_processed(); - indent = lexer.get_last_token_begin_pos(); + case tag_t::INTEGER: + value_type = node_type::INTEGER; break; - } - case lexical_token_t::BLOCK_LITERAL_SCALAR: - case lexical_token_t::BLOCK_FOLDED_SCALAR: - // If a block scalar token is detected here, current document contains single scalar. - // Do nothing here since the token is handled in the deserialize_node() function. 
+ case tag_t::FLOATING_NUMBER: + value_type = node_type::FLOAT; break; - case lexical_token_t::PLAIN_SCALAR: - case lexical_token_t::SINGLE_QUOTED_SCALAR: - case lexical_token_t::DOUBLE_QUOTED_SCALAR: - case lexical_token_t::ALIAS_PREFIX: - // Defer handling the above token events until the next call on the deserialize_scalar() function since the - // meaning depends on subsequent events. - if (found_props && line < lexer.get_lines_processed()) { - // If node properties and a followed node are on the different line, the properties belong to the root - // node. - if (m_needs_anchor_impl) { - m_root_anchor_name = m_anchor_name; - m_needs_anchor_impl = false; - m_anchor_name = {}; - } - - if (m_needs_tag_impl) { - m_root_tag_name = m_tag_name; - m_needs_tag_impl = false; - m_tag_name = {}; - } - - line = lexer.get_lines_processed(); - indent = lexer.get_last_token_begin_pos(); - } + case tag_t::STRING: + case tag_t::NON_SPECIFIC: + // scalars with the non-specific tag is resolved to a string tag. + // See the "Non-Specific Tags" section in https://yaml.org/spec/1.2.2/#691-node-tags. + value_type = node_type::STRING; break; + case tag_t::NONE: + case tag_t::CUSTOM_TAG: default: - // Do nothing since current document has no contents. break; } - // parse YAML nodes recursively - deserialize_node(lexer, token, line, indent, last_type); - FK_YAML_ASSERT( - last_type == lexical_token_t::END_OF_BUFFER || last_type == lexical_token_t::END_OF_DIRECTIVES || - last_type == lexical_token_t::END_OF_DOCUMENT); - - // reset parameters for the next call. - mp_current_node = nullptr; - mp_meta.reset(); - m_needs_tag_impl = false; - m_needs_anchor_impl = false; - m_flow_context_depth = 0; - m_flow_token_state = flow_token_state_t::NEEDS_VALUE_OR_SUFFIX; - m_context_stack.clear(); - - return root; + return value_type; } - /// @brief Deserializes the YAML directives if specified. - /// @param lexer The lexical analyzer to be used. 
- /// @param last_token Storage for last lexical token type. - void deserialize_directives(lexer_type& lexer, lexical_token& last_token) { - bool lacks_end_of_directives_marker = false; - lexer.set_document_state(true); - - for (;;) { - const lexical_token token = lexer.get_next_token(); + /// @brief Creates YAML scalar object based on the value type and contents. + /// @param type Scalar value type. + /// @param token Scalar contents. + /// @return A YAML scalar object. + basic_node_type create_scalar_node(node_type val_type, tag_t tag_type, str_view token) { + switch (val_type) { + case node_type::NULL_OBJECT: { + std::nullptr_t null = nullptr; + const bool converted = detail::aton(token.begin(), token.end(), null); + if FK_YAML_UNLIKELY (!converted) { + throw parse_error("Failed to convert a scalar to a null.", m_line, m_indent); + } + // The default basic_node object is a null scalar node. + return basic_node_type {}; + } + case node_type::BOOLEAN: { + auto boolean = static_cast(false); + const bool converted = detail::atob(token.begin(), token.end(), boolean); + if FK_YAML_UNLIKELY (!converted) { + throw parse_error("Failed to convert a scalar to a boolean.", m_line, m_indent); + } + return basic_node_type(boolean); + } + case node_type::INTEGER: { + integer_type integer = 0; + const bool converted = detail::atoi(token.begin(), token.end(), integer); + if FK_YAML_LIKELY (converted) { + return basic_node_type(integer); + } - switch (token.type) { - case lexical_token_t::YAML_VER_DIRECTIVE: - if FK_YAML_UNLIKELY (mp_meta->is_version_specified) { - throw parse_error( - "YAML version cannot be specified more than once.", - lexer.get_lines_processed(), - lexer.get_last_token_begin_pos()); + // For untagged plain integer scalars, attempt a uint64_t parse to handle large + // positive values that exceed int64_t max (e.g. xxHash/UUID results like + // 15745692345339290292). 
This only applies when integer_type is a signed 64-bit + // type; any other width would not be able to represent the value anyway. + if (tag_type != tag_t::INTEGER && std::is_signed::value && + sizeof(integer_type) == sizeof(uint64_t)) { + uint64_t u64 = 0; + if (detail::atoi(token.begin(), token.end(), u64)) { + basic_node_type node; + // Store the bit pattern in the signed field and set uint_bit so that + // as_uint() / get_value() can recover the correct value. + detail::external_node_constructor::unsigned_integer_scalar( + node, static_cast(u64)); + return node; } + } - mp_meta->version = convert_yaml_version(lexer.get_yaml_version()); - mp_meta->is_version_specified = true; - lacks_end_of_directives_marker = true; - break; - case lexical_token_t::TAG_DIRECTIVE: { - const str_view tag_handle_view = lexer.get_tag_handle(); - switch (tag_handle_view.size()) { - case 1 /* ! */: { - const bool is_already_specified = !mp_meta->primary_handle_prefix.empty(); - if FK_YAML_UNLIKELY (is_already_specified) { - throw parse_error( - "Primary handle cannot be specified more than once.", - lexer.get_lines_processed(), - lexer.get_last_token_begin_pos()); - } - const str_view tag_prefix = lexer.get_tag_prefix(); - mp_meta->primary_handle_prefix.assign(tag_prefix.begin(), tag_prefix.end()); - lacks_end_of_directives_marker = true; - break; - } - case 2 /* !! */: { - const bool is_already_specified = !mp_meta->secondary_handle_prefix.empty(); - if FK_YAML_UNLIKELY (is_already_specified) { - throw parse_error( - "Secondary handle cannot be specified more than once.", - lexer.get_lines_processed(), - lexer.get_last_token_begin_pos()); - } - const str_view tag_prefix = lexer.get_tag_prefix(); - mp_meta->secondary_handle_prefix.assign(tag_prefix.begin(), tag_prefix.end()); - lacks_end_of_directives_marker = true; - break; - } - default /* !! 
*/: { - std::string tag_handle(tag_handle_view.begin(), tag_handle_view.end()); - const str_view tag_prefix_view = lexer.get_tag_prefix(); - std::string tag_prefix(tag_prefix_view.begin(), tag_prefix_view.end()); - const bool is_already_specified = - !(mp_meta->named_handle_map.emplace(std::move(tag_handle), std::move(tag_prefix)).second); - if FK_YAML_UNLIKELY (is_already_specified) { - throw parse_error( - "The same named handle cannot be specified more than once.", - lexer.get_lines_processed(), - lexer.get_last_token_begin_pos()); - } - lacks_end_of_directives_marker = true; - break; - } - } - break; + if FK_YAML_UNLIKELY (tag_type == tag_t::INTEGER) { + throw parse_error("Failed to convert a scalar to an integer.", m_line, m_indent); } - case lexical_token_t::INVALID_DIRECTIVE: - // TODO: should output a warning log. Currently just ignore this case. - break; - case lexical_token_t::END_OF_DIRECTIVES: - lacks_end_of_directives_marker = false; - break; - default: - if FK_YAML_UNLIKELY (lacks_end_of_directives_marker) { - throw parse_error( - "The end of directives marker (---) is missing after directives.", - lexer.get_lines_processed(), - lexer.get_last_token_begin_pos()); - } - // end the parsing of directives if the other tokens are found. - last_token = token; - lexer.set_document_state(false); - return; + + // conversion error from a scalar which is not tagged with !!int is recovered by treating it as a string + // scalar. See https://github.com/fktn-k/fkYAML/issues/428. 
+ return basic_node_type(string_type(token.begin(), token.end())); + } + case node_type::FLOAT: { + float_number_type float_val = 0; + const bool converted = detail::atof(token.begin(), token.end(), float_val); + if FK_YAML_LIKELY (converted) { + return basic_node_type(float_val); + } + if FK_YAML_UNLIKELY (tag_type == tag_t::FLOATING_NUMBER) { + throw parse_error("Failed to convert a scalar to a floating point value", m_line, m_indent); + } + + // conversion error from a scalar which is not tagged with !!float is recovered by treating it as a string + // scalar. See https://github.com/fktn-k/fkYAML/issues/428. + return basic_node_type(string_type(token.begin(), token.end())); + } + case node_type::STRING: + if (!m_use_owned_buffer) { + return basic_node_type(string_type(token.begin(), token.end())); } + m_use_owned_buffer = false; + return basic_node_type(std::move(m_buffer)); + default: // LCOV_EXCL_LINE + detail::unreachable(); // LCOV_EXCL_LINE } } - /// @brief Deserializes the YAML nodes recursively. - /// @param lexer The lexical analyzer to be used. - /// @param first_type The first lexical token. - /// @param last_type Storage for last lexical token type. - void deserialize_node( - lexer_type& lexer, const lexical_token& first_token, uint32_t first_line, uint32_t first_indent, - lexical_token_t& last_type) { - lexical_token token = first_token; - uint32_t line = first_line; - uint32_t indent = first_indent; + /// Current line + uint32_t m_line {0}; + /// Current indentation for the scalar + uint32_t m_indent {0}; + /// Whether the parsed contents are stored in an owned buffer. + bool m_use_owned_buffer {false}; + /// Owned buffer storage for parsing. This buffer is used when scalar contents need mutation. 
+ std::string m_buffer; +}; + +FK_YAML_DETAIL_NAMESPACE_END + +#endif /* FK_YAML_DETAIL_INPUT_SCALAR_PARSER_HPP */ + +// #include +// _______ __ __ __ _____ __ __ __ +// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library +// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 +// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML +// +// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani +// SPDX-License-Identifier: MIT + +#ifndef FK_YAML_DETAIL_INPUT_TAG_RESOLVER_HPP +#define FK_YAML_DETAIL_INPUT_TAG_RESOLVER_HPP + +#include +#include + +// #include + +// #include + +// #include + +// #include + +// #include + +// #include + +// #include + + +FK_YAML_DETAIL_NAMESPACE_BEGIN + +static constexpr str_view default_primary_handle_prefix {"!"}; +static constexpr str_view default_secondary_handle_prefix {"tag:yaml.org,2002:"}; + +template +class tag_resolver { + static_assert(is_basic_node::value, "tag_resolver only accepts basic_node<...>."); + using doc_metainfo_type = document_metainfo; + +public: + /// @brief Resolve the input tag name into an expanded tag name prepended with a registered prefix. + /// @param tag The input tag name. + /// @return The type of a node deduced from the given tag name. 
+ static tag_t resolve_tag(const str_view tag, const std::shared_ptr& directives) { + const std::string normalized = normalize_tag_name(tag, directives); + return convert_to_tag_type(normalized); + } + +private: + static std::string normalize_tag_name(const str_view tag, const std::shared_ptr& directives) { + if FK_YAML_UNLIKELY (tag.empty()) { + throw invalid_tag("tag must not be empty.", ""); + } + if FK_YAML_UNLIKELY (tag[0] != '!') { + throw invalid_tag("tag must start with \'!\'", std::string(tag.begin(), tag.end()).c_str()); + } - do { - switch (token.type) { - case lexical_token_t::EXPLICIT_KEY_PREFIX: { - const bool needs_to_move_back = indent == 0 || indent < m_context_stack.back().indent; - if (needs_to_move_back) { - pop_to_parent_node(line, indent, [indent](const parse_context& c) { - return c.state == context_state_t::BLOCK_MAPPING && indent == c.indent; - }); - } + if (tag.size() == 1) { + // Non-specific tag ("!") will be interpreted as one of the following: + // * tag:yaml.org,2002:seq + // * tag:yaml.org,2002:map + // * tag:yaml.org,2002:str + // See the "Non-Specific Tags" section in https://yaml.org/spec/1.2.2/#691-node-tags. + // The interpretation cannot take place here because the input lacks the corresponding value. + return {tag.begin(), tag.end()}; + } - switch (m_context_stack.back().state) { - case context_state_t::MAPPING_VALUE: - case context_state_t::BLOCK_MAPPING_EXPLICIT_KEY: - case context_state_t::BLOCK_MAPPING_EXPLICIT_VALUE: - case context_state_t::BLOCK_SEQUENCE_ENTRY: - // This path is needed in case the input contains nested explicit keys. - // ```yaml - // foo: - // ? ? foo - // : bar - // : ? baz - // : - ? 
qux - // : 123 - // ``` - *mp_current_node = basic_node_type::mapping(); - apply_directive_set(*mp_current_node); - m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); - break; - default: - break; + std::string normalized {"!<"}; + switch (tag[1]) { + case '!': { + // handle a secondary tag handle (!!suffix -> !<[secondary][suffix]>) + const bool is_null_or_empty = !directives || directives->secondary_handle_prefix.empty(); + if (is_null_or_empty) { + normalized.append(default_secondary_handle_prefix.begin(), default_secondary_handle_prefix.end()); + } + else { + normalized += directives->secondary_handle_prefix; + } + + const str_view body = tag.substr(2); + normalized.append(body.begin(), body.end()); + break; + } + case '<': + if (tag[2] == '!') { + const bool is_null_or_empty = !directives || directives->primary_handle_prefix.empty(); + if (is_null_or_empty) { + normalized.append(default_primary_handle_prefix.begin(), default_primary_handle_prefix.end()); + } + else { + normalized += directives->primary_handle_prefix; } - token = lexer.get_next_token(); - if (token.type == lexical_token_t::SEQUENCE_BLOCK_PREFIX) { - // heap-allocated node will be freed in handling the corresponding KEY_SEPARATOR event - auto* p_node = new basic_node_type(node_type::SEQUENCE); - m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING_EXPLICIT_KEY, p_node); + const str_view body = tag.substr(3); + return normalized.append(body.begin(), body.end()); + } - apply_directive_set(*p_node); - parse_context context( - lexer.get_lines_processed(), - lexer.get_last_token_begin_pos(), - context_state_t::BLOCK_SEQUENCE, - p_node); - m_context_stack.emplace_back(context); + // verbatim tags must be delivered as-is to the application. + // See https://yaml.org/spec/1.2.2/#691-node-tags for more details. 
+ return {tag.begin(), tag.end()}; + default: { + const std::size_t tag_end_pos = tag.find_first_of('!', 1); - p_node->as_seq().emplace_back(basic_node_type()); - mp_current_node = &(p_node->as_seq().back()); - apply_directive_set(*mp_current_node); - context.state = context_state_t::BLOCK_SEQUENCE_ENTRY; - context.p_node = mp_current_node; - m_context_stack.emplace_back(std::move(context)); + // handle a named handle (!tag!suffix -> !<[tag][suffix]>) + if (tag_end_pos != std::string::npos) { + // there must be a non-empty suffix. (already checked by the lexer.) + FK_YAML_ASSERT(tag_end_pos < tag.size() - 1); - break; + const bool is_null_or_empty = !directives || directives->named_handle_map.empty(); + if FK_YAML_UNLIKELY (is_null_or_empty) { + throw invalid_tag( + "named handle has not been registered.", std::string(tag.begin(), tag.end()).c_str()); } - // heap-allocated node will be freed in handling the corresponding KEY_SEPARATOR event - m_context_stack.emplace_back( - line, indent, context_state_t::BLOCK_MAPPING_EXPLICIT_KEY, new basic_node_type()); - mp_current_node = m_context_stack.back().p_node; - apply_directive_set(*mp_current_node); - indent = lexer.get_last_token_begin_pos(); - line = lexer.get_lines_processed(); + // find the extracted named handle in the map. + const str_view named_handle = tag.substr(0, tag_end_pos + 1); + auto named_handle_itr = directives->named_handle_map.find({named_handle.begin(), named_handle.end()}); + auto end_itr = directives->named_handle_map.end(); + if FK_YAML_UNLIKELY (named_handle_itr == end_itr) { + throw invalid_tag( + "named handle has not been registered.", std::string(tag.begin(), tag.end()).c_str()); + } - continue; + // The YAML spec prohibits expanding the percent-encoded characters (%xx -> a UTF-8 byte). + // So no conversion takes place. + // See https://yaml.org/spec/1.2.2/#56-miscellaneous-characters for more details. 
+ + normalized += named_handle_itr->second; + const str_view body = tag.substr(tag_end_pos + 1); + normalized.append(body.begin(), body.end()); + break; } - case lexical_token_t::KEY_SEPARATOR: { - FK_YAML_ASSERT(!m_context_stack.empty()); - if FK_YAML_UNLIKELY (m_context_stack.back().state == context_state_t::BLOCK_SEQUENCE_ENTRY) { - // empty mapping keys are not supported. - // ```yaml - // - : foo - // ``` - throw parse_error("sequence key should not be empty.", line, indent); - } - if (m_flow_context_depth > 0) { - break; - } + // handle a primary tag handle (!suffix -> !<[primary][suffix]>) + const bool is_null_or_empty = !directives || directives->primary_handle_prefix.empty(); + if (is_null_or_empty) { + normalized.append(default_primary_handle_prefix.begin(), default_primary_handle_prefix.end()); + } + else { + normalized += directives->primary_handle_prefix; + } - // hold the line count of the key separator for later use. - const uint32_t old_indent = indent; - const uint32_t old_line = line; + const str_view body = tag.substr(1); + normalized.append(body.begin(), body.end()); + break; + } + } - token = lexer.get_next_token(); - line = lexer.get_lines_processed(); - indent = lexer.get_last_token_begin_pos(); + normalized += ">"; + return normalized; + } - const bool found_props = deserialize_node_properties(lexer, token, line, indent); - if (found_props && line == lexer.get_lines_processed()) { - // defer applying node properties for the subsequent node on the same line. 
- continue; - } + static tag_t convert_to_tag_type(const std::string& normalized) { + if (normalized == "!") { + return tag_t::NON_SPECIFIC; + } - line = lexer.get_lines_processed(); - indent = lexer.get_last_token_begin_pos(); + if (normalized.size() < 24 /* size of !") { + return tag_t::SEQUENCE; + } + if (normalized == "!") { + return tag_t::MAPPING; + } + if (normalized == "!") { + return tag_t::NULL_VALUE; + } + if (normalized == "!") { + return tag_t::BOOLEAN; + } + if (normalized == "!") { + return tag_t::INTEGER; + } + if (normalized == "!") { + return tag_t::FLOATING_NUMBER; + } + if (normalized == "!") { + return tag_t::STRING; + } + + return tag_t::CUSTOM_TAG; + } +}; - const bool is_implicit_same_line = - (line == old_line) && (m_context_stack.empty() || old_indent > m_context_stack.back().indent); - if (is_implicit_same_line) { - // a key separator for an implicit key with its value on the same line. - continue; - } +FK_YAML_DETAIL_NAMESPACE_END - if (line > old_line) { - if (m_needs_tag_impl) { - const tag_t tag_type = tag_resolver_type::resolve_tag(m_tag_name, mp_meta); - if (tag_type == tag_t::MAPPING || tag_type == tag_t::CUSTOM_TAG) { - // set YAML node properties here to distinguish them from those for the first key node - // as shown in the following snippet: - // - // ```yaml - // foo: !!map - // !!str 123: true - // ^ - // this !!str tag overwrites the preceding !!map tag. 
- // ``` - *mp_current_node = basic_node_type::mapping(); - apply_directive_set(*mp_current_node); - apply_node_properties(*mp_current_node); - m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); - continue; - } - } +#endif /* FK_YAML_DETAIL_INPUT_TAG_RESOLVER_HPP */ - if (token.type == lexical_token_t::SEQUENCE_BLOCK_PREFIX) { - // a key separator preceding block sequence entries - *mp_current_node = basic_node_type::sequence({basic_node_type()}); - apply_directive_set(*mp_current_node); - apply_node_properties(*mp_current_node); - auto& cur_context = m_context_stack.back(); - cur_context.line = line; - cur_context.indent = indent; - cur_context.state = context_state_t::BLOCK_SEQUENCE; +// #include +// _______ __ __ __ _____ __ __ __ +// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library +// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 +// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML +// +// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani +// SPDX-License-Identifier: MIT - mp_current_node = &(mp_current_node->as_seq().back()); - apply_directive_set(*mp_current_node); - parse_context entry_context = cur_context; - entry_context.state = context_state_t::BLOCK_SEQUENCE_ENTRY; - entry_context.p_node = mp_current_node; - m_context_stack.emplace_back(std::move(entry_context)); +#ifndef FK_YAML_DETAIL_META_INPUT_ADAPTER_TRAITS_HPP +#define FK_YAML_DETAIL_META_INPUT_ADAPTER_TRAITS_HPP - token = lexer.get_next_token(); - line = lexer.get_lines_processed(); - indent = lexer.get_last_token_begin_pos(); +#include - const bool has_props = deserialize_node_properties(lexer, token, line, indent); - if (has_props) { - const uint32_t line_after_props = lexer.get_lines_processed(); - if (line == line_after_props) { - // Skip updating the current indent to avoid stacking a wrong indentation. - // - // ```yaml - // &foo bar: baz - // ^ - // the correct indent width for the "bar" node key. 
- // ``` - continue; - } +// #include - // if node properties and the followed node are on different lines (i.e., the properties are - // for a container node), the application and the line advancement must happen here. - // Otherwise, a false indent error will be emitted. See - // https://github.com/fktn-k/fkYAML/issues/368 for more details. - line = line_after_props; - indent = lexer.get_last_token_begin_pos(); - *mp_current_node = basic_node_type::mapping(); - m_context_stack.emplace_back( - line_after_props, indent, context_state_t::BLOCK_MAPPING, mp_current_node); - apply_directive_set(*mp_current_node); - apply_node_properties(*mp_current_node); - } +// #include - continue; - } +// #include - if (indent <= m_context_stack.back().indent) { - FK_YAML_ASSERT(m_context_stack.back().state == context_state_t::MAPPING_VALUE); - // Mapping values can be omitted and are considered to be null. - // ```yaml - // foo: - // bar: - // baz: - // qux: - // # -> {foo: null, bar: {baz: null}, qux: null} - // ``` - pop_to_parent_node(line, indent, [indent](const parse_context& c) { - return (c.state == context_state_t::BLOCK_MAPPING) && (indent == c.indent); - }); - } +FK_YAML_DETAIL_NAMESPACE_BEGIN - // defer checking the existence of a key separator after the following scalar until the next - // deserialize_scalar() call. - continue; - } +/////////////////////////////////////////// +// Input Adapter API detection traits +/////////////////////////////////////////// - // handle explicit mapping key separators. - FK_YAML_ASSERT(m_context_stack.back().state == context_state_t::BLOCK_MAPPING_EXPLICIT_KEY); +/// @brief A type which represents get_buffer_view function. +/// @tparam T A target type. 
+template +using get_buffer_view_fn_t = decltype(std::declval().get_buffer_view()); - basic_node_type key_node = std::move(*m_context_stack.back().p_node); - m_context_stack.pop_back(); - m_context_stack.back().p_node->as_map().emplace(key_node, basic_node_type()); - mp_current_node = &(m_context_stack.back().p_node->operator[](std::move(key_node))); - m_context_stack.emplace_back( - old_line, old_indent, context_state_t::BLOCK_MAPPING_EXPLICIT_VALUE, mp_current_node); +/// @brief Type traits to check if InputAdapterType has get_buffer_view member function. +/// @tparam InputAdapterType An input adapter type to check if it has get_buffer_view function. +/// @tparam typename N/A +template +using has_get_buffer_view = is_detected; - if (token.type == lexical_token_t::SEQUENCE_BLOCK_PREFIX) { - *mp_current_node = basic_node_type::sequence({basic_node_type()}); - apply_directive_set(*mp_current_node); - apply_node_properties(*mp_current_node); - m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_SEQUENCE, mp_current_node); +//////////////////////////////// +// is_input_adapter traits +//////////////////////////////// - mp_current_node = &(mp_current_node->as_seq().back()); - parse_context entry_context = m_context_stack.back(); - entry_context.state = context_state_t::BLOCK_SEQUENCE_ENTRY; - entry_context.p_node = mp_current_node; - m_context_stack.emplace_back(std::move(entry_context)); - break; - } +/// @brief Type traits to check if T is an input adapter type. +/// @tparam T A target type. +/// @tparam typename N/A +template +struct is_input_adapter : std::false_type {}; - continue; - } - case lexical_token_t::ANCHOR_PREFIX: - case lexical_token_t::TAG_PREFIX: - deserialize_node_properties(lexer, token, line, indent); - // Skip updating the current indent to avoid stacking a wrong indentation. - // Note that node properties for block sequences as a mapping value are processed when a - // `lexical_token_t::KEY_SEPARATOR` token is processed. 
- // - // ```yaml - // &foo bar: baz - // ^ - // the correct indent width for the "bar" node key. - // ``` - continue; - case lexical_token_t::SEQUENCE_BLOCK_PREFIX: { - FK_YAML_ASSERT(!m_context_stack.empty()); - const uint32_t parent_indent = m_context_stack.back().indent; - if (indent == parent_indent) { - // If the previous block sequence entry is empty, just move to the parent context. - // ```yaml - // foo: - // - - // - bar - // # ^ (here) - // # -> {foo: [null, bar]} - // ``` - pop_to_parent_node(line, indent, [](const parse_context& c) { - return c.state == context_state_t::BLOCK_SEQUENCE; - }); - } - else if (indent < parent_indent) { - pop_to_parent_node(line, indent, [indent](const parse_context& c) { - return c.state == context_state_t::BLOCK_SEQUENCE && indent == c.indent; - }); - } - else /*parent_indent < indent*/ { - if FK_YAML_UNLIKELY (m_context_stack.back().state == context_state_t::BLOCK_SEQUENCE) { - // bad indentation like the following YAML: - // ```yaml - // - "foo" - // - bar - // # ^ - // ``` - throw parse_error("bad indentation of a mapping entry.", line, indent); - } +/// @brief A partial specialization of is_input_adapter if T is an input adapter type. 
+/// @tparam InputAdapterType +template +struct is_input_adapter::value>> : std::true_type { +}; - *mp_current_node = basic_node_type::sequence(); - m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_SEQUENCE, mp_current_node); - apply_directive_set(*mp_current_node); - apply_node_properties(*mp_current_node); - } +FK_YAML_DETAIL_NAMESPACE_END - auto& seq = mp_current_node->as_seq(); - seq.emplace_back(basic_node_type()); - mp_current_node = &(seq.back()); - apply_directive_set(*mp_current_node); - m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_SEQUENCE_ENTRY, mp_current_node); - break; - } - case lexical_token_t::SEQUENCE_FLOW_BEGIN: - if (m_flow_context_depth == 0) { - lexer.set_context_state(true); +#endif /* FK_YAML_DETAIL_META_INPUT_ADAPTER_TRAITS_HPP */ - if (indent <= m_context_stack.back().indent) { - pop_to_parent_node(line, indent, [indent](const parse_context& c) { - switch (c.state) { - case context_state_t::BLOCK_MAPPING: - case context_state_t::MAPPING_VALUE: - return indent == c.indent; - default: - return false; - } - }); - } - } - else if FK_YAML_UNLIKELY (m_flow_token_state == flow_token_state_t::NEEDS_SEPARATOR_OR_SUFFIX) { - throw parse_error("Flow sequence beginning is found without separated with a comma.", line, indent); - } +// #include - ++m_flow_context_depth; +// #include - switch (m_context_stack.back().state) { - case context_state_t::BLOCK_SEQUENCE: - case context_state_t::FLOW_SEQUENCE: - mp_current_node->as_seq().emplace_back(basic_node_type::sequence()); - mp_current_node = &(mp_current_node->as_seq().back()); - m_context_stack.emplace_back(line, indent, context_state_t::FLOW_SEQUENCE, mp_current_node); - break; - case context_state_t::BLOCK_MAPPING: - case context_state_t::FLOW_MAPPING: - // heap-allocated node will be freed in handling the corresponding SEQUENCE_FLOW_END event. 
- m_context_stack.emplace_back( - line, indent, context_state_t::FLOW_SEQUENCE_KEY, new basic_node_type(node_type::SEQUENCE)); - mp_current_node = m_context_stack.back().p_node; - break; - default: { - *mp_current_node = basic_node_type::sequence(); - parse_context& last_context = m_context_stack.back(); - last_context.line = line; - last_context.indent = indent; - last_context.state = context_state_t::FLOW_SEQUENCE; - break; - } - } +// #include - apply_directive_set(*mp_current_node); - apply_node_properties(*mp_current_node); +// #include +// _______ __ __ __ _____ __ __ __ +// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library +// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 +// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML +// +// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani +// SPDX-License-Identifier: MIT - m_flow_token_state = flow_token_state_t::NEEDS_VALUE_OR_SUFFIX; - break; - case lexical_token_t::SEQUENCE_FLOW_END: { - if FK_YAML_UNLIKELY (m_flow_context_depth == 0) { - throw parse_error("Flow sequence ending is found outside the flow context.", line, indent); - } +#ifndef FK_YAML_DETAIL_NODE_PROPERTY_HPP +#define FK_YAML_DETAIL_NODE_PROPERTY_HPP - if (--m_flow_context_depth == 0) { - lexer.set_context_state(false); - } +#include - // find the corresponding flow sequence beginning. - auto itr = std::find_if( // LCOV_EXCL_LINE - m_context_stack.rbegin(), - m_context_stack.rend(), - [](const parse_context& c) { - switch (c.state) { - case context_state_t::FLOW_SEQUENCE_KEY: - case context_state_t::FLOW_SEQUENCE: - return true; - default: - return false; - } - }); +// #include - const bool is_valid = itr != m_context_stack.rend(); - if FK_YAML_UNLIKELY (!is_valid) { - throw parse_error("No corresponding flow sequence beginning is found.", line, indent); - } - // keep the last state for later processing. 
- parse_context& last_context = m_context_stack.back(); - mp_current_node = last_context.p_node; - last_context.p_node = nullptr; - indent = last_context.indent; - const context_state_t state = last_context.state; - m_context_stack.pop_back(); +FK_YAML_DETAIL_NAMESPACE_BEGIN - // handle cases where the flow sequence is a mapping key node. +struct node_property { + /// The tag name property. + std::string tag {}; // NOLINT(readability-redundant-member-init) necessary for older compilers + /// The anchor name property. + std::string anchor {}; // NOLINT(readability-redundant-member-init) necessary for older compilers +}; - if (!m_context_stack.empty() && state == context_state_t::FLOW_SEQUENCE_KEY) { - basic_node_type key_node = std::move(*mp_current_node); - delete mp_current_node; - mp_current_node = m_context_stack.back().p_node; - m_flow_token_state = flow_token_state_t::NEEDS_VALUE_OR_SUFFIX; +FK_YAML_DETAIL_NAMESPACE_END - add_new_key(std::move(key_node), line, indent); - break; - } +#endif /* FK_YAML_DETAIL_NODE_PROPERTY_HPP */ - token = lexer.get_next_token(); - if (token.type == lexical_token_t::KEY_SEPARATOR) { - basic_node_type key_node = basic_node_type::mapping(); - apply_directive_set(key_node); - mp_current_node->swap(key_node); +// #include - m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); - m_flow_token_state = flow_token_state_t::NEEDS_VALUE_OR_SUFFIX; +// #include - add_new_key(std::move(key_node), line, indent); - } - else { - if (!m_context_stack.empty()) { - mp_current_node = m_context_stack.back().p_node; - } - if (m_flow_context_depth > 0) { - m_flow_token_state = flow_token_state_t::NEEDS_SEPARATOR_OR_SUFFIX; - } - } - indent = lexer.get_last_token_begin_pos(); - line = lexer.get_lines_processed(); - continue; - } - case lexical_token_t::MAPPING_FLOW_BEGIN: - if (m_flow_context_depth == 0) { - lexer.set_context_state(true); +FK_YAML_DETAIL_NAMESPACE_BEGIN - if (indent <= 
m_context_stack.back().indent) { - pop_to_parent_node(line, indent, [indent](const parse_context& c) { - switch (c.state) { - case context_state_t::BLOCK_MAPPING: - case context_state_t::MAPPING_VALUE: - return indent == c.indent; - default: - return false; - } - }); - } - } - else if FK_YAML_UNLIKELY (m_flow_token_state == flow_token_state_t::NEEDS_SEPARATOR_OR_SUFFIX) { - throw parse_error("Flow mapping beginning is found without separated with a comma.", line, indent); - } +/// @brief A class which provides the feature of deserializing YAML documents. +/// @tparam BasicNodeType A type of the container for deserialized YAML values. +template +class basic_deserializer { + static_assert(is_basic_node::value, "basic_deserializer only accepts basic_node<...>"); - ++m_flow_context_depth; + /** A type for the target basic_node. */ + using basic_node_type = BasicNodeType; + /** A type for the lexical analyzer. */ + using lexer_type = lexical_analyzer; + /** A type for the document metainfo. */ + using doc_metainfo_type = document_metainfo; + /** A type for the tag resolver. */ + using tag_resolver_type = tag_resolver; + /** A type for the scalar parser. */ + using scalar_parser_type = scalar_parser; + /** A type for sequence node value containers. */ + using sequence_type = typename basic_node_type::sequence_type; + /** A type for mapping node value containers. */ + using mapping_type = typename basic_node_type::mapping_type; - switch (m_context_stack.back().state) { - case context_state_t::BLOCK_SEQUENCE: - case context_state_t::FLOW_SEQUENCE: - mp_current_node->as_seq().emplace_back(basic_node_type::mapping()); - mp_current_node = &(mp_current_node->as_seq().back()); - m_context_stack.emplace_back(line, indent, context_state_t::FLOW_MAPPING, mp_current_node); - break; - case context_state_t::BLOCK_MAPPING: - case context_state_t::FLOW_MAPPING: - // heap-allocated node will be freed in handling the corresponding MAPPING_FLOW_END event. 
- m_context_stack.emplace_back( - line, indent, context_state_t::FLOW_MAPPING_KEY, new basic_node_type(node_type::MAPPING)); - mp_current_node = m_context_stack.back().p_node; - break; - default: { - *mp_current_node = basic_node_type::mapping(); - parse_context& last_context = m_context_stack.back(); - last_context.line = line; - last_context.indent = indent; - last_context.state = context_state_t::FLOW_MAPPING; - break; - } - } + /// @brief Definition of state types of parse contexts. + enum class context_state_t : std::uint8_t { + BLOCK_MAPPING, //!< The underlying node is a block mapping. + BLOCK_MAPPING_EXPLICIT_KEY, //!< The underlying node is an explicit block mapping key. + BLOCK_MAPPING_EXPLICIT_VALUE, //!< The underlying node is an explicit block mapping value. + MAPPING_VALUE, //!< The underlying node is a block mapping value. + BLOCK_SEQUENCE, //!< The underlying node is a block sequence. + BLOCK_SEQUENCE_ENTRY, //!< The underlying node is a block sequence entry. + FLOW_SEQUENCE, //!< The underlying node is a flow sequence. + FLOW_SEQUENCE_KEY, //!< The underlying node is a flow sequence as a key. + FLOW_MAPPING, //!< The underlying node is a flow mapping. + FLOW_MAPPING_KEY, //!< The underlying node is a flow mapping as a key. + }; - apply_directive_set(*mp_current_node); - apply_node_properties(*mp_current_node); + /// @brief Context information set for parsing. + struct parse_context { + /// @brief Construct a new parse_context object. + parse_context() = default; - line = lexer.get_lines_processed(); - indent = lexer.get_last_token_begin_pos(); + /// @brief Construct a new parse_context object with non-default values for each parameter. + /// @param line The current line. (count from zero) + /// @param indent The indentation width in the current line. (count from zero) + /// @param state The parse context type. + /// @param p_node The underlying node associated to this context. 
+ parse_context(uint32_t line, uint32_t indent, context_state_t state, basic_node_type* p_node) noexcept + : line(line), + indent(indent), + state(state), + p_node(p_node) { + } - m_flow_token_state = flow_token_state_t::NEEDS_VALUE_OR_SUFFIX; + parse_context(const parse_context&) noexcept = default; + parse_context& operator=(const parse_context&) noexcept = default; + parse_context(parse_context&&) noexcept = default; + parse_context& operator=(parse_context&&) noexcept = default; + + ~parse_context() { + switch (state) { + case context_state_t::BLOCK_MAPPING_EXPLICIT_KEY: + case context_state_t::FLOW_SEQUENCE_KEY: + case context_state_t::FLOW_MAPPING_KEY: + delete p_node; + p_node = nullptr; break; - case lexical_token_t::MAPPING_FLOW_END: { - if FK_YAML_UNLIKELY (m_flow_context_depth == 0) { - throw parse_error("Flow mapping ending is found outside the flow context.", line, indent); - } + default: + break; + } + } - if (--m_flow_context_depth == 0) { - lexer.set_context_state(false); - } + /// The current line. (count from zero) + uint32_t line {0}; + /// The indentation width in the current line. (count from zero) + uint32_t indent {0}; + /// The parse context type. + context_state_t state {context_state_t::BLOCK_MAPPING}; + /// The pointer to the associated node to this context. + basic_node_type* p_node {nullptr}; + }; + + /// @brief Definitions of state types for expected flow token hints. + enum class flow_token_state_t : std::uint8_t { + NEEDS_VALUE_OR_SUFFIX, //!< Either value or flow suffix (`]` or `}`) + NEEDS_SEPARATOR_OR_SUFFIX, //!< Either separator (`,`) or flow suffix (`]` or `}`) + }; + +public: + /// @brief Construct a new basic_deserializer object. + basic_deserializer() = default; + +public: + /// @brief Deserialize a single YAML document into a YAML node. + /// @note + /// If the input consists of multiple YAML documents, this function only parses the first. 
+ /// If the input may have multiple YAML documents all of which must be parsed into nodes, + /// prefer the `deserialize_docs()` function. + /// @tparam InputAdapterType The type of an input adapter object. + /// @param input_adapter An input adapter object for the input source buffer. + /// @return basic_node_type A root YAML node deserialized from the source string. + template ::value, int> = 0> + basic_node_type deserialize(InputAdapterType&& input_adapter) { // NOLINT(cppcoreguidelines-missing-std-forward) + const str_view input_view = input_adapter.get_buffer_view(); + lexer_type lexer(input_view); + + lexical_token_t type {lexical_token_t::END_OF_BUFFER}; + return deserialize_document(lexer, type); + } + + /// @brief Deserialize multiple YAML documents into YAML nodes. + /// @tparam InputAdapterType The type of an adapter object. + /// @param input_adapter An input adapter object for the input source buffer. + /// @return std::vector Root YAML nodes for deserialized YAML documents. + template ::value, int> = 0> + // NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward) + std::vector deserialize_docs(InputAdapterType&& input_adapter) { + const str_view input_view = input_adapter.get_buffer_view(); + lexer_type lexer(input_view); + + std::vector nodes {}; + lexical_token_t type {lexical_token_t::END_OF_BUFFER}; - // find the corresponding flow mapping beginning. 
- auto itr = std::find_if( // LCOV_EXCL_LINE - m_context_stack.rbegin(), - m_context_stack.rend(), - [](const parse_context& c) { - switch (c.state) { - case context_state_t::FLOW_MAPPING_KEY: - case context_state_t::FLOW_MAPPING: - return true; - default: - return false; - } - }); + do { + nodes.emplace_back(deserialize_document(lexer, type)); + } while (type != lexical_token_t::END_OF_BUFFER); - const bool is_valid = itr != m_context_stack.rend(); - if FK_YAML_UNLIKELY (!is_valid) { - throw parse_error("No corresponding flow mapping beginning is found.", line, indent); - } + return nodes; + } // LCOV_EXCL_LINE - // keep the last state for later processing. - parse_context& last_context = m_context_stack.back(); - mp_current_node = last_context.p_node; - last_context.p_node = nullptr; - indent = last_context.indent; - const context_state_t state = last_context.state; - m_context_stack.pop_back(); +private: + /// @brief Deserialize a YAML document into a YAML node. + /// @param lexer The lexical analyzer to be used. + /// @param last_type The variable to store the last lexical token type. + /// @return basic_node_type A root YAML node deserialized from the YAML document. + basic_node_type deserialize_document(lexer_type& lexer, lexical_token_t& last_type) { + lexical_token token {}; - // handle cases where the flow mapping is a mapping key node. + basic_node_type root; + mp_current_node = &root; + mp_meta = root.mp_meta; - if (!m_context_stack.empty() && state == context_state_t::FLOW_MAPPING_KEY) { - basic_node_type key_node = std::move(*mp_current_node); - delete mp_current_node; - mp_current_node = m_context_stack.back().p_node; - m_flow_token_state = flow_token_state_t::NEEDS_VALUE_OR_SUFFIX; + // parse directives first. 
+ deserialize_directives(lexer, token); - add_new_key(std::move(key_node), line, indent); - break; - } + // parse node properties for root node if any + uint32_t line = lexer.get_lines_processed(); + uint32_t indent = lexer.get_last_token_begin_pos(); + const bool found_props = deserialize_node_properties(lexer, token, line, indent); - token = lexer.get_next_token(); - if (token.type == lexical_token_t::KEY_SEPARATOR) { - basic_node_type key_node = basic_node_type::mapping(); - apply_directive_set(key_node); - mp_current_node->swap(key_node); + switch (token.type) { + case lexical_token_t::SEQUENCE_BLOCK_PREFIX: { + root = basic_node_type::sequence({basic_node_type()}); + apply_directive_set(root); + if (found_props) { + // If node properties are found before the block sequence entry prefix, the properties belong to the + // root sequence node. + apply_node_properties(root); + } - m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); - m_flow_token_state = flow_token_state_t::NEEDS_VALUE_OR_SUFFIX; + parse_context context( + lexer.get_lines_processed(), lexer.get_last_token_begin_pos(), context_state_t::BLOCK_SEQUENCE, &root); + m_context_stack.emplace_back(context); - add_new_key(std::move(key_node), line, indent); + mp_current_node = &(root.as_seq().back()); + apply_directive_set(*mp_current_node); + context.state = context_state_t::BLOCK_SEQUENCE_ENTRY; + context.p_node = mp_current_node; + m_context_stack.emplace_back(std::move(context)); + + token = lexer.get_next_token(); + line = lexer.get_lines_processed(); + indent = lexer.get_last_token_begin_pos(); + break; + } + case lexical_token_t::SEQUENCE_FLOW_BEGIN: + ++m_flow_context_depth; + lexer.set_context_state(true); + root = basic_node_type::sequence(); + apply_directive_set(root); + apply_node_properties(root); + m_context_stack.emplace_back( + lexer.get_lines_processed(), lexer.get_last_token_begin_pos(), context_state_t::FLOW_SEQUENCE, &root); + token = 
lexer.get_next_token(); + line = lexer.get_lines_processed(); + indent = lexer.get_last_token_begin_pos(); + break; + case lexical_token_t::MAPPING_FLOW_BEGIN: + ++m_flow_context_depth; + lexer.set_context_state(true); + root = basic_node_type::mapping(); + apply_directive_set(root); + apply_node_properties(root); + m_context_stack.emplace_back( + lexer.get_lines_processed(), lexer.get_last_token_begin_pos(), context_state_t::FLOW_MAPPING, &root); + token = lexer.get_next_token(); + line = lexer.get_lines_processed(); + indent = lexer.get_last_token_begin_pos(); + break; + case lexical_token_t::EXPLICIT_KEY_PREFIX: { + // If the explicit key prefix (? ) is detected here, the root node of current document must be a mapping. + // Also, tag and anchor if any are associated to the root mapping node. + // No get_next_token() call here to handle the token event in the deserialize_node() function. + root = basic_node_type::mapping(); + apply_directive_set(root); + apply_node_properties(root); + parse_context context( + lexer.get_lines_processed(), lexer.get_last_token_begin_pos(), context_state_t::BLOCK_MAPPING, &root); + m_context_stack.emplace_back(std::move(context)); + line = lexer.get_lines_processed(); + indent = lexer.get_last_token_begin_pos(); + break; + } + case lexical_token_t::BLOCK_LITERAL_SCALAR: + case lexical_token_t::BLOCK_FOLDED_SCALAR: + // If a block scalar token is detected here, current document contains single scalar. + // Do nothing here since the token is handled in the deserialize_node() function. + break; + case lexical_token_t::PLAIN_SCALAR: + case lexical_token_t::SINGLE_QUOTED_SCALAR: + case lexical_token_t::DOUBLE_QUOTED_SCALAR: + case lexical_token_t::ALIAS_PREFIX: + // Defer handling the above token events until the next call on the deserialize_scalar() function since the + // meaning depends on subsequent events. 
+ if (found_props && line < lexer.get_lines_processed()) { + // If node properties and a followed node are on the different line, the properties belong to the root + // node. + if (m_needs_anchor_impl) { + m_root_anchor_name = m_anchor_name; + m_needs_anchor_impl = false; + m_anchor_name = {}; } - else { - if (!m_context_stack.empty()) { - mp_current_node = m_context_stack.back().p_node; - } - if (m_flow_context_depth > 0) { - m_flow_token_state = flow_token_state_t::NEEDS_SEPARATOR_OR_SUFFIX; - } + + if (m_needs_tag_impl) { + m_root_tag_name = m_tag_name; + m_needs_tag_impl = false; + m_tag_name = {}; } - indent = lexer.get_last_token_begin_pos(); line = lexer.get_lines_processed(); - continue; + indent = lexer.get_last_token_begin_pos(); } - case lexical_token_t::VALUE_SEPARATOR: - FK_YAML_ASSERT(m_flow_context_depth > 0); - if FK_YAML_UNLIKELY (m_flow_token_state != flow_token_state_t::NEEDS_SEPARATOR_OR_SUFFIX) { - throw parse_error("invalid value separator is found.", line, indent); - } - m_flow_token_state = flow_token_state_t::NEEDS_VALUE_OR_SUFFIX; - break; - case lexical_token_t::ALIAS_PREFIX: { - // An alias node must not specify any properties (tag, anchor). - // https://yaml.org/spec/1.2.2/#71-alias-nodes - if FK_YAML_UNLIKELY (m_needs_tag_impl) { - throw parse_error("Tag cannot be specified to an alias node", line, indent); - } - if FK_YAML_UNLIKELY (m_needs_anchor_impl) { - throw parse_error("Anchor cannot be specified to an alias node.", line, indent); - } + break; + default: + // Do nothing since current document has no contents. 
+ break; + } - std::string token_str = std::string(token.str.begin(), token.str.end()); + // parse YAML nodes recursively + deserialize_node(lexer, token, line, indent, last_type); + FK_YAML_ASSERT( + last_type == lexical_token_t::END_OF_BUFFER || last_type == lexical_token_t::END_OF_DIRECTIVES || + last_type == lexical_token_t::END_OF_DOCUMENT); - const auto anchor_counts = static_cast(mp_meta->anchor_table.count(token_str)); - if FK_YAML_UNLIKELY (anchor_counts == 0) { - throw parse_error("The given anchor name must appear prior to the alias node.", line, indent); - } + // reset parameters for the next call. + mp_current_node = nullptr; + mp_meta.reset(); + m_needs_tag_impl = false; + m_needs_anchor_impl = false; + m_flow_context_depth = 0; + m_flow_token_state = flow_token_state_t::NEEDS_VALUE_OR_SUFFIX; + m_context_stack.clear(); - basic_node_type node {}; - node.m_attrs |= detail::node_attr_bits::alias_bit; - node.m_prop.anchor = std::move(token_str); - detail::node_attr_bits::set_anchor_offset(anchor_counts - 1, node.m_attrs); + return root; + } - apply_directive_set(node); - apply_node_properties(node); + /// @brief Deserializes the YAML directives if specified. + /// @param lexer The lexical analyzer to be used. + /// @param last_token Storage for last lexical token type. 
+ void deserialize_directives(lexer_type& lexer, lexical_token& last_token) { + bool lacks_end_of_directives_marker = false; + lexer.set_document_state(true); - deserialize_scalar(lexer, std::move(node), indent, line, token); - continue; - } - case lexical_token_t::PLAIN_SCALAR: - case lexical_token_t::SINGLE_QUOTED_SCALAR: - case lexical_token_t::DOUBLE_QUOTED_SCALAR: { - tag_t tag_type {tag_t::NONE}; - if (m_needs_tag_impl) { - tag_type = tag_resolver_type::resolve_tag(m_tag_name, mp_meta); + for (;;) { + const lexical_token token = lexer.get_next_token(); + + switch (token.type) { + case lexical_token_t::YAML_VER_DIRECTIVE: + if FK_YAML_UNLIKELY (mp_meta->is_version_specified) { + throw parse_error( + "YAML version cannot be specified more than once.", + lexer.get_lines_processed(), + lexer.get_last_token_begin_pos()); + } + + mp_meta->version = convert_yaml_version(lexer.get_yaml_version()); + mp_meta->is_version_specified = true; + lacks_end_of_directives_marker = true; + break; + case lexical_token_t::TAG_DIRECTIVE: { + const str_view tag_handle_view = lexer.get_tag_handle(); + switch (tag_handle_view.size()) { + case 1 /* ! */: { + const bool is_already_specified = !mp_meta->primary_handle_prefix.empty(); + if FK_YAML_UNLIKELY (is_already_specified) { + throw parse_error( + "Primary handle cannot be specified more than once.", + lexer.get_lines_processed(), + lexer.get_last_token_begin_pos()); + } + const str_view tag_prefix = lexer.get_tag_prefix(); + mp_meta->primary_handle_prefix.assign(tag_prefix.begin(), tag_prefix.end()); + lacks_end_of_directives_marker = true; + break; + } + case 2 /* !! 
*/: { + const bool is_already_specified = !mp_meta->secondary_handle_prefix.empty(); + if FK_YAML_UNLIKELY (is_already_specified) { + throw parse_error( + "Secondary handle cannot be specified more than once.", + lexer.get_lines_processed(), + lexer.get_last_token_begin_pos()); + } + const str_view tag_prefix = lexer.get_tag_prefix(); + mp_meta->secondary_handle_prefix.assign(tag_prefix.begin(), tag_prefix.end()); + lacks_end_of_directives_marker = true; + break; } - - basic_node_type node = scalar_parser_type(line, indent).parse_flow(token.type, tag_type, token.str); - apply_directive_set(node); - apply_node_properties(node); - - deserialize_scalar(lexer, std::move(node), indent, line, token); - continue; - } - case lexical_token_t::BLOCK_LITERAL_SCALAR: - case lexical_token_t::BLOCK_FOLDED_SCALAR: { - tag_t tag_type {tag_t::NONE}; - if (m_needs_tag_impl) { - tag_type = tag_resolver_type::resolve_tag(m_tag_name, mp_meta); + default /* !! */: { + std::string tag_handle(tag_handle_view.begin(), tag_handle_view.end()); + const str_view tag_prefix_view = lexer.get_tag_prefix(); + std::string tag_prefix(tag_prefix_view.begin(), tag_prefix_view.end()); + const bool is_already_specified = + !(mp_meta->named_handle_map.emplace(std::move(tag_handle), std::move(tag_prefix)).second); + if FK_YAML_UNLIKELY (is_already_specified) { + throw parse_error( + "The same named handle cannot be specified more than once.", + lexer.get_lines_processed(), + lexer.get_last_token_begin_pos()); + } + lacks_end_of_directives_marker = true; + break; } - - basic_node_type node = - scalar_parser_type(line, indent) - .parse_block(token.type, tag_type, token.str, lexer.get_block_scalar_header()); - apply_directive_set(node); - apply_node_properties(node); - - deserialize_scalar(lexer, std::move(node), indent, line, token); - continue; + } + break; } - // these tokens end parsing the current YAML document. - case lexical_token_t::END_OF_BUFFER: - // This handles an empty input. 
- last_type = token.type; - return; + case lexical_token_t::INVALID_DIRECTIVE: + // TODO: should output a warning log. Currently just ignore this case. + break; case lexical_token_t::END_OF_DIRECTIVES: - case lexical_token_t::END_OF_DOCUMENT: - if FK_YAML_UNLIKELY (m_flow_context_depth > 0) { - throw parse_error("An invalid document marker found in a flow collection", line, indent); + lacks_end_of_directives_marker = false; + break; + default: + if FK_YAML_UNLIKELY (lacks_end_of_directives_marker) { + throw parse_error( + "The end of directives marker (---) is missing after directives.", + lexer.get_lines_processed(), + lexer.get_last_token_begin_pos()); } - last_type = token.type; + // end the parsing of directives if the other tokens are found. + last_token = token; + lexer.set_document_state(false); return; - // no way to come here while lexically analyzing document contents. - case lexical_token_t::YAML_VER_DIRECTIVE: // LCOV_EXCL_LINE - case lexical_token_t::TAG_DIRECTIVE: // LCOV_EXCL_LINE - case lexical_token_t::INVALID_DIRECTIVE: // LCOV_EXCL_LINE - detail::unreachable(); // LCOV_EXCL_LINE } - - token = lexer.get_next_token(); - indent = lexer.get_last_token_begin_pos(); - line = lexer.get_lines_processed(); - } while (token.type != lexical_token_t::END_OF_BUFFER); - - last_type = token.type; + } } - /// @brief Deserializes YAML node properties (anchor and/or tag names) if they exist + /// @brief Deserializes the YAML nodes recursively. /// @param lexer The lexical analyzer to be used. - /// @param last_type The variable to store the last lexical token type. - /// @param line The variable to store the line of either the first property or the last non-property token. - /// @param indent The variable to store the indent of either the first property or the last non-property token. - /// @return true if any property is found, false otherwise. 
- bool deserialize_node_properties(lexer_type& lexer, lexical_token& last_token, uint32_t& line, uint32_t& indent) { - m_needs_anchor_impl = m_needs_tag_impl = false; + /// @param first_type The first lexical token. + /// @param last_type Storage for last lexical token type. + void deserialize_node( + lexer_type& lexer, const lexical_token& first_token, uint32_t first_line, uint32_t first_indent, + lexical_token_t& last_type) { + lexical_token token = first_token; + uint32_t line = first_line; + uint32_t indent = first_indent; - lexical_token token = last_token; - bool ends_loop {false}; do { - if (line < lexer.get_lines_processed()) { - break; - } - switch (token.type) { - case lexical_token_t::ANCHOR_PREFIX: - if FK_YAML_UNLIKELY (m_needs_anchor_impl) { - throw parse_error( - "anchor name cannot be specified more than once to the same node.", - lexer.get_lines_processed(), - lexer.get_last_token_begin_pos()); + case lexical_token_t::EXPLICIT_KEY_PREFIX: { + const bool needs_to_move_back = indent == 0 || indent < m_context_stack.back().indent; + if (needs_to_move_back) { + pop_to_parent_node(line, indent, [indent](const parse_context& c) { + return c.state == context_state_t::BLOCK_MAPPING && indent == c.indent; + }); } - m_anchor_name = token.str; - m_needs_anchor_impl = true; - - if (!m_needs_tag_impl) { - line = lexer.get_lines_processed(); - indent = lexer.get_last_token_begin_pos(); + switch (m_context_stack.back().state) { + case context_state_t::MAPPING_VALUE: + case context_state_t::BLOCK_MAPPING_EXPLICIT_KEY: + case context_state_t::BLOCK_MAPPING_EXPLICIT_VALUE: + case context_state_t::BLOCK_SEQUENCE_ENTRY: + // This path is needed in case the input contains nested explicit keys. + // ```yaml + // foo: + // ? ? foo + // : bar + // : ? baz + // : - ? 
qux + // : 123 + // ``` + *mp_current_node = basic_node_type::mapping(); + apply_directive_set(*mp_current_node); + m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); + break; + default: + break; } token = lexer.get_next_token(); - break; - case lexical_token_t::TAG_PREFIX: { - if FK_YAML_UNLIKELY (m_needs_tag_impl) { - throw parse_error( - "tag name cannot be specified more than once to the same node.", + if (token.type == lexical_token_t::SEQUENCE_BLOCK_PREFIX) { + // heap-allocated node will be freed in handling the corresponding KEY_SEPARATOR event + auto* p_node = new basic_node_type(node_type::SEQUENCE); + m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING_EXPLICIT_KEY, p_node); + + apply_directive_set(*p_node); + parse_context context( lexer.get_lines_processed(), - lexer.get_last_token_begin_pos()); + lexer.get_last_token_begin_pos(), + context_state_t::BLOCK_SEQUENCE, + p_node); + m_context_stack.emplace_back(context); + + p_node->as_seq().emplace_back(basic_node_type()); + mp_current_node = &(p_node->as_seq().back()); + apply_directive_set(*mp_current_node); + context.state = context_state_t::BLOCK_SEQUENCE_ENTRY; + context.p_node = mp_current_node; + m_context_stack.emplace_back(std::move(context)); + + break; } - m_tag_name = token.str; - m_needs_tag_impl = true; + // heap-allocated node will be freed in handling the corresponding KEY_SEPARATOR event + m_context_stack.emplace_back( + line, indent, context_state_t::BLOCK_MAPPING_EXPLICIT_KEY, new basic_node_type()); + mp_current_node = m_context_stack.back().p_node; + apply_directive_set(*mp_current_node); + indent = lexer.get_last_token_begin_pos(); + line = lexer.get_lines_processed(); - if (!m_needs_anchor_impl) { - line = lexer.get_lines_processed(); - indent = lexer.get_last_token_begin_pos(); + continue; + } + case lexical_token_t::KEY_SEPARATOR: { + FK_YAML_ASSERT(!m_context_stack.empty()); + if FK_YAML_UNLIKELY 
(m_context_stack.back().state == context_state_t::BLOCK_SEQUENCE_ENTRY) { + // empty mapping keys are not supported. + // ```yaml + // - : foo + // ``` + throw parse_error("sequence key should not be empty.", line, indent); + } + + if (m_flow_context_depth > 0) { + break; } + // hold the line count of the key separator for later use. + const uint32_t old_indent = indent; + const uint32_t old_line = line; + token = lexer.get_next_token(); - break; - } - default: - ends_loop = true; - break; - } - } while (!ends_loop); + line = lexer.get_lines_processed(); + indent = lexer.get_last_token_begin_pos(); - last_token = token; - const bool prop_specified = m_needs_anchor_impl || m_needs_tag_impl; - if (!prop_specified) { - line = lexer.get_lines_processed(); - indent = lexer.get_last_token_begin_pos(); - } + const bool found_props = deserialize_node_properties(lexer, token, line, indent); + if (found_props && line == lexer.get_lines_processed()) { + // defer applying node properties for the subsequent node on the same line. + continue; + } + + line = lexer.get_lines_processed(); + indent = lexer.get_last_token_begin_pos(); + + const bool is_implicit_same_line = + (line == old_line) && (m_context_stack.empty() || old_indent > m_context_stack.back().indent); + if (is_implicit_same_line) { + // a key separator for an implicit key with its value on the same line. + continue; + } + + if (line > old_line) { + if (m_needs_tag_impl) { + const tag_t tag_type = tag_resolver_type::resolve_tag(m_tag_name, mp_meta); + if (tag_type == tag_t::MAPPING || tag_type == tag_t::CUSTOM_TAG) { + // set YAML node properties here to distinguish them from those for the first key node + // as shown in the following snippet: + // + // ```yaml + // foo: !!map + // !!str 123: true + // ^ + // this !!str tag overwrites the preceding !!map tag. 
+ // ``` + *mp_current_node = basic_node_type::mapping(); + apply_directive_set(*mp_current_node); + apply_node_properties(*mp_current_node); + m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); + continue; + } + } + + if (token.type == lexical_token_t::SEQUENCE_BLOCK_PREFIX) { + // a key separator preceding block sequence entries + *mp_current_node = basic_node_type::sequence({basic_node_type()}); + apply_directive_set(*mp_current_node); + apply_node_properties(*mp_current_node); + auto& cur_context = m_context_stack.back(); + cur_context.line = line; + cur_context.indent = indent; + cur_context.state = context_state_t::BLOCK_SEQUENCE; - return prop_specified; - } + mp_current_node = &(mp_current_node->as_seq().back()); + apply_directive_set(*mp_current_node); + parse_context entry_context = cur_context; + entry_context.state = context_state_t::BLOCK_SEQUENCE_ENTRY; + entry_context.p_node = mp_current_node; + m_context_stack.emplace_back(std::move(entry_context)); - /// @brief Add new key string to the current YAML node. - /// @param key a key string to be added to the current YAML node. - /// @param line The line where the key is found. - /// @param indent The indentation width in the current line where the key is found. 
- void add_new_key(basic_node_type&& key, const uint32_t line, const uint32_t indent) { - if (m_flow_context_depth == 0) { - if FK_YAML_UNLIKELY (m_context_stack.back().indent < indent) { - // bad indentation like the following YAML: - // ```yaml - // foo: true - // baz: 123 - // # ^ - // ``` - throw parse_error("bad indentation of a mapping entry.", line, indent); - } + token = lexer.get_next_token(); + line = lexer.get_lines_processed(); + indent = lexer.get_last_token_begin_pos(); - pop_to_parent_node(line, indent, [indent](const parse_context& c) { - return (c.state == context_state_t::BLOCK_MAPPING) && (indent == c.indent); - }); - } - else { - if FK_YAML_UNLIKELY (m_flow_token_state != flow_token_state_t::NEEDS_VALUE_OR_SUFFIX) { - throw parse_error("Flow mapping entry is found without separated with a comma.", line, indent); - } + const bool has_props = deserialize_node_properties(lexer, token, line, indent); + if (has_props) { + const uint32_t line_after_props = lexer.get_lines_processed(); + if (line == line_after_props) { + // Skip updating the current indent to avoid stacking a wrong indentation. + // + // ```yaml + // &foo bar: baz + // ^ + // the correct indent width for the "bar" node key. + // ``` + continue; + } - if (mp_current_node->is_sequence()) { - mp_current_node->as_seq().emplace_back(basic_node_type::mapping()); - mp_current_node = &(mp_current_node->operator[](mp_current_node->size() - 1)); - m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); - } - } + // if node properties and the followed node are on different lines (i.e., the properties are + // for a container node), the application and the line advancement must happen here. + // Otherwise, a false indent error will be emitted. See + // https://github.com/fktn-k/fkYAML/issues/368 for more details. 
+ line = line_after_props; + indent = lexer.get_last_token_begin_pos(); + *mp_current_node = basic_node_type::mapping(); + m_context_stack.emplace_back( + line_after_props, indent, context_state_t::BLOCK_MAPPING, mp_current_node); + apply_directive_set(*mp_current_node); + apply_node_properties(*mp_current_node); + } - auto itr = mp_current_node->as_map().emplace(std::move(key), basic_node_type()); - if FK_YAML_UNLIKELY (!itr.second) { - throw parse_error("Detected duplication in mapping keys.", line, indent); - } + continue; + } - mp_current_node = &(itr.first->second); - const parse_context& key_context = m_context_stack.back(); - m_context_stack.emplace_back( - key_context.line, key_context.indent, context_state_t::MAPPING_VALUE, mp_current_node); - } + if (indent <= m_context_stack.back().indent) { + FK_YAML_ASSERT(m_context_stack.back().state == context_state_t::MAPPING_VALUE); - /// @brief Assign node value to the current node. - /// @param node_value A rvalue basic_node_type object to be assigned to the current node. - void assign_node_value(basic_node_type&& node_value, const uint32_t line, const uint32_t indent) { - if (mp_current_node->is_sequence()) { - FK_YAML_ASSERT(m_flow_context_depth > 0); + // Mapping values can be omitted and are considered to be null. + // ```yaml + // foo: + // bar: + // baz: + // qux: + // # -> {foo: null, bar: {baz: null}, qux: null} + // ``` + pop_to_parent_node(line, indent, [indent](const parse_context& c) { + return (c.state == context_state_t::BLOCK_MAPPING) && (indent == c.indent); + }); + } - if FK_YAML_UNLIKELY (m_flow_token_state != flow_token_state_t::NEEDS_VALUE_OR_SUFFIX) { - // Flow sequence entries are not allowed to be empty. - // ```yaml - // [foo,,bar] - // ``` - throw parse_error("flow sequence entry is found without separated with a comma.", line, indent); - } + // defer checking the existence of a key separator after the following scalar until the next + // deserialize_scalar() call. 
+ continue; + } - mp_current_node->as_seq().emplace_back(std::move(node_value)); - m_flow_token_state = flow_token_state_t::NEEDS_SEPARATOR_OR_SUFFIX; - return; - } + // handle explicit mapping key separators. + FK_YAML_ASSERT(m_context_stack.back().state == context_state_t::BLOCK_MAPPING_EXPLICIT_KEY); - // a scalar node - *mp_current_node = std::move(node_value); - if FK_YAML_UNLIKELY (m_context_stack.empty()) { - // single scalar document. - return; - } + basic_node_type key_node = std::move(*m_context_stack.back().p_node); + m_context_stack.pop_back(); + m_context_stack.back().p_node->as_map().emplace(key_node, basic_node_type()); + mp_current_node = &(m_context_stack.back().p_node->operator[](std::move(key_node))); + m_context_stack.emplace_back( + old_line, old_indent, context_state_t::BLOCK_MAPPING_EXPLICIT_VALUE, mp_current_node); - if FK_YAML_LIKELY (m_context_stack.back().state != context_state_t::BLOCK_MAPPING_EXPLICIT_KEY) { - m_context_stack.pop_back(); - mp_current_node = m_context_stack.back().p_node; + if (token.type == lexical_token_t::SEQUENCE_BLOCK_PREFIX) { + *mp_current_node = basic_node_type::sequence({basic_node_type()}); + apply_directive_set(*mp_current_node); + apply_node_properties(*mp_current_node); + m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_SEQUENCE, mp_current_node); - if (m_flow_context_depth > 0) { - m_flow_token_state = flow_token_state_t::NEEDS_SEPARATOR_OR_SUFFIX; - } - } - } + mp_current_node = &(mp_current_node->as_seq().back()); + parse_context entry_context = m_context_stack.back(); + entry_context.state = context_state_t::BLOCK_SEQUENCE_ENTRY; + entry_context.p_node = mp_current_node; + m_context_stack.emplace_back(std::move(entry_context)); + break; + } - /// @brief Deserialize a detected scalar node. - /// @param lexer The lexical analyzer to be used. - /// @param node A scalar node. - /// @param indent The current indentation width. Can be updated in this function. 
- /// @param line The number of processed lines. Can be updated in this function. - /// @param token The storage for last lexical token. - /// @return true if next token has already been got, false otherwise. - void deserialize_scalar( - lexer_type& lexer, basic_node_type&& node, uint32_t& indent, uint32_t& line, lexical_token& token) { - token = lexer.get_next_token(); - if (mp_current_node->is_mapping()) { - const bool is_key_sep_followed = - (token.type == lexical_token_t::KEY_SEPARATOR) && (line == lexer.get_lines_processed()); - if FK_YAML_UNLIKELY (!is_key_sep_followed) { - throw parse_error( - "The \":\" mapping value indicator must be followed after a mapping key.", - lexer.get_lines_processed(), - lexer.get_last_token_begin_pos()); + continue; } - add_new_key(std::move(node), line, indent); - } - else if (token.type == lexical_token_t::KEY_SEPARATOR) { - if FK_YAML_UNLIKELY (line != lexer.get_lines_processed()) { - // This path is for explicit mapping key separator like: + case lexical_token_t::ANCHOR_PREFIX: + case lexical_token_t::TAG_PREFIX: + deserialize_node_properties(lexer, token, line, indent); + // Skip updating the current indent to avoid stacking a wrong indentation. + // Note that node properties for block sequences as a mapping value are processed when a + // `lexical_token_t::KEY_SEPARATOR` token is processed. + // // ```yaml - // ? foo - // : bar - // # ^ this separator + // &foo bar: baz + // ^ + // the correct indent width for the "bar" node key. 
// ``` - assign_node_value(std::move(node), line, indent); - indent = lexer.get_last_token_begin_pos(); - line = lexer.get_lines_processed(); - - if (m_context_stack.back().state != context_state_t::BLOCK_MAPPING_EXPLICIT_KEY) { + continue; + case lexical_token_t::SEQUENCE_BLOCK_PREFIX: { + FK_YAML_ASSERT(!m_context_stack.empty()); + const uint32_t parent_indent = m_context_stack.back().indent; + if (indent == parent_indent) { + // If the previous block sequence entry is empty, just move to the parent context. + // ```yaml + // foo: + // - + // - bar + // # ^ (here) + // # -> {foo: [null, bar]} + // ``` + pop_to_parent_node(line, indent, [](const parse_context& c) { + return c.state == context_state_t::BLOCK_SEQUENCE; + }); + } + else if (indent < parent_indent) { pop_to_parent_node(line, indent, [indent](const parse_context& c) { - return c.state == context_state_t::BLOCK_MAPPING_EXPLICIT_KEY && indent == c.indent; + return c.state == context_state_t::BLOCK_SEQUENCE && indent == c.indent; }); } - return; - } - - if (mp_current_node->is_scalar()) { - if FK_YAML_LIKELY (!m_context_stack.empty()) { - parse_context& cur_context = m_context_stack.back(); - switch (cur_context.state) { - case context_state_t::BLOCK_MAPPING_EXPLICIT_KEY: - case context_state_t::BLOCK_MAPPING_EXPLICIT_VALUE: - m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); - break; - case context_state_t::BLOCK_SEQUENCE_ENTRY: - if FK_YAML_UNLIKELY (cur_context.indent >= indent) { - // This handles combination of empty block sequence entry and block mapping entry with the - // same indentation level, for examples: - // ```yaml - // foo: - // bar: - // - # These entries are indented - // baz: 123 # with the same width. 
- // # ^^^ - // ``` - pop_to_parent_node(line, indent, [indent](const parse_context& c) { - return c.state == context_state_t::BLOCK_MAPPING && indent == c.indent; - }); - add_new_key(std::move(node), line, indent); - indent = lexer.get_last_token_begin_pos(); - line = lexer.get_lines_processed(); - return; - } - - m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); - break; - default: - if FK_YAML_UNLIKELY (cur_context.line == line) { - throw parse_error("Multiple mapping keys are specified on the same line.", line, indent); - } - cur_context.line = line; - cur_context.indent = indent; - cur_context.state = context_state_t::BLOCK_MAPPING; - break; + else /*parent_indent < indent*/ { + if FK_YAML_UNLIKELY (m_context_stack.back().state == context_state_t::BLOCK_SEQUENCE) { + // bad indentation like the following YAML: + // ```yaml + // - "foo" + // - bar + // # ^ + // ``` + throw parse_error("bad indentation of a mapping entry.", line, indent); } - *mp_current_node = basic_node_type::mapping(); + *mp_current_node = basic_node_type::sequence(); + m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_SEQUENCE, mp_current_node); apply_directive_set(*mp_current_node); + apply_node_properties(*mp_current_node); } - else { - // root mapping node - m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); - *mp_current_node = basic_node_type::mapping(); - apply_directive_set(*mp_current_node); + auto& seq = mp_current_node->as_seq(); + seq.emplace_back(basic_node_type()); + mp_current_node = &(seq.back()); + apply_directive_set(*mp_current_node); + m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_SEQUENCE_ENTRY, mp_current_node); + break; + } + case lexical_token_t::SEQUENCE_FLOW_BEGIN: + if (m_flow_context_depth == 0) { + lexer.set_context_state(true); - // apply node properties if any to the root mapping node. 
- if (!m_root_anchor_name.empty()) { - mp_current_node->add_anchor_name( - std::string(m_root_anchor_name.begin(), m_root_anchor_name.end())); - m_root_anchor_name = {}; - } - if (!m_root_tag_name.empty()) { - mp_current_node->add_tag_name(std::string(m_root_tag_name.begin(), m_root_tag_name.end())); - m_root_tag_name = {}; + if (indent <= m_context_stack.back().indent) { + pop_to_parent_node(line, indent, [indent](const parse_context& c) { + switch (c.state) { + case context_state_t::BLOCK_MAPPING: + case context_state_t::MAPPING_VALUE: + return indent == c.indent; + default: + return false; + } + }); } } - } - add_new_key(std::move(node), line, indent); - } - else { - assign_node_value(std::move(node), line, indent); - } + else if FK_YAML_UNLIKELY (m_flow_token_state == flow_token_state_t::NEEDS_SEPARATOR_OR_SUFFIX) { + throw parse_error("Flow sequence beginning is found without separated with a comma.", line, indent); + } - indent = lexer.get_last_token_begin_pos(); - line = lexer.get_lines_processed(); - } + ++m_flow_context_depth; - /// @brief Pops parent contexts to a block mapping with the given indentation. - /// @tparam Pred Functor type to test parent contexts. - /// @param line The current line count. - /// @param indent The indentation level of the target parent block mapping. - template - void pop_to_parent_node(uint32_t line, uint32_t indent, Pred&& pred) { - FK_YAML_ASSERT(!m_context_stack.empty()); + switch (m_context_stack.back().state) { + case context_state_t::BLOCK_SEQUENCE: + case context_state_t::FLOW_SEQUENCE: + mp_current_node->as_seq().emplace_back(basic_node_type::sequence()); + mp_current_node = &(mp_current_node->as_seq().back()); + m_context_stack.emplace_back(line, indent, context_state_t::FLOW_SEQUENCE, mp_current_node); + break; + case context_state_t::BLOCK_MAPPING: + case context_state_t::FLOW_MAPPING: + // heap-allocated node will be freed in handling the corresponding SEQUENCE_FLOW_END event. 
+ m_context_stack.emplace_back( + line, indent, context_state_t::FLOW_SEQUENCE_KEY, new basic_node_type(node_type::SEQUENCE)); + mp_current_node = m_context_stack.back().p_node; + break; + default: { + *mp_current_node = basic_node_type::sequence(); + parse_context& last_context = m_context_stack.back(); + last_context.line = line; + last_context.indent = indent; + last_context.state = context_state_t::FLOW_SEQUENCE; + break; + } + } - // LCOV_EXCL_START - auto itr = std::find_if(m_context_stack.rbegin(), m_context_stack.rend(), std::forward(pred)); - // LCOV_EXCL_STOP - const bool is_indent_valid = (itr != m_context_stack.rend()); - if FK_YAML_UNLIKELY (!is_indent_valid) { - throw parse_error("Detected invalid indentation.", line, indent); - } + apply_directive_set(*mp_current_node); + apply_node_properties(*mp_current_node); - const auto pop_num = static_cast(std::distance(m_context_stack.rbegin(), itr)); + m_flow_token_state = flow_token_state_t::NEEDS_VALUE_OR_SUFFIX; + break; + case lexical_token_t::SEQUENCE_FLOW_END: { + if FK_YAML_UNLIKELY (m_flow_context_depth == 0) { + throw parse_error("Flow sequence ending is found outside the flow context.", line, indent); + } - // move back to the parent block mapping. - for (uint32_t i = 0; i < pop_num; i++) { - m_context_stack.pop_back(); - } - mp_current_node = m_context_stack.back().p_node; - } + if (--m_flow_context_depth == 0) { + lexer.set_context_state(false); + } - /// @brief Set YAML directive properties to the given node. - /// @param node A basic_node_type object to be set YAML directive properties. - void apply_directive_set(basic_node_type& node) noexcept { - node.mp_meta = mp_meta; - } + // find the corresponding flow sequence beginning. 
+ auto itr = std::find_if( // LCOV_EXCL_LINE + m_context_stack.rbegin(), + m_context_stack.rend(), + [](const parse_context& c) { + switch (c.state) { + case context_state_t::FLOW_SEQUENCE_KEY: + case context_state_t::FLOW_SEQUENCE: + return true; + default: + return false; + } + }); - /// @brief Set YAML node properties (anchor and/or tag names) to the given node. - /// @param node A node type object to be set YAML node properties. - void apply_node_properties(basic_node_type& node) { - if (m_needs_anchor_impl) { - node.add_anchor_name(std::string(m_anchor_name.begin(), m_anchor_name.end())); - m_needs_anchor_impl = false; - m_anchor_name = {}; - } + const bool is_valid = itr != m_context_stack.rend(); + if FK_YAML_UNLIKELY (!is_valid) { + throw parse_error("No corresponding flow sequence beginning is found.", line, indent); + } - if (m_needs_tag_impl) { - node.add_tag_name(std::string(m_tag_name.begin(), m_tag_name.end())); - m_needs_tag_impl = false; - m_tag_name = {}; - } - } + // keep the last state for later processing. + parse_context& last_context = m_context_stack.back(); + mp_current_node = last_context.p_node; + last_context.p_node = nullptr; + indent = last_context.indent; + const context_state_t state = last_context.state; + m_context_stack.pop_back(); - /// @brief Update the target YAML version with an input string. - /// @param version_str A YAML version string. - yaml_version_type convert_yaml_version(str_view version_str) noexcept { - return (version_str.compare("1.1") == 0) ? yaml_version_type::VERSION_1_1 : yaml_version_type::VERSION_1_2; - } + // handle cases where the flow sequence is a mapping key node. -private: - /// The currently focused YAML node. - basic_node_type* mp_current_node {nullptr}; - /// The stack of parse contexts. - std::deque m_context_stack {}; - /// The current depth of flow contexts. - uint32_t m_flow_context_depth {0}; - /// The set of YAML directives. 
- std::shared_ptr mp_meta {}; - /// A flag to determine the need for YAML anchor node implementation. - bool m_needs_anchor_impl {false}; - /// A flag to determine the need for a corresponding node with the last YAML tag. - bool m_needs_tag_impl {false}; - /// A flag to determine the need for a value separator or a flow suffix to follow. - flow_token_state_t m_flow_token_state {flow_token_state_t::NEEDS_VALUE_OR_SUFFIX}; - /// The last YAML anchor name. - str_view m_anchor_name; - /// The last tag name. - str_view m_tag_name; - /// The root YAML anchor name. (maybe empty and unused) - str_view m_root_anchor_name; - /// The root tag name. (maybe empty and unused) - str_view m_root_tag_name; -}; + if (!m_context_stack.empty() && state == context_state_t::FLOW_SEQUENCE_KEY) { + basic_node_type key_node = std::move(*mp_current_node); + delete mp_current_node; + mp_current_node = m_context_stack.back().p_node; + m_flow_token_state = flow_token_state_t::NEEDS_VALUE_OR_SUFFIX; -FK_YAML_DETAIL_NAMESPACE_END + add_new_key(std::move(key_node), line, indent); + break; + } -#endif /* FK_YAML_DETAIL_INPUT_DESERIALIZER_HPP */ + token = lexer.get_next_token(); + if (token.type == lexical_token_t::KEY_SEPARATOR) { + basic_node_type key_node = basic_node_type::mapping(); + apply_directive_set(key_node); + mp_current_node->swap(key_node); -// #include -// _______ __ __ __ _____ __ __ __ -// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 -// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML -// -// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani -// SPDX-License-Identifier: MIT + m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); + m_flow_token_state = flow_token_state_t::NEEDS_VALUE_OR_SUFFIX; -#ifndef FK_YAML_DETAIL_INPUT_INPUT_ADAPTER_HPP -#define FK_YAML_DETAIL_INPUT_INPUT_ADAPTER_HPP + add_new_key(std::move(key_node), line, indent); + } 
+ else { + if (!m_context_stack.empty()) { + mp_current_node = m_context_stack.back().p_node; + } + if (m_flow_context_depth > 0) { + m_flow_token_state = flow_token_state_t::NEEDS_SEPARATOR_OR_SUFFIX; + } + } + + indent = lexer.get_last_token_begin_pos(); + line = lexer.get_lines_processed(); + continue; + } + case lexical_token_t::MAPPING_FLOW_BEGIN: + if (m_flow_context_depth == 0) { + lexer.set_context_state(true); + + if (indent <= m_context_stack.back().indent) { + pop_to_parent_node(line, indent, [indent](const parse_context& c) { + switch (c.state) { + case context_state_t::BLOCK_MAPPING: + case context_state_t::MAPPING_VALUE: + return indent == c.indent; + default: + return false; + } + }); + } + } + else if FK_YAML_UNLIKELY (m_flow_token_state == flow_token_state_t::NEEDS_SEPARATOR_OR_SUFFIX) { + throw parse_error("Flow mapping beginning is found without separated with a comma.", line, indent); + } + + ++m_flow_context_depth; + + switch (m_context_stack.back().state) { + case context_state_t::BLOCK_SEQUENCE: + case context_state_t::FLOW_SEQUENCE: + mp_current_node->as_seq().emplace_back(basic_node_type::mapping()); + mp_current_node = &(mp_current_node->as_seq().back()); + m_context_stack.emplace_back(line, indent, context_state_t::FLOW_MAPPING, mp_current_node); + break; + case context_state_t::BLOCK_MAPPING: + case context_state_t::FLOW_MAPPING: + // heap-allocated node will be freed in handling the corresponding MAPPING_FLOW_END event. 
+ m_context_stack.emplace_back( + line, indent, context_state_t::FLOW_MAPPING_KEY, new basic_node_type(node_type::MAPPING)); + mp_current_node = m_context_stack.back().p_node; + break; + default: { + *mp_current_node = basic_node_type::mapping(); + parse_context& last_context = m_context_stack.back(); + last_context.line = line; + last_context.indent = indent; + last_context.state = context_state_t::FLOW_MAPPING; + break; + } + } -#include -#include -#include -#include -#include -#include -#include + apply_directive_set(*mp_current_node); + apply_node_properties(*mp_current_node); -// #include + line = lexer.get_lines_processed(); + indent = lexer.get_last_token_begin_pos(); -// #include + m_flow_token_state = flow_token_state_t::NEEDS_VALUE_OR_SUFFIX; + break; + case lexical_token_t::MAPPING_FLOW_END: { + if FK_YAML_UNLIKELY (m_flow_context_depth == 0) { + throw parse_error("Flow mapping ending is found outside the flow context.", line, indent); + } -// #include -// _______ __ __ __ _____ __ __ __ -// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 -// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML -// -// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani -// SPDX-License-Identifier: MIT + if (--m_flow_context_depth == 0) { + lexer.set_context_state(false); + } -#ifndef FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_DETECTOR_HPP -#define FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_DETECTOR_HPP + // find the corresponding flow mapping beginning. 
+ auto itr = std::find_if( // LCOV_EXCL_LINE + m_context_stack.rbegin(), + m_context_stack.rend(), + [](const parse_context& c) { + switch (c.state) { + case context_state_t::FLOW_MAPPING_KEY: + case context_state_t::FLOW_MAPPING: + return true; + default: + return false; + } + }); -#include -#include + const bool is_valid = itr != m_context_stack.rend(); + if FK_YAML_UNLIKELY (!is_valid) { + throw parse_error("No corresponding flow mapping beginning is found.", line, indent); + } -// #include + // keep the last state for later processing. + parse_context& last_context = m_context_stack.back(); + mp_current_node = last_context.p_node; + last_context.p_node = nullptr; + indent = last_context.indent; + const context_state_t state = last_context.state; + m_context_stack.pop_back(); -// #include -// _______ __ __ __ _____ __ __ __ -// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 -// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML -// -// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani -// SPDX-License-Identifier: MIT + // handle cases where the flow mapping is a mapping key node. 
-#ifndef FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_T_HPP -#define FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_T_HPP + if (!m_context_stack.empty() && state == context_state_t::FLOW_MAPPING_KEY) { + basic_node_type key_node = std::move(*mp_current_node); + delete mp_current_node; + mp_current_node = m_context_stack.back().p_node; + m_flow_token_state = flow_token_state_t::NEEDS_VALUE_OR_SUFFIX; -#include + add_new_key(std::move(key_node), line, indent); + break; + } -// #include + token = lexer.get_next_token(); + if (token.type == lexical_token_t::KEY_SEPARATOR) { + basic_node_type key_node = basic_node_type::mapping(); + apply_directive_set(key_node); + mp_current_node->swap(key_node); + m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); + m_flow_token_state = flow_token_state_t::NEEDS_VALUE_OR_SUFFIX; -FK_YAML_DETAIL_NAMESPACE_BEGIN + add_new_key(std::move(key_node), line, indent); + } + else { + if (!m_context_stack.empty()) { + mp_current_node = m_context_stack.back().p_node; + } + if (m_flow_context_depth > 0) { + m_flow_token_state = flow_token_state_t::NEEDS_SEPARATOR_OR_SUFFIX; + } + } -/// @brief Definition of Unicode encoding types -/// @note Since fkYAML doesn't treat UTF-16/UTF-32 encoded characters per byte, endians do not matter. 
-enum class utf_encode_t : std::uint8_t { - UTF_8, //!< UTF-8 - UTF_16BE, //!< UTF-16 Big Endian - UTF_16LE, //!< UTF-16 Little Endian - UTF_32BE, //!< UTF-32 Big Endian - UTF_32LE, //!< UTF-32 Little Endian -}; + indent = lexer.get_last_token_begin_pos(); + line = lexer.get_lines_processed(); + continue; + } + case lexical_token_t::VALUE_SEPARATOR: + FK_YAML_ASSERT(m_flow_context_depth > 0); + if FK_YAML_UNLIKELY (m_flow_token_state != flow_token_state_t::NEEDS_SEPARATOR_OR_SUFFIX) { + throw parse_error("invalid value separator is found.", line, indent); + } + m_flow_token_state = flow_token_state_t::NEEDS_VALUE_OR_SUFFIX; + break; + case lexical_token_t::ALIAS_PREFIX: { + // An alias node must not specify any properties (tag, anchor). + // https://yaml.org/spec/1.2.2/#71-alias-nodes + if FK_YAML_UNLIKELY (m_needs_tag_impl) { + throw parse_error("Tag cannot be specified to an alias node", line, indent); + } + if FK_YAML_UNLIKELY (m_needs_anchor_impl) { + throw parse_error("Anchor cannot be specified to an alias node.", line, indent); + } -FK_YAML_DETAIL_NAMESPACE_END + std::string token_str = std::string(token.str.begin(), token.str.end()); -#endif /* FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_T_HPP */ + const auto anchor_counts = static_cast(mp_meta->anchor_table.count(token_str)); + if FK_YAML_UNLIKELY (anchor_counts == 0) { + throw parse_error("The given anchor name must appear prior to the alias node.", line, indent); + } -// #include + basic_node_type node {}; + node.m_attrs |= detail::node_attr_bits::alias_bit; + node.m_prop.anchor = std::move(token_str); + detail::node_attr_bits::set_anchor_offset(anchor_counts - 1, node.m_attrs); -// #include + apply_directive_set(node); + apply_node_properties(node); -// #include + deserialize_scalar(lexer, std::move(node), indent, line, token); + continue; + } + case lexical_token_t::PLAIN_SCALAR: + case lexical_token_t::SINGLE_QUOTED_SCALAR: + case lexical_token_t::DOUBLE_QUOTED_SCALAR: { + tag_t tag_type {tag_t::NONE}; + if 
(m_needs_tag_impl) { + tag_type = tag_resolver_type::resolve_tag(m_tag_name, mp_meta); + } + basic_node_type node = scalar_parser_type(line, indent).parse_flow(token.type, tag_type, token.str); + apply_directive_set(node); + apply_node_properties(node); -FK_YAML_DETAIL_NAMESPACE_BEGIN + deserialize_scalar(lexer, std::move(node), indent, line, token); + continue; + } + case lexical_token_t::BLOCK_LITERAL_SCALAR: + case lexical_token_t::BLOCK_FOLDED_SCALAR: { + tag_t tag_type {tag_t::NONE}; + if (m_needs_tag_impl) { + tag_type = tag_resolver_type::resolve_tag(m_tag_name, mp_meta); + } -/// @brief Detect an encoding type for UTF-8 expected inputs. -/// @note This function doesn't support the case where the first character is null. -/// @param[in] bytes 4 bytes of an input character sequence. -/// @param[out] has_bom Whether the input contains a BOM. -/// @return A detected encoding type. -inline utf_encode_t detect_encoding_type(const std::array& bytes, bool& has_bom) noexcept { - has_bom = false; + basic_node_type node = + scalar_parser_type(line, indent) + .parse_block(token.type, tag_type, token.str, lexer.get_block_scalar_header()); + apply_directive_set(node); + apply_node_properties(node); - const uint8_t byte0 = bytes[0]; - const uint8_t byte1 = bytes[1]; - const uint8_t byte2 = bytes[2]; - const uint8_t byte3 = bytes[3]; + deserialize_scalar(lexer, std::move(node), indent, line, token); + continue; + } + // these tokens end parsing the current YAML document. + case lexical_token_t::END_OF_BUFFER: + // This handles an empty input. + last_type = token.type; + return; + case lexical_token_t::END_OF_DIRECTIVES: + case lexical_token_t::END_OF_DOCUMENT: + if FK_YAML_UNLIKELY (m_flow_context_depth > 0) { + throw parse_error("An invalid document marker found in a flow collection", line, indent); + } + last_type = token.type; + return; + // no way to come here while lexically analyzing document contents. 
+ case lexical_token_t::YAML_VER_DIRECTIVE: // LCOV_EXCL_LINE + case lexical_token_t::TAG_DIRECTIVE: // LCOV_EXCL_LINE + case lexical_token_t::INVALID_DIRECTIVE: // LCOV_EXCL_LINE + detail::unreachable(); // LCOV_EXCL_LINE + } - // Check if a BOM exists. + token = lexer.get_next_token(); + indent = lexer.get_last_token_begin_pos(); + line = lexer.get_lines_processed(); + } while (token.type != lexical_token_t::END_OF_BUFFER); - if (byte0 == static_cast(0xEFu) && byte1 == static_cast(0xBBu) && - byte2 == static_cast(0xBFu)) { - has_bom = true; - return utf_encode_t::UTF_8; + last_type = token.type; } - if (byte0 == 0 && byte1 == 0 && byte2 == static_cast(0xFEu) && byte3 == static_cast(0xFFu)) { - has_bom = true; - return utf_encode_t::UTF_32BE; - } + /// @brief Deserializes YAML node properties (anchor and/or tag names) if they exist + /// @param lexer The lexical analyzer to be used. + /// @param last_type The variable to store the last lexical token type. + /// @param line The variable to store the line of either the first property or the last non-property token. + /// @param indent The variable to store the indent of either the first property or the last non-property token. + /// @return true if any property is found, false otherwise. 
+ bool deserialize_node_properties(lexer_type& lexer, lexical_token& last_token, uint32_t& line, uint32_t& indent) { + m_needs_anchor_impl = m_needs_tag_impl = false; - if (byte0 == static_cast(0xFFu) && byte1 == static_cast(0xFEu) && byte2 == 0 && byte3 == 0) { - has_bom = true; - return utf_encode_t::UTF_32LE; - } + lexical_token token = last_token; + bool ends_loop {false}; + do { + if (line < lexer.get_lines_processed()) { + break; + } + + switch (token.type) { + case lexical_token_t::ANCHOR_PREFIX: + if FK_YAML_UNLIKELY (m_needs_anchor_impl) { + throw parse_error( + "anchor name cannot be specified more than once to the same node.", + lexer.get_lines_processed(), + lexer.get_last_token_begin_pos()); + } - if (byte0 == static_cast(0xFEu) && byte1 == static_cast(0xFFu)) { - has_bom = true; - return utf_encode_t::UTF_16BE; - } + m_anchor_name = token.str; + m_needs_anchor_impl = true; - if (byte0 == static_cast(0xFFu) && byte1 == static_cast(0xFEu)) { - has_bom = true; - return utf_encode_t::UTF_16LE; - } + if (!m_needs_tag_impl) { + line = lexer.get_lines_processed(); + indent = lexer.get_last_token_begin_pos(); + } - // Test the first character assuming it's an ASCII character. 
+ token = lexer.get_next_token(); + break; + case lexical_token_t::TAG_PREFIX: { + if FK_YAML_UNLIKELY (m_needs_tag_impl) { + throw parse_error( + "tag name cannot be specified more than once to the same node.", + lexer.get_lines_processed(), + lexer.get_last_token_begin_pos()); + } - if (byte0 == 0 && byte1 == 0 && byte2 == 0 && 0 < byte3 && byte3 < static_cast(0x80u)) { - return utf_encode_t::UTF_32BE; - } + m_tag_name = token.str; + m_needs_tag_impl = true; - if (0 < byte0 && byte0 < static_cast(0x80u) && byte1 == 0 && byte2 == 0 && byte3 == 0) { - return utf_encode_t::UTF_32LE; - } + if (!m_needs_anchor_impl) { + line = lexer.get_lines_processed(); + indent = lexer.get_last_token_begin_pos(); + } - if (byte0 == 0 && 0 < byte1 && byte1 < static_cast(0x80u)) { - return utf_encode_t::UTF_16BE; - } + token = lexer.get_next_token(); + break; + } + default: + ends_loop = true; + break; + } + } while (!ends_loop); - if (0 < byte0 && byte0 < static_cast(0x80u) && byte1 == 0) { - return utf_encode_t::UTF_16LE; - } + last_token = token; + const bool prop_specified = m_needs_anchor_impl || m_needs_tag_impl; + if (!prop_specified) { + line = lexer.get_lines_processed(); + indent = lexer.get_last_token_begin_pos(); + } - return utf_encode_t::UTF_8; -} + return prop_specified; + } -/// @brief A class which detects UTF encoding type and the existence of a BOM at the beginning. -/// @tparam ItrType Type of iterators for the input. -template -struct utf_encode_detector {}; + /// @brief Add new key string to the current YAML node. + /// @param key a key string to be added to the current YAML node. + /// @param line The line where the key is found. + /// @param indent The indentation width in the current line where the key is found. 
+ void add_new_key(basic_node_type&& key, const uint32_t line, const uint32_t indent) { + if (m_flow_context_depth == 0) { + if FK_YAML_UNLIKELY (m_context_stack.back().indent < indent) { + // bad indentation like the following YAML: + // ```yaml + // foo: true + // baz: 123 + // # ^ + // ``` + throw parse_error("bad indentation of a mapping entry.", line, indent); + } -/// @brief The partial specialization of utf_encode_detector for char iterators. -/// @tparam ItrType An iterator type. -template -struct utf_encode_detector::value>> { - /// @brief Detects the encoding type of the input, and consumes a BOM if it exists. - /// @param begin The iterator to the first element of an input. - /// @param end The iterator to the past-the end element of an input. - /// @return A detected encoding type. - static utf_encode_t detect(ItrType& begin, const ItrType& end) noexcept { - if FK_YAML_UNLIKELY (begin == end) { - return utf_encode_t::UTF_8; + pop_to_parent_node(line, indent, [indent](const parse_context& c) { + return (c.state == context_state_t::BLOCK_MAPPING) && (indent == c.indent); + }); } + else { + if FK_YAML_UNLIKELY (m_flow_token_state != flow_token_state_t::NEEDS_VALUE_OR_SUFFIX) { + throw parse_error("Flow mapping entry is found without separated with a comma.", line, indent); + } - // the inner curly braces are necessary for older compilers - std::array bytes {{}}; - bytes.fill(0xFFu); - auto current = begin; - for (int i = 0; i < 4 && current != end; i++, ++current) { - bytes[i] = static_cast(*current); // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index) + if (mp_current_node->is_sequence()) { + mp_current_node->as_seq().emplace_back(basic_node_type::mapping()); + mp_current_node = &(mp_current_node->operator[](mp_current_node->size() - 1)); + m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); + } } - bool has_bom = false; - const utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); - - if 
(has_bom) { - // skip reading the BOM. - switch (encode_type) { - case utf_encode_t::UTF_8: - std::advance(begin, 3); - break; - case utf_encode_t::UTF_16BE: - case utf_encode_t::UTF_16LE: - std::advance(begin, 2); - break; - case utf_encode_t::UTF_32BE: - case utf_encode_t::UTF_32LE: - std::advance(begin, 4); - break; - } + auto itr = mp_current_node->as_map().emplace(std::move(key), basic_node_type()); + if FK_YAML_UNLIKELY (!itr.second) { + throw parse_error("Detected duplication in mapping keys.", line, indent); } - return encode_type; + mp_current_node = &(itr.first->second); + const parse_context& key_context = m_context_stack.back(); + m_context_stack.emplace_back( + key_context.line, key_context.indent, context_state_t::MAPPING_VALUE, mp_current_node); } -}; -#if FK_YAML_HAS_CHAR8_T + /// @brief Assign node value to the current node. + /// @param node_value A rvalue basic_node_type object to be assigned to the current node. + void assign_node_value(basic_node_type&& node_value, const uint32_t line, const uint32_t indent) { + if (mp_current_node->is_sequence()) { + FK_YAML_ASSERT(m_flow_context_depth > 0); -/// @brief The partial specialization of utf_encode_detector for char8_t iterators. -/// @tparam ItrType An iterator type. -template -struct utf_encode_detector::value>> { - /// @brief Detects the encoding type of the input, and consumes a BOM if it exists. - /// @param begin The iterator to the first element of an input. - /// @param end The iterator to the past-the end element of an input. - /// @return A detected encoding type. - static utf_encode_t detect(ItrType& begin, const ItrType& end) { - if FK_YAML_UNLIKELY (begin == end) { - return utf_encode_t::UTF_8; + if FK_YAML_UNLIKELY (m_flow_token_state != flow_token_state_t::NEEDS_VALUE_OR_SUFFIX) { + // Flow sequence entries are not allowed to be empty. 
+ // ```yaml + // [foo,,bar] + // ``` + throw parse_error("flow sequence entry is found without separated with a comma.", line, indent); + } + + mp_current_node->as_seq().emplace_back(std::move(node_value)); + m_flow_token_state = flow_token_state_t::NEEDS_SEPARATOR_OR_SUFFIX; + return; } - std::array bytes {}; - bytes.fill(0xFFu); - auto current = begin; - for (int i = 0; i < 4 && current != end; i++, ++current) { - bytes[i] = uint8_t(*current); // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index) + // a scalar node + *mp_current_node = std::move(node_value); + if FK_YAML_UNLIKELY (m_context_stack.empty()) { + // single scalar document. + return; } - bool has_bom = false; - const utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + if FK_YAML_LIKELY (m_context_stack.back().state != context_state_t::BLOCK_MAPPING_EXPLICIT_KEY) { + m_context_stack.pop_back(); + mp_current_node = m_context_stack.back().p_node; - if FK_YAML_UNLIKELY (encode_type != utf_encode_t::UTF_8) { - throw exception("char8_t characters must be encoded in the UTF-8 format."); + if (m_flow_context_depth > 0) { + m_flow_token_state = flow_token_state_t::NEEDS_SEPARATOR_OR_SUFFIX; + } } + } - if (has_bom) { - // skip reading the BOM. - std::advance(begin, 3); + /// @brief Deserialize a detected scalar node. + /// @param lexer The lexical analyzer to be used. + /// @param node A scalar node. + /// @param indent The current indentation width. Can be updated in this function. + /// @param line The number of processed lines. Can be updated in this function. + /// @param token The storage for last lexical token. + /// @return true if next token has already been got, false otherwise. 
+ void deserialize_scalar( + lexer_type& lexer, basic_node_type&& node, uint32_t& indent, uint32_t& line, lexical_token& token) { + token = lexer.get_next_token(); + if (mp_current_node->is_mapping()) { + const bool is_key_sep_followed = + (token.type == lexical_token_t::KEY_SEPARATOR) && (line == lexer.get_lines_processed()); + if FK_YAML_UNLIKELY (!is_key_sep_followed) { + throw parse_error( + "The \":\" mapping value indicator must be followed after a mapping key.", + lexer.get_lines_processed(), + lexer.get_last_token_begin_pos()); + } + add_new_key(std::move(node), line, indent); } + else if (token.type == lexical_token_t::KEY_SEPARATOR) { + if FK_YAML_UNLIKELY (line != lexer.get_lines_processed()) { + // This path is for explicit mapping key separator like: + // ```yaml + // ? foo + // : bar + // # ^ this separator + // ``` + assign_node_value(std::move(node), line, indent); + indent = lexer.get_last_token_begin_pos(); + line = lexer.get_lines_processed(); - return encode_type; - } -}; + if (m_context_stack.back().state != context_state_t::BLOCK_MAPPING_EXPLICIT_KEY) { + pop_to_parent_node(line, indent, [indent](const parse_context& c) { + return c.state == context_state_t::BLOCK_MAPPING_EXPLICIT_KEY && indent == c.indent; + }); + } + return; + } -#endif // FK_YAML_HAS_CHAR8_T + if (mp_current_node->is_scalar()) { + if FK_YAML_LIKELY (!m_context_stack.empty()) { + parse_context& cur_context = m_context_stack.back(); + switch (cur_context.state) { + case context_state_t::BLOCK_MAPPING_EXPLICIT_KEY: + case context_state_t::BLOCK_MAPPING_EXPLICIT_VALUE: + m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); + break; + case context_state_t::BLOCK_SEQUENCE_ENTRY: + if FK_YAML_UNLIKELY (cur_context.indent >= indent) { + // This handles combination of empty block sequence entry and block mapping entry with the + // same indentation level, for examples: + // ```yaml + // foo: + // bar: + // - # These entries are indented + // 
baz: 123 # with the same width. + // # ^^^ + // ``` + pop_to_parent_node(line, indent, [indent](const parse_context& c) { + return c.state == context_state_t::BLOCK_MAPPING && indent == c.indent; + }); + add_new_key(std::move(node), line, indent); + indent = lexer.get_last_token_begin_pos(); + line = lexer.get_lines_processed(); + return; + } -/// @brief The partial specialization of utf_encode_detector for char16_t iterators. -/// @tparam ItrType An iterator type. -template -struct utf_encode_detector::value>> { - /// @brief Detects the encoding type of the input, and consumes a BOM if it exists. - /// @param begin The iterator to the first element of an input. - /// @param end The iterator to the past-the end element of an input. - /// @return A detected encoding type. - static utf_encode_t detect(ItrType& begin, const ItrType& end) { - if FK_YAML_UNLIKELY (begin == end) { - return utf_encode_t::UTF_16BE; - } + m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); + break; + default: + if FK_YAML_UNLIKELY (cur_context.line == line) { + throw parse_error("Multiple mapping keys are specified on the same line.", line, indent); + } + cur_context.line = line; + cur_context.indent = indent; + cur_context.state = context_state_t::BLOCK_MAPPING; + break; + } - // the inner curly braces are necessary for older compilers - std::array bytes {{}}; - bytes.fill(0xFFu); - auto current = begin; - for (int i = 0; i < 2 && current != end; i++, ++current) { - // NOLINTBEGIN(cppcoreguidelines-pro-bounds-constant-array-index) - const char16_t elem = *current; - const int idx_base = i * 2; - bytes[idx_base] = static_cast(elem >> 8); - bytes[idx_base + 1] = static_cast(elem); - // NOLINTEND(cppcoreguidelines-pro-bounds-constant-array-index) - } + *mp_current_node = basic_node_type::mapping(); + apply_directive_set(*mp_current_node); + } + else { + // root mapping node - bool has_bom = false; - const utf_encode_t encode_type = 
detect_encoding_type(bytes, has_bom); + m_context_stack.emplace_back(line, indent, context_state_t::BLOCK_MAPPING, mp_current_node); + *mp_current_node = basic_node_type::mapping(); + apply_directive_set(*mp_current_node); - if FK_YAML_UNLIKELY (encode_type != utf_encode_t::UTF_16BE && encode_type != utf_encode_t::UTF_16LE) { - throw exception("char16_t characters must be encoded in the UTF-16 format."); + // apply node properties if any to the root mapping node. + if (!m_root_anchor_name.empty()) { + mp_current_node->add_anchor_name( + std::string(m_root_anchor_name.begin(), m_root_anchor_name.end())); + m_root_anchor_name = {}; + } + if (!m_root_tag_name.empty()) { + mp_current_node->add_tag_name(std::string(m_root_tag_name.begin(), m_root_tag_name.end())); + m_root_tag_name = {}; + } + } + } + add_new_key(std::move(node), line, indent); } - - if (has_bom) { - // skip reading the BOM. - std::advance(begin, 1); + else { + assign_node_value(std::move(node), line, indent); } - return encode_type; + indent = lexer.get_last_token_begin_pos(); + line = lexer.get_lines_processed(); } -}; - -/// @brief The partial specialization of utf_encode_detector for char32_t iterators. -/// @tparam ItrType An iterator type. -template -struct utf_encode_detector::value>> { - /// @brief Detects the encoding type of the input, and consumes a BOM if it exists. - /// @param begin The iterator to the first element of an input. - /// @param end The iterator to the past-the end element of an input. - /// @return A detected encoding type. 
- static utf_encode_t detect(ItrType& begin, const ItrType& end) { - if FK_YAML_UNLIKELY (begin == end) { - return utf_encode_t::UTF_32BE; - } - - // the inner curly braces are necessary for older compilers - std::array bytes {{}}; - const char32_t elem = *begin; - bytes[0] = static_cast(elem >> 24); - bytes[1] = static_cast(elem >> 16); - bytes[2] = static_cast(elem >> 8); - bytes[3] = static_cast(elem); - bool has_bom = false; - const utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + /// @brief Pops parent contexts to a block mapping with the given indentation. + /// @tparam Pred Functor type to test parent contexts. + /// @param line The current line count. + /// @param indent The indentation level of the target parent block mapping. + template + void pop_to_parent_node(uint32_t line, uint32_t indent, Pred&& pred) { + FK_YAML_ASSERT(!m_context_stack.empty()); - if FK_YAML_UNLIKELY (encode_type != utf_encode_t::UTF_32BE && encode_type != utf_encode_t::UTF_32LE) { - throw exception("char32_t characters must be encoded in the UTF-32 format."); + // LCOV_EXCL_START + auto itr = std::find_if(m_context_stack.rbegin(), m_context_stack.rend(), std::forward(pred)); + // LCOV_EXCL_STOP + const bool is_indent_valid = (itr != m_context_stack.rend()); + if FK_YAML_UNLIKELY (!is_indent_valid) { + throw parse_error("Detected invalid indentation.", line, indent); } - if (has_bom) { - // skip reading the BOM. - std::advance(begin, 1); + const auto pop_num = static_cast(std::distance(m_context_stack.rbegin(), itr)); + + // move back to the parent block mapping. + for (uint32_t i = 0; i < pop_num; i++) { + m_context_stack.pop_back(); } + mp_current_node = m_context_stack.back().p_node; + } - return encode_type; + /// @brief Set YAML directive properties to the given node. + /// @param node A basic_node_type object to be set YAML directive properties. 
+ void apply_directive_set(basic_node_type& node) noexcept { + node.mp_meta = mp_meta; } -}; -/// @brief A class which detects UTF encoding type and the existence of a BOM from the input file. -struct file_utf_encode_detector { - /// @brief Detects the encoding type of the input, and consumes a BOM if it exists. - /// @param p_file The input file handle. - /// @return A detected encoding type. - static utf_encode_t detect(std::FILE* p_file) noexcept { - // the inner curly braces are necessary for older compilers - std::array bytes {{}}; - bytes.fill(0xFFu); - for (int i = 0; i < 4; i++) { - char byte = 0; - const std::size_t size = std::fread(&byte, sizeof(char), 1, p_file); - if (size != sizeof(char)) { - break; - } - bytes[i] = static_cast(byte & 0xFF); // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index) + /// @brief Set YAML node properties (anchor and/or tag names) to the given node. + /// @param node A node type object to be set YAML node properties. + void apply_node_properties(basic_node_type& node) { + if (m_needs_anchor_impl) { + node.add_anchor_name(std::string(m_anchor_name.begin(), m_anchor_name.end())); + m_needs_anchor_impl = false; + m_anchor_name = {}; } - bool has_bom = false; - const utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); - - // move back to the beginning if a BOM doesn't exist. - long offset = 0; // NOLINT(google-runtime-int) - if (has_bom) { - switch (encode_type) { - case utf_encode_t::UTF_8: - offset = 3; - break; - case utf_encode_t::UTF_16BE: - case utf_encode_t::UTF_16LE: - offset = 2; - break; - case utf_encode_t::UTF_32BE: - case utf_encode_t::UTF_32LE: - offset = 4; - break; - } + if (m_needs_tag_impl) { + node.add_tag_name(std::string(m_tag_name.begin(), m_tag_name.end())); + m_needs_tag_impl = false; + m_tag_name = {}; } - std::fseek(p_file, offset, SEEK_SET); // NOLINT(cert-err33-c) + } - return encode_type; + /// @brief Update the target YAML version with an input string. 
+ /// @param version_str A YAML version string. + yaml_version_type convert_yaml_version(str_view version_str) noexcept { + return (version_str.compare("1.1") == 0) ? yaml_version_type::VERSION_1_1 : yaml_version_type::VERSION_1_2; } -}; -/// @brief A class which detects UTF encoding type and the existence of a BOM from the input file. -struct stream_utf_encode_detector { - /// @brief Detects the encoding type of the input, and consumes a BOM if it exists. - /// @param p_file The input file handle. - /// @return A detected encoding type. - static utf_encode_t detect(std::istream& is) noexcept { - // the inner curly braces are necessary for older compilers - std::array bytes {{}}; - bytes.fill(0xFFu); - for (int i = 0; i < 4; i++) { - char ch = 0; - is.read(&ch, 1); - const std::streamsize size = is.gcount(); - if (size != 1) { - // without this, seekg() will fail. - is.clear(); - break; - } - bytes[i] = static_cast(ch & 0xFF); // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index) - } +private: + /// The currently focused YAML node. + basic_node_type* mp_current_node {nullptr}; + /// The stack of parse contexts. + std::deque m_context_stack {}; + /// The current depth of flow contexts. + uint32_t m_flow_context_depth {0}; + /// The set of YAML directives. + std::shared_ptr mp_meta {}; + /// A flag to determine the need for YAML anchor node implementation. + bool m_needs_anchor_impl {false}; + /// A flag to determine the need for a corresponding node with the last YAML tag. + bool m_needs_tag_impl {false}; + /// A flag to determine the need for a value separator or a flow suffix to follow. + flow_token_state_t m_flow_token_state {flow_token_state_t::NEEDS_VALUE_OR_SUFFIX}; + /// The last YAML anchor name. + str_view m_anchor_name; + /// The last tag name. + str_view m_tag_name; + /// The root YAML anchor name. (maybe empty and unused) + str_view m_root_anchor_name; + /// The root tag name. 
(maybe empty and unused) + str_view m_root_tag_name; +}; - bool has_bom = false; - const utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); +FK_YAML_DETAIL_NAMESPACE_END - // move back to the beginning if a BOM doesn't exist. - std::streamoff offset = 0; - if (has_bom) { - switch (encode_type) { - case utf_encode_t::UTF_8: - offset = 3; - break; - case utf_encode_t::UTF_16BE: - case utf_encode_t::UTF_16LE: - offset = 2; - break; - case utf_encode_t::UTF_32BE: - case utf_encode_t::UTF_32LE: - offset = 4; - break; - } - } - is.seekg(offset, std::ios_base::beg); +#endif /* FK_YAML_DETAIL_INPUT_DESERIALIZER_HPP */ - return encode_type; - } -}; +// #include +// _______ __ __ __ _____ __ __ __ +// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library +// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 +// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML +// +// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani +// SPDX-License-Identifier: MIT -FK_YAML_DETAIL_NAMESPACE_END +#ifndef FK_YAML_DETAIL_INPUT_INPUT_ADAPTER_HPP +#define FK_YAML_DETAIL_INPUT_INPUT_ADAPTER_HPP -#endif /* FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_DETECTOR_HPP */ +#include +#include +#include +#include +#include +#include +#include -// #include +// #include -// #include +// #include -// #include +// #include +// _______ __ __ __ _____ __ __ __ +// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library +// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 +// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML +// +// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani +// SPDX-License-Identifier: MIT -// #include +#ifndef FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_DETECTOR_HPP +#define FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_DETECTOR_HPP -// #include +#include +#include -// #include +// #include +// #include +// _______ __ __ __ _____ __ __ __ +// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library +// | 
__| _ < \_ _/| ___ | _ | |___ version 0.4.2 +// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML +// +// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani +// SPDX-License-Identifier: MIT -FK_YAML_DETAIL_NAMESPACE_BEGIN +#ifndef FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_T_HPP +#define FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_T_HPP -/////////////////////// -// input_adapter // -/////////////////////// +#include -template -class iterator_input_adapter; +// #include -/// @brief An input adapter for iterators of type char. -/// @tparam IterType An iterator type. -template -class iterator_input_adapter::value>> { -public: - /// @brief Construct a new iterator_input_adapter object. - iterator_input_adapter() = default; - /// @brief Construct a new iterator_input_adapter object. - /// @param begin The beginning of iterators. - /// @param end The end of iterators. - /// @param encode_type The encoding type for this input adapter. - /// @param is_contiguous Whether iterators are contiguous or not. - iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type, bool is_contiguous) noexcept - : m_begin(begin), - m_end(end), - m_encode_type(encode_type), - m_is_contiguous(is_contiguous) { - } +FK_YAML_DETAIL_NAMESPACE_BEGIN - // allow only move construct/assignment like other input adapters. - iterator_input_adapter(const iterator_input_adapter&) = delete; - iterator_input_adapter(iterator_input_adapter&& rhs) = default; - iterator_input_adapter& operator=(const iterator_input_adapter&) = delete; - iterator_input_adapter& operator=(iterator_input_adapter&&) = default; - ~iterator_input_adapter() = default; +/// @brief Definition of Unicode encoding types +/// @note Since fkYAML doesn't treat UTF-16/UTF-32 encoded characters per byte, endians do not matter. 
+enum class utf_encode_t : std::uint8_t { + UTF_8, //!< UTF-8 + UTF_16BE, //!< UTF-16 Big Endian + UTF_16LE, //!< UTF-16 Little Endian + UTF_32BE, //!< UTF-32 Big Endian + UTF_32LE, //!< UTF-32 Little Endian +}; - /// @brief Get view into the input buffer contents. - /// @return View into the input buffer contents. - str_view get_buffer_view() { - if FK_YAML_UNLIKELY (m_begin == m_end) { - return {}; - } +FK_YAML_DETAIL_NAMESPACE_END - m_buffer.clear(); +#endif /* FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_T_HPP */ - switch (m_encode_type) { - case utf_encode_t::UTF_8: - return get_buffer_view_utf8(); - case utf_encode_t::UTF_16BE: - case utf_encode_t::UTF_16LE: - return get_buffer_view_utf16(); - case utf_encode_t::UTF_32BE: - case utf_encode_t::UTF_32LE: - return get_buffer_view_utf32(); - default: // LCOV_EXCL_LINE - detail::unreachable(); // LCOV_EXCL_LINE - } - } +// #include -private: - /// @brief The concrete implementation of get_buffer_view() for UTF-8 encoded inputs. - /// @return View into the UTF-8 encoded input buffer contents. 
- str_view get_buffer_view_utf8() { - FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); +// #include - IterType current = m_begin; - std::deque cr_itrs {}; - while (current != m_end) { - const auto first = static_cast(*current); - const uint32_t num_bytes = utf8::get_num_bytes(first); +// #include - switch (num_bytes) { - case 1: - if FK_YAML_UNLIKELY (first == 0x0D /*CR*/) { - cr_itrs.emplace_back(current); - } - break; - case 2: { - const auto second = static_cast(*++current); - const bool is_valid = utf8::validate(first, second); - if FK_YAML_UNLIKELY (!is_valid) { - throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", {first, second}); - } - break; - } - case 3: { - const auto second = static_cast(*++current); - const auto third = static_cast(*++current); - const bool is_valid = utf8::validate(first, second, third); - if FK_YAML_UNLIKELY (!is_valid) { - throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", {first, second, third}); - } - break; - } - case 4: { - const auto second = static_cast(*++current); - const auto third = static_cast(*++current); - const auto fourth = static_cast(*++current); - const bool is_valid = utf8::validate(first, second, third, fourth); - if FK_YAML_UNLIKELY (!is_valid) { - throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", {first, second, third, fourth}); - } - break; - } - default: // LCOV_EXCL_LINE - unreachable(); // LCOV_EXCL_LINE - } - ++current; - } +FK_YAML_DETAIL_NAMESPACE_BEGIN - const bool is_contiguous_no_cr = cr_itrs.empty() && m_is_contiguous; - if FK_YAML_LIKELY (is_contiguous_no_cr) { - // The input iterators (begin, end) can be used as-is during parsing. - FK_YAML_ASSERT(m_begin != m_end); - return str_view {&*m_begin, static_cast(std::distance(m_begin, m_end))}; - } +/// @brief Detect an encoding type for UTF-8 expected inputs. +/// @note This function doesn't support the case where the first character is null. +/// @param[in] bytes 4 bytes of an input character sequence. 
+/// @param[out] has_bom Whether the input contains a BOM. +/// @return A detected encoding type. +inline utf_encode_t detect_encoding_type(const std::array& bytes, bool& has_bom) noexcept { + has_bom = false; - m_buffer.reserve(std::distance(m_begin, m_end) - cr_itrs.size()); + const uint8_t byte0 = bytes[0]; + const uint8_t byte1 = bytes[1]; + const uint8_t byte2 = bytes[2]; + const uint8_t byte3 = bytes[3]; - current = m_begin; - for (const auto& cr_itr : cr_itrs) { - m_buffer.append(current, cr_itr); - current = std::next(cr_itr); - } - m_buffer.append(current, m_end); + // Check if a BOM exists. - return str_view {m_buffer.begin(), m_buffer.end()}; + if (byte0 == static_cast(0xEFu) && byte1 == static_cast(0xBBu) && + byte2 == static_cast(0xBFu)) { + has_bom = true; + return utf_encode_t::UTF_8; } - /// @brief The concrete implementation of get_buffer_view() for UTF-16 encoded inputs. - /// @return View into the UTF-8 encoded input buffer contents. - str_view get_buffer_view_utf16() { - FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); - - // Assume the input characters are all ASCII characters. - // That's the most probably the case. 
- m_buffer.reserve(std::distance(m_begin, m_end) / 2); + if (byte0 == 0 && byte1 == 0 && byte2 == static_cast(0xFEu) && byte3 == static_cast(0xFFu)) { + has_bom = true; + return utf_encode_t::UTF_32BE; + } - int shift_bits[2] {0, 0}; - if (m_encode_type == utf_encode_t::UTF_16BE) { - shift_bits[0] = 8; - } - else // m_encode_type == utf_encode_t::UTF_16LE - { - shift_bits[1] = 8; - } + if (byte0 == static_cast(0xFFu) && byte1 == static_cast(0xFEu) && byte2 == 0 && byte3 == 0) { + has_bom = true; + return utf_encode_t::UTF_32LE; + } - std::array encoded_buffer {{0, 0}}; - uint32_t encoded_buf_size {0}; - std::array utf8_buffer {{0, 0, 0, 0}}; - uint32_t utf8_buf_size {0}; + if (byte0 == static_cast(0xFEu) && byte1 == static_cast(0xFFu)) { + has_bom = true; + return utf_encode_t::UTF_16BE; + } - IterType current = m_begin; - while (current != m_end || encoded_buf_size != 0) { - while (current != m_end && encoded_buf_size < 2) { - auto utf16 = static_cast(static_cast(*current) << shift_bits[0]); - utf16 |= static_cast(static_cast(*++current) << shift_bits[1]); - ++current; + if (byte0 == static_cast(0xFFu) && byte1 == static_cast(0xFEu)) { + has_bom = true; + return utf_encode_t::UTF_16LE; + } - // skip appending CRs. - if FK_YAML_LIKELY (utf16 != char16_t(0x000Du)) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-constant-array-index) - encoded_buffer[encoded_buf_size++] = utf16; - } - } + // Test the first character assuming it's an ASCII character. 
- uint32_t consumed_size = 0; - utf8::from_utf16(encoded_buffer, utf8_buffer, consumed_size, utf8_buf_size); + if (byte0 == 0 && byte1 == 0 && byte2 == 0 && 0 < byte3 && byte3 < static_cast(0x80u)) { + return utf_encode_t::UTF_32BE; + } - if FK_YAML_LIKELY (consumed_size == 1) { - encoded_buffer[0] = encoded_buffer[1]; - } - encoded_buf_size -= consumed_size; + if (0 < byte0 && byte0 < static_cast(0x80u) && byte1 == 0 && byte2 == 0 && byte3 == 0) { + return utf_encode_t::UTF_32LE; + } - m_buffer.append(reinterpret_cast(utf8_buffer.data()), utf8_buf_size); - } + if (byte0 == 0 && 0 < byte1 && byte1 < static_cast(0x80u)) { + return utf_encode_t::UTF_16BE; + } - return str_view {m_buffer.begin(), m_buffer.end()}; + if (0 < byte0 && byte0 < static_cast(0x80u) && byte1 == 0) { + return utf_encode_t::UTF_16LE; } - /// @brief The concrete implementation of get_buffer_view() for UTF-32 encoded inputs. - /// @return View into the UTF-8 encoded input buffer contents. - str_view get_buffer_view_utf32() { - FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); + return utf_encode_t::UTF_8; +} - // Assume the input characters are all ASCII characters. - // That's the most probably the case. - m_buffer.reserve(std::distance(m_begin, m_end) / 4); +/// @brief A class which detects UTF encoding type and the existence of a BOM at the beginning. +/// @tparam ItrType Type of iterators for the input. +template +struct utf_encode_detector {}; - int shift_bits[4] {0, 0, 0, 0}; - if (m_encode_type == utf_encode_t::UTF_32BE) { - shift_bits[0] = 24; - shift_bits[1] = 16; - shift_bits[2] = 8; - } - else // m_encode_type == utf_encode_t::UTF_32LE - { - shift_bits[1] = 8; - shift_bits[2] = 16; - shift_bits[3] = 24; +/// @brief The partial specialization of utf_encode_detector for char iterators. +/// @tparam ItrType An iterator type. 
+template +struct utf_encode_detector::value>> { + /// @brief Detects the encoding type of the input, and consumes a BOM if it exists. + /// @param begin The iterator to the first element of an input. + /// @param end The iterator to the past-the end element of an input. + /// @return A detected encoding type. + static utf_encode_t detect(ItrType& begin, const ItrType& end) noexcept { + if FK_YAML_UNLIKELY (begin == end) { + return utf_encode_t::UTF_8; } - std::array utf8_buffer {{0, 0, 0, 0}}; - uint32_t utf8_buf_size {0}; + // the inner curly braces are necessary for older compilers + std::array bytes {{}}; + bytes.fill(0xFFu); + auto current = begin; + for (int i = 0; i < 4 && current != end; i++, ++current) { + bytes[i] = static_cast(*current); // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index) + } - IterType current = m_begin; - while (current != m_end) { - auto utf32 = static_cast(*current << shift_bits[0]); - ++current; - utf32 |= static_cast(*current << shift_bits[1]); - ++current; - utf32 |= static_cast(*current << shift_bits[2]); - ++current; - utf32 |= static_cast(*current << shift_bits[3]); - ++current; + bool has_bom = false; + const utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); - if FK_YAML_LIKELY (utf32 != char32_t(0x0000000Du)) { - utf8::from_utf32(utf32, utf8_buffer, utf8_buf_size); - m_buffer.append(reinterpret_cast(utf8_buffer.data()), utf8_buf_size); + if (has_bom) { + // skip reading the BOM. + switch (encode_type) { + case utf_encode_t::UTF_8: + std::advance(begin, 3); + break; + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + std::advance(begin, 2); + break; + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + std::advance(begin, 4); + break; } } - return str_view {m_buffer.begin(), m_buffer.end()}; + return encode_type; } - -private: - /// The iterator at the beginning of input. - IterType m_begin {}; - /// The iterator at the end of input. 
- IterType m_end {}; - /// The encoding type for this input adapter. - utf_encode_t m_encode_type {utf_encode_t::UTF_8}; - /// The normalized owned buffer. - std::string m_buffer; - /// Whether ItrType is a contiguous iterator. - bool m_is_contiguous {false}; }; #if FK_YAML_HAS_CHAR8_T -/// @brief An input adapter for iterators of type char8_t. -/// @tparam IterType An iterator type. -template -class iterator_input_adapter::value>> { -public: - /// @brief Construct a new iterator_input_adapter object. - iterator_input_adapter() = default; - - /// @brief Construct a new iterator_input_adapter object. - /// @param begin The beginning of iterators. - /// @param end The end of iterators. - /// @param encode_type The encoding type for this input adapter. - /// @param is_contiguous Whether iterators are contiguous or not. - iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type, bool is_contiguous) noexcept - : m_begin(begin), - m_end(end), - m_encode_type(encode_type), - m_is_contiguous(is_contiguous) { - // char8_t characters must be encoded in the UTF-8 format. - // See https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0482r6.html. - FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); - } - - // allow only move construct/assignment like other input adapters. - iterator_input_adapter(const iterator_input_adapter&) = delete; - iterator_input_adapter(iterator_input_adapter&& rhs) = default; - iterator_input_adapter& operator=(const iterator_input_adapter&) = delete; - iterator_input_adapter& operator=(iterator_input_adapter&&) = default; - ~iterator_input_adapter() = default; - - /// @brief Get view into the input buffer contents. - /// @return View into the input buffer contents. - str_view get_buffer_view() { - if FK_YAML_UNLIKELY (m_begin == m_end) { - return {}; +/// @brief The partial specialization of utf_encode_detector for char8_t iterators. +/// @tparam ItrType An iterator type. 
+template +struct utf_encode_detector::value>> { + /// @brief Detects the encoding type of the input, and consumes a BOM if it exists. + /// @param begin The iterator to the first element of an input. + /// @param end The iterator to the past-the end element of an input. + /// @return A detected encoding type. + static utf_encode_t detect(ItrType& begin, const ItrType& end) { + if FK_YAML_UNLIKELY (begin == end) { + return utf_encode_t::UTF_8; } - IterType current = m_begin; - std::deque cr_itrs {}; - while (current != m_end) { - const auto first = static_cast(*current); - const uint32_t num_bytes = utf8::get_num_bytes(first); + std::array bytes {}; + bytes.fill(0xFFu); + auto current = begin; + for (int i = 0; i < 4 && current != end; i++, ++current) { + bytes[i] = uint8_t(*current); // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index) + } - switch (num_bytes) { - case 1: - if FK_YAML_UNLIKELY (first == 0x0D /*CR*/) { - cr_itrs.emplace_back(current); - } - break; - case 2: { - const auto second = static_cast(*++current); - const bool is_valid = utf8::validate(first, second); - if FK_YAML_UNLIKELY (!is_valid) { - throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", {first, second}); - } - break; - } - case 3: { - const auto second = static_cast(*++current); - const auto third = static_cast(*++current); - const bool is_valid = utf8::validate(first, second, third); - if FK_YAML_UNLIKELY (!is_valid) { - throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", {first, second, third}); - } - break; - } - case 4: { - const auto second = static_cast(*++current); - const auto third = static_cast(*++current); - const auto fourth = static_cast(*++current); - const bool is_valid = utf8::validate(first, second, third, fourth); - if FK_YAML_UNLIKELY (!is_valid) { - throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", {first, second, third, fourth}); - } - break; - } - default: // LCOV_EXCL_LINE - unreachable(); // LCOV_EXCL_LINE - } + bool has_bom = false; + 
const utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); - ++current; + if FK_YAML_UNLIKELY (encode_type != utf_encode_t::UTF_8) { + throw exception("char8_t characters must be encoded in the UTF-8 format."); } - m_buffer.reserve(std::distance(m_begin, m_end) - cr_itrs.size()); - current = m_begin; - for (const auto& cr_itr : cr_itrs) { - std::transform( - current, cr_itr, std::back_inserter(m_buffer), [](char8_t c) { return static_cast(c); }); - current = std::next(cr_itr); + if (has_bom) { + // skip reading the BOM. + std::advance(begin, 3); } - std::transform(current, m_end, std::back_inserter(m_buffer), [](char8_t c) { return static_cast(c); }); - return str_view {m_buffer.begin(), m_buffer.end()}; + return encode_type; } - -private: - /// The iterator at the beginning of input. - IterType m_begin {}; - /// The iterator at the end of input. - IterType m_end {}; - /// The encoding type for this input adapter. - utf_encode_t m_encode_type {utf_encode_t::UTF_8}; - /// The normalized owned buffer. - std::string m_buffer; - /// Whether ItrType is a contiguous iterator. - bool m_is_contiguous {false}; }; #endif // FK_YAML_HAS_CHAR8_T -/// @brief An input adapter for iterators of type char16_t. -/// @tparam IterType An iterator type. -template -class iterator_input_adapter::value>> { -public: - /// @brief Construct a new iterator_input_adapter object. - iterator_input_adapter() = default; +/// @brief The partial specialization of utf_encode_detector for char16_t iterators. +/// @tparam ItrType An iterator type. +template +struct utf_encode_detector::value>> { + /// @brief Detects the encoding type of the input, and consumes a BOM if it exists. + /// @param begin The iterator to the first element of an input. + /// @param end The iterator to the past-the end element of an input. + /// @return A detected encoding type. 
+ static utf_encode_t detect(ItrType& begin, const ItrType& end) { + if FK_YAML_UNLIKELY (begin == end) { + return utf_encode_t::UTF_16BE; + } - /// @brief Construct a new iterator_input_adapter object. - /// @param begin The beginning of iterators. - /// @param end The end of iterators. - /// @param encode_type The encoding type for this input adapter. - /// @param is_contiguous Whether iterators are contiguous or not. - iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type, bool is_contiguous) noexcept - : m_begin(begin), - m_end(end), - m_encode_type(encode_type), - m_is_contiguous(is_contiguous) { - FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); + // the inner curly braces are necessary for older compilers + std::array bytes {{}}; + bytes.fill(0xFFu); + auto current = begin; + for (int i = 0; i < 2 && current != end; i++, ++current) { + // NOLINTBEGIN(cppcoreguidelines-pro-bounds-constant-array-index) + const char16_t elem = *current; + const int idx_base = i * 2; + bytes[idx_base] = static_cast(elem >> 8); + bytes[idx_base + 1] = static_cast(elem); + // NOLINTEND(cppcoreguidelines-pro-bounds-constant-array-index) + } + + bool has_bom = false; + const utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + if FK_YAML_UNLIKELY (encode_type != utf_encode_t::UTF_16BE && encode_type != utf_encode_t::UTF_16LE) { + throw exception("char16_t characters must be encoded in the UTF-16 format."); + } + + if (has_bom) { + // skip reading the BOM. + std::advance(begin, 1); + } + + return encode_type; } +}; - // allow only move construct/assignment like other input adapters. 
- iterator_input_adapter(const iterator_input_adapter&) = delete; - iterator_input_adapter(iterator_input_adapter&& rhs) = default; - iterator_input_adapter& operator=(const iterator_input_adapter&) = delete; - iterator_input_adapter& operator=(iterator_input_adapter&&) = default; - ~iterator_input_adapter() = default; +/// @brief The partial specialization of utf_encode_detector for char32_t iterators. +/// @tparam ItrType An iterator type. +template +struct utf_encode_detector::value>> { + /// @brief Detects the encoding type of the input, and consumes a BOM if it exists. + /// @param begin The iterator to the first element of an input. + /// @param end The iterator to the past-the end element of an input. + /// @return A detected encoding type. + static utf_encode_t detect(ItrType& begin, const ItrType& end) { + if FK_YAML_UNLIKELY (begin == end) { + return utf_encode_t::UTF_32BE; + } - /// @brief Get view into the input buffer contents. - /// @return View into the input buffer contents. - str_view get_buffer_view() { - if FK_YAML_UNLIKELY (m_begin == m_end) { - return {}; + // the inner curly braces are necessary for older compilers + std::array bytes {{}}; + const char32_t elem = *begin; + bytes[0] = static_cast(elem >> 24); + bytes[1] = static_cast(elem >> 16); + bytes[2] = static_cast(elem >> 8); + bytes[3] = static_cast(elem); + + bool has_bom = false; + const utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + if FK_YAML_UNLIKELY (encode_type != utf_encode_t::UTF_32BE && encode_type != utf_encode_t::UTF_32LE) { + throw exception("char32_t characters must be encoded in the UTF-32 format."); } - const int shift_bits = (m_encode_type == utf_encode_t::UTF_16BE) ? 0 : 8; + if (has_bom) { + // skip reading the BOM. 
+ std::advance(begin, 1); + } - std::array encoded_buffer {{0, 0}}; - uint32_t encoded_buf_size {0}; - std::array utf8_buffer {{0, 0, 0, 0}}; - uint32_t utf8_buf_size {0}; + return encode_type; + } +}; - // Assume the input characters are all ASCII characters. - // That's the most probably the case. - m_buffer.reserve(std::distance(m_begin, m_end)); +/// @brief A class which detects UTF encoding type and the existence of a BOM from the input file. +struct file_utf_encode_detector { + /// @brief Detects the encoding type of the input, and consumes a BOM if it exists. + /// @param p_file The input file handle. + /// @return A detected encoding type. + static utf_encode_t detect(std::FILE* p_file) noexcept { + // the inner curly braces are necessary for older compilers + std::array bytes {{}}; + bytes.fill(0xFFu); + for (int i = 0; i < 4; i++) { + char byte = 0; + const std::size_t size = std::fread(&byte, sizeof(char), 1, p_file); + if (size != sizeof(char)) { + break; + } + bytes[i] = static_cast(byte & 0xFF); // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index) + } - IterType current = m_begin; - while (current != m_end || encoded_buf_size != 0) { - while (current != m_end && encoded_buf_size < 2) { - char16_t utf16 = *current; - ++current; - utf16 = static_cast(((utf16 & 0x00FFu) << shift_bits) | ((utf16 & 0xFF00u) >> shift_bits)); + bool has_bom = false; + const utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); - if FK_YAML_LIKELY (utf16 != char16_t(0x000Du)) { - // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-constant-array-index) - encoded_buffer[encoded_buf_size++] = utf16; - } + // move back to the beginning if a BOM doesn't exist. 
+ long offset = 0; // NOLINT(google-runtime-int) + if (has_bom) { + switch (encode_type) { + case utf_encode_t::UTF_8: + offset = 3; + break; + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + offset = 2; + break; + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + offset = 4; + break; } + } + std::fseek(p_file, offset, SEEK_SET); // NOLINT(cert-err33-c) - uint32_t consumed_size = 0; - utf8::from_utf16(encoded_buffer, utf8_buffer, consumed_size, utf8_buf_size); + return encode_type; + } +}; - if FK_YAML_LIKELY (consumed_size == 1) { - encoded_buffer[0] = encoded_buffer[1]; - encoded_buffer[1] = 0; +/// @brief A class which detects UTF encoding type and the existence of a BOM from the input file. +struct stream_utf_encode_detector { + /// @brief Detects the encoding type of the input, and consumes a BOM if it exists. + /// @param p_file The input file handle. + /// @return A detected encoding type. + static utf_encode_t detect(std::istream& is) noexcept { + // the inner curly braces are necessary for older compilers + std::array bytes {{}}; + bytes.fill(0xFFu); + for (int i = 0; i < 4; i++) { + char ch = 0; + is.read(&ch, 1); + const std::streamsize size = is.gcount(); + if (size != 1) { + // without this, seekg() will fail. + is.clear(); + break; } - encoded_buf_size -= consumed_size; + bytes[i] = static_cast(ch & 0xFF); // NOLINT(cppcoreguidelines-pro-bounds-constant-array-index) + } - m_buffer.append(reinterpret_cast(utf8_buffer.data()), utf8_buf_size); + bool has_bom = false; + const utf_encode_t encode_type = detect_encoding_type(bytes, has_bom); + + // move back to the beginning if a BOM doesn't exist. 
+ std::streamoff offset = 0; + if (has_bom) { + switch (encode_type) { + case utf_encode_t::UTF_8: + offset = 3; + break; + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + offset = 2; + break; + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + offset = 4; + break; + } } + is.seekg(offset, std::ios_base::beg); - return str_view {m_buffer.begin(), m_buffer.end()}; + return encode_type; } - -private: - /// The iterator at the beginning of input. - IterType m_begin {}; - /// The iterator at the end of input. - IterType m_end {}; - /// The encoding type for this input adapter. - utf_encode_t m_encode_type {utf_encode_t::UTF_16BE}; - /// The normalized owned buffer. - std::string m_buffer; - /// Whether ItrType is a contiguous iterator. - bool m_is_contiguous {false}; }; -/// @brief An input adapter for iterators of type char32_t. -/// @tparam IterType An iterator type. -template -class iterator_input_adapter::value>> { -public: - /// @brief Construct a new iterator_input_adapter object. - iterator_input_adapter() = default; +FK_YAML_DETAIL_NAMESPACE_END - /// @brief Construct a new iterator_input_adapter object. - /// @param begin The beginning of iterators. - /// @param end The end of iterators. - /// @param encode_type The encoding type for this input adapter. - /// @param is_contiguous Whether iterators are contiguous or not. - iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type, bool is_contiguous) noexcept - : m_begin(begin), - m_end(end), - m_encode_type(encode_type), - m_is_contiguous(is_contiguous) { - FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); - } +#endif /* FK_YAML_DETAIL_ENCODINGS_UTF_ENCODE_DETECTOR_HPP */ - // allow only move construct/assignment like other input adapters. 
- iterator_input_adapter(const iterator_input_adapter&) = delete; - iterator_input_adapter(iterator_input_adapter&& rhs) = default; - iterator_input_adapter& operator=(const iterator_input_adapter&) = delete; - iterator_input_adapter& operator=(iterator_input_adapter&&) = default; - ~iterator_input_adapter() = default; +// #include - /// @brief Get view into the input buffer contents. - /// @return View into the input buffer contents. - str_view get_buffer_view() { - if FK_YAML_UNLIKELY (m_begin == m_end) { - return {}; - } +// #include - int shift_bits[4] {0, 0, 0, 0}; - if (m_encode_type == utf_encode_t::UTF_32LE) { - shift_bits[0] = 24; - shift_bits[1] = 8; - shift_bits[2] = 8; - shift_bits[3] = 24; - } +// #include - std::array utf8_buffer {{0, 0, 0, 0}}; - uint32_t utf8_buf_size {0}; +// #include - // Assume the input characters are all ASCII characters. - // That's the most probably the case. - m_buffer.reserve(std::distance(m_begin, m_end)); +// #include - IterType current = m_begin; - while (current != m_end) { - const char32_t tmp = *current; - ++current; - const auto utf32 = static_cast( - ((tmp & 0xFF000000u) >> shift_bits[0]) | ((tmp & 0x00FF0000u) >> shift_bits[1]) | - ((tmp & 0x0000FF00u) << shift_bits[2]) | ((tmp & 0x000000FFu) << shift_bits[3])); +// #include - if FK_YAML_UNLIKELY (utf32 != static_cast(0x0000000Du)) { - utf8::from_utf32(utf32, utf8_buffer, utf8_buf_size); - m_buffer.append(reinterpret_cast(utf8_buffer.data()), utf8_buf_size); - } - } - return str_view {m_buffer.begin(), m_buffer.end()}; - } +FK_YAML_DETAIL_NAMESPACE_BEGIN -private: - /// The iterator at the beginning of input. - IterType m_begin {}; - /// The iterator at the end of input. - IterType m_end {}; - /// The encoding type for this input adapter. - utf_encode_t m_encode_type {utf_encode_t::UTF_32BE}; - /// The normalized owned buffer. - std::string m_buffer; - /// Whether ItrType is a contiguous iterator. 
- bool m_is_contiguous {false}; -}; +/////////////////////// +// input_adapter // +/////////////////////// -/// @brief An input adapter for C-style file handles. -class file_input_adapter { +template +class iterator_input_adapter; + +/// @brief An input adapter for iterators of type char. +/// @tparam IterType An iterator type. +template +class iterator_input_adapter::value>> { public: - /// @brief Construct a new file_input_adapter object. - file_input_adapter() = default; + /// @brief Construct a new iterator_input_adapter object. + iterator_input_adapter() = default; - /// @brief Construct a new file_input_adapter object. - /// @note - /// This class doesn't call fopen() nor fclose(). - /// It's user's responsibility to call those functions. - /// @param file A file handle for this adapter. (A non-null pointer is assumed.) + /// @brief Construct a new iterator_input_adapter object. + /// @param begin The beginning of iterators. + /// @param end The end of iterators. /// @param encode_type The encoding type for this input adapter. - explicit file_input_adapter(std::FILE* file, utf_encode_t encode_type) noexcept - : m_file(file), - m_encode_type(encode_type) { + /// @param is_contiguous Whether iterators are contiguous or not. + iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type, bool is_contiguous) noexcept + : m_begin(begin), + m_end(end), + m_encode_type(encode_type), + m_is_contiguous(is_contiguous) { } - // allow only move construct/assignment - file_input_adapter(const file_input_adapter&) = delete; - file_input_adapter(file_input_adapter&& rhs) = default; - file_input_adapter& operator=(const file_input_adapter&) = delete; - file_input_adapter& operator=(file_input_adapter&&) = default; - ~file_input_adapter() = default; + // allow only move construct/assignment like other input adapters. 
+ iterator_input_adapter(const iterator_input_adapter&) = delete; + iterator_input_adapter(iterator_input_adapter&& rhs) = default; + iterator_input_adapter& operator=(const iterator_input_adapter&) = delete; + iterator_input_adapter& operator=(iterator_input_adapter&&) = default; + ~iterator_input_adapter() = default; /// @brief Get view into the input buffer contents. /// @return View into the input buffer contents. str_view get_buffer_view() { + if FK_YAML_UNLIKELY (m_begin == m_end) { + return {}; + } + + m_buffer.clear(); + switch (m_encode_type) { case utf_encode_t::UTF_8: return get_buffer_view_utf8(); @@ -9408,42 +9265,20 @@ class file_input_adapter { str_view get_buffer_view_utf8() { FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); - m_buffer.clear(); - char tmp_buf[256] {}; - constexpr std::size_t buf_size = sizeof(tmp_buf) / sizeof(tmp_buf[0]); - std::size_t read_size = 0; - while ((read_size = std::fread(&tmp_buf[0], sizeof(char), buf_size, m_file)) > 0) { - char* p_current = &tmp_buf[0]; - char* p_end = p_current + read_size; - - // copy tmp_buf to m_buffer, dropping CRs. 
- char* p_cr = p_current; - do { - if FK_YAML_UNLIKELY (*p_cr == '\r') { - m_buffer.append(p_current, p_cr); - p_current = p_cr + 1; - } - ++p_cr; - } while (p_cr != p_end); - - m_buffer.append(p_current, p_end); - } - - if FK_YAML_UNLIKELY (m_buffer.empty()) { - return {}; - } - - auto current = m_buffer.begin(); - auto end = m_buffer.end(); - while (current != end) { - const auto first = static_cast(*current++); + IterType current = m_begin; + std::deque cr_itrs {}; + while (current != m_end) { + const auto first = static_cast(*current); const uint32_t num_bytes = utf8::get_num_bytes(first); switch (num_bytes) { case 1: + if FK_YAML_UNLIKELY (first == 0x0D /*CR*/) { + cr_itrs.emplace_back(current); + } break; case 2: { - const auto second = static_cast(*current++); + const auto second = static_cast(*++current); const bool is_valid = utf8::validate(first, second); if FK_YAML_UNLIKELY (!is_valid) { throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", {first, second}); @@ -9451,8 +9286,8 @@ class file_input_adapter { break; } case 3: { - const auto second = static_cast(*current++); - const auto third = static_cast(*current++); + const auto second = static_cast(*++current); + const auto third = static_cast(*++current); const bool is_valid = utf8::validate(first, second, third); if FK_YAML_UNLIKELY (!is_valid) { throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", {first, second, third}); @@ -9460,9 +9295,9 @@ class file_input_adapter { break; } case 4: { - const auto second = static_cast(*current++); - const auto third = static_cast(*current++); - const auto fourth = static_cast(*current++); + const auto second = static_cast(*++current); + const auto third = static_cast(*++current); + const auto fourth = static_cast(*++current); const bool is_valid = utf8::validate(first, second, third, fourth); if FK_YAML_UNLIKELY (!is_valid) { throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", {first, second, third, fourth}); @@ -9472,7 +9307,25 @@ class 
file_input_adapter { default: // LCOV_EXCL_LINE unreachable(); // LCOV_EXCL_LINE } + + ++current; + } + + const bool is_contiguous_no_cr = cr_itrs.empty() && m_is_contiguous; + if FK_YAML_LIKELY (is_contiguous_no_cr) { + // The input iterators (begin, end) can be used as-is during parsing. + FK_YAML_ASSERT(m_begin != m_end); + return str_view {&*m_begin, static_cast(std::distance(m_begin, m_end))}; + } + + m_buffer.reserve(std::distance(m_begin, m_end) - cr_itrs.size()); + + current = m_begin; + for (const auto& cr_itr : cr_itrs) { + m_buffer.append(current, cr_itr); + current = std::next(cr_itr); } + m_buffer.append(current, m_end); return str_view {m_buffer.begin(), m_buffer.end()}; } @@ -9482,26 +9335,33 @@ class file_input_adapter { str_view get_buffer_view_utf16() { FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); + // Assume the input characters are all ASCII characters. + // That's the most probably the case. + m_buffer.reserve(std::distance(m_begin, m_end) / 2); + int shift_bits[2] {0, 0}; if (m_encode_type == utf_encode_t::UTF_16BE) { shift_bits[0] = 8; } - else { // m_encode_type == utf_encode_t::UTF_16LE + else // m_encode_type == utf_encode_t::UTF_16LE + { shift_bits[1] = 8; } - char chars[2] = {0, 0}; std::array encoded_buffer {{0, 0}}; uint32_t encoded_buf_size {0}; std::array utf8_buffer {{0, 0, 0, 0}}; uint32_t utf8_buf_size {0}; - while (std::feof(m_file) == 0) { - while (encoded_buf_size < 2 && std::fread(&chars[0], sizeof(char), 2, m_file) == 2) { - const auto utf16 = static_cast( - (static_cast(chars[0]) << shift_bits[0]) | - (static_cast(chars[1]) << shift_bits[1])); - if FK_YAML_LIKELY (utf16 != static_cast(0x000Du)) { + IterType current = m_begin; + while (current != m_end || encoded_buf_size != 0) { + while (current != m_end && encoded_buf_size < 2) { + auto utf16 = static_cast(static_cast(*current) << shift_bits[0]); + utf16 |= static_cast(static_cast(*++current) << shift_bits[1]); + 
++current; + + // skip appending CRs. + if FK_YAML_LIKELY (utf16 != char16_t(0x000Du)) { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-constant-array-index) encoded_buffer[encoded_buf_size++] = utf16; } @@ -9526,31 +9386,36 @@ class file_input_adapter { str_view get_buffer_view_utf32() { FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); + // Assume the input characters are all ASCII characters. + // That's the most probably the case. + m_buffer.reserve(std::distance(m_begin, m_end) / 4); + int shift_bits[4] {0, 0, 0, 0}; if (m_encode_type == utf_encode_t::UTF_32BE) { shift_bits[0] = 24; shift_bits[1] = 16; shift_bits[2] = 8; } - else { // m_encode_type == utf_encode_t::UTF_32LE + else // m_encode_type == utf_encode_t::UTF_32LE + { shift_bits[1] = 8; shift_bits[2] = 16; shift_bits[3] = 24; } - char chars[4] = {0, 0, 0, 0}; std::array utf8_buffer {{0, 0, 0, 0}}; uint32_t utf8_buf_size {0}; - while (std::feof(m_file) == 0) { - const std::size_t size = std::fread(&chars[0], sizeof(char), 4, m_file); - if (size != 4) { - break; - } - - const auto utf32 = static_cast( - (static_cast(chars[0]) << shift_bits[0]) | (static_cast(chars[1]) << shift_bits[1]) | - (static_cast(chars[2]) << shift_bits[2]) | (static_cast(chars[3]) << shift_bits[3])); + IterType current = m_begin; + while (current != m_end) { + auto utf32 = static_cast(*current << shift_bits[0]); + ++current; + utf32 |= static_cast(*current << shift_bits[1]); + ++current; + utf32 |= static_cast(*current << shift_bits[2]); + ++current; + utf32 |= static_cast(*current << shift_bits[3]); + ++current; if FK_YAML_LIKELY (utf32 != char32_t(0x0000000Du)) { utf8::from_utf32(utf32, utf8_buffer, utf8_buf_size); @@ -9562,98 +9427,71 @@ class file_input_adapter { } private: - /// A pointer to the input file handle. - std::FILE* m_file {nullptr}; + /// The iterator at the beginning of input. + IterType m_begin {}; + /// The iterator at the end of input. 
+ IterType m_end {}; /// The encoding type for this input adapter. utf_encode_t m_encode_type {utf_encode_t::UTF_8}; /// The normalized owned buffer. std::string m_buffer; + /// Whether ItrType is a contiguous iterator. + bool m_is_contiguous {false}; }; -/// @brief An input adapter for streams -class stream_input_adapter { +#if FK_YAML_HAS_CHAR8_T + +/// @brief An input adapter for iterators of type char8_t. +/// @tparam IterType An iterator type. +template +class iterator_input_adapter::value>> { public: - /// @brief Construct a new stream_input_adapter object. - stream_input_adapter() = default; + /// @brief Construct a new iterator_input_adapter object. + iterator_input_adapter() = default; - /// @brief Construct a new stream_input_adapter object. - /// @param is A reference to the target input stream. + /// @brief Construct a new iterator_input_adapter object. + /// @param begin The beginning of iterators. + /// @param end The end of iterators. /// @param encode_type The encoding type for this input adapter. - explicit stream_input_adapter(std::istream& is, utf_encode_t encode_type) noexcept - : m_istream(&is), - m_encode_type(encode_type) { + /// @param is_contiguous Whether iterators are contiguous or not. + iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type, bool is_contiguous) noexcept + : m_begin(begin), + m_end(end), + m_encode_type(encode_type), + m_is_contiguous(is_contiguous) { + // char8_t characters must be encoded in the UTF-8 format. + // See https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0482r6.html. 
+ FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); } - // allow only move construct/assignment - stream_input_adapter(const stream_input_adapter&) = delete; - stream_input_adapter& operator=(const stream_input_adapter&) = delete; - stream_input_adapter(stream_input_adapter&&) = default; - stream_input_adapter& operator=(stream_input_adapter&&) = default; - ~stream_input_adapter() = default; + // allow only move construct/assignment like other input adapters. + iterator_input_adapter(const iterator_input_adapter&) = delete; + iterator_input_adapter(iterator_input_adapter&& rhs) = default; + iterator_input_adapter& operator=(const iterator_input_adapter&) = delete; + iterator_input_adapter& operator=(iterator_input_adapter&&) = default; + ~iterator_input_adapter() = default; /// @brief Get view into the input buffer contents. - /// @return View into the input buffer contents. - str_view get_buffer_view() { - switch (m_encode_type) { - case utf_encode_t::UTF_8: - return get_buffer_view_utf8(); - case utf_encode_t::UTF_16BE: - case utf_encode_t::UTF_16LE: - return get_buffer_view_utf16(); - case utf_encode_t::UTF_32BE: - case utf_encode_t::UTF_32LE: - return get_buffer_view_utf32(); - default: // LCOV_EXCL_LINE - detail::unreachable(); // LCOV_EXCL_LINE - } - } - -private: - /// @brief The concrete implementation of get_buffer_view() for UTF-8 encoded inputs. - /// @return View into the UTF-8 encoded input buffer contents. - str_view get_buffer_view_utf8() { - FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); - - m_buffer.clear(); - char tmp_buf[256] {}; - do { - m_istream->read(&tmp_buf[0], 256); - const auto read_size = static_cast(m_istream->gcount()); - if FK_YAML_UNLIKELY (read_size == 0) { - break; - } - - char* p_current = &tmp_buf[0]; - char* p_end = p_current + read_size; - - // copy tmp_buf to m_buffer, dropping CRs. 
- char* p_cr = p_current; - do { - if FK_YAML_UNLIKELY (*p_cr == '\r') { - m_buffer.append(p_current, p_cr); - p_current = p_cr + 1; - } - ++p_cr; - } while (p_cr != p_end); - - m_buffer.append(p_current, p_end); - } while (!m_istream->eof()); - - if FK_YAML_UNLIKELY (m_buffer.empty()) { + /// @return View into the input buffer contents. + str_view get_buffer_view() { + if FK_YAML_UNLIKELY (m_begin == m_end) { return {}; } - auto current = m_buffer.begin(); - auto end = m_buffer.end(); - while (current != end) { - const auto first = static_cast(*current++); + IterType current = m_begin; + std::deque cr_itrs {}; + while (current != m_end) { + const auto first = static_cast(*current); const uint32_t num_bytes = utf8::get_num_bytes(first); switch (num_bytes) { case 1: + if FK_YAML_UNLIKELY (first == 0x0D /*CR*/) { + cr_itrs.emplace_back(current); + } break; case 2: { - const auto second = static_cast(*current++); + const auto second = static_cast(*++current); const bool is_valid = utf8::validate(first, second); if FK_YAML_UNLIKELY (!is_valid) { throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", {first, second}); @@ -9661,8 +9499,8 @@ class stream_input_adapter { break; } case 3: { - const auto second = static_cast(*current++); - const auto third = static_cast(*current++); + const auto second = static_cast(*++current); + const auto third = static_cast(*++current); const bool is_valid = utf8::validate(first, second, third); if FK_YAML_UNLIKELY (!is_valid) { throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", {first, second, third}); @@ -9670,9 +9508,9 @@ class stream_input_adapter { break; } case 4: { - const auto second = static_cast(*current++); - const auto third = static_cast(*current++); - const auto fourth = static_cast(*current++); + const auto second = static_cast(*++current); + const auto third = static_cast(*++current); + const auto fourth = static_cast(*++current); const bool is_valid = utf8::validate(first, second, third, fourth); if 
FK_YAML_UNLIKELY (!is_valid) { throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", {first, second, third, fourth}); @@ -9682,43 +9520,91 @@ class stream_input_adapter { default: // LCOV_EXCL_LINE unreachable(); // LCOV_EXCL_LINE } + + ++current; + } + + m_buffer.reserve(std::distance(m_begin, m_end) - cr_itrs.size()); + current = m_begin; + for (const auto& cr_itr : cr_itrs) { + std::transform( + current, cr_itr, std::back_inserter(m_buffer), [](char8_t c) { return static_cast(c); }); + current = std::next(cr_itr); } + std::transform(current, m_end, std::back_inserter(m_buffer), [](char8_t c) { return static_cast(c); }); return str_view {m_buffer.begin(), m_buffer.end()}; } - /// @brief The concrete implementation of get_buffer_view() for UTF-16 encoded inputs. - /// @return View into the UTF-8 encoded input buffer contents. - str_view get_buffer_view_utf16() { +private: + /// The iterator at the beginning of input. + IterType m_begin {}; + /// The iterator at the end of input. + IterType m_end {}; + /// The encoding type for this input adapter. + utf_encode_t m_encode_type {utf_encode_t::UTF_8}; + /// The normalized owned buffer. + std::string m_buffer; + /// Whether ItrType is a contiguous iterator. + bool m_is_contiguous {false}; +}; + +#endif // FK_YAML_HAS_CHAR8_T + +/// @brief An input adapter for iterators of type char16_t. +/// @tparam IterType An iterator type. +template +class iterator_input_adapter::value>> { +public: + /// @brief Construct a new iterator_input_adapter object. + iterator_input_adapter() = default; + + /// @brief Construct a new iterator_input_adapter object. + /// @param begin The beginning of iterators. + /// @param end The end of iterators. + /// @param encode_type The encoding type for this input adapter. + /// @param is_contiguous Whether iterators are contiguous or not. 
+ iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type, bool is_contiguous) noexcept + : m_begin(begin), + m_end(end), + m_encode_type(encode_type), + m_is_contiguous(is_contiguous) { FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); + } - int shift_bits[2] {0, 0}; - if (m_encode_type == utf_encode_t::UTF_16BE) { - shift_bits[0] = 8; - } - else { // m_encode_type == utf_encode_t::UTF_16LE - shift_bits[1] = 8; + // allow only move construct/assignment like other input adapters. + iterator_input_adapter(const iterator_input_adapter&) = delete; + iterator_input_adapter(iterator_input_adapter&& rhs) = default; + iterator_input_adapter& operator=(const iterator_input_adapter&) = delete; + iterator_input_adapter& operator=(iterator_input_adapter&&) = default; + ~iterator_input_adapter() = default; + + /// @brief Get view into the input buffer contents. + /// @return View into the input buffer contents. + str_view get_buffer_view() { + if FK_YAML_UNLIKELY (m_begin == m_end) { + return {}; } - char chars[2] = {0, 0}; + const int shift_bits = (m_encode_type == utf_encode_t::UTF_16BE) ? 0 : 8; + std::array encoded_buffer {{0, 0}}; uint32_t encoded_buf_size {0}; std::array utf8_buffer {{0, 0, 0, 0}}; uint32_t utf8_buf_size {0}; - do { - while (encoded_buf_size < 2) { - m_istream->read(&chars[0], 2); - const std::streamsize size = m_istream->gcount(); - if FK_YAML_UNLIKELY (size != 2) { - break; - } + // Assume the input characters are all ASCII characters. + // That's the most probably the case. 
+ m_buffer.reserve(std::distance(m_begin, m_end)); - const auto utf16 = static_cast( - (static_cast(chars[0]) << shift_bits[0]) | - (static_cast(chars[1]) << shift_bits[1])); + IterType current = m_begin; + while (current != m_end || encoded_buf_size != 0) { + while (current != m_end && encoded_buf_size < 2) { + char16_t utf16 = *current; + ++current; + utf16 = static_cast(((utf16 & 0x00FFu) << shift_bits) | ((utf16 & 0xFF00u) >> shift_bits)); - if FK_YAML_LIKELY (utf16 != static_cast(0x000Du)) { + if FK_YAML_LIKELY (utf16 != char16_t(0x000Du)) { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-constant-array-index) encoded_buffer[encoded_buf_size++] = utf16; } @@ -9729,929 +9615,1084 @@ class stream_input_adapter { if FK_YAML_LIKELY (consumed_size == 1) { encoded_buffer[0] = encoded_buffer[1]; + encoded_buffer[1] = 0; } encoded_buf_size -= consumed_size; m_buffer.append(reinterpret_cast(utf8_buffer.data()), utf8_buf_size); - } while (!m_istream->eof()); - - return str_view {m_buffer.begin(), m_buffer.end()}; - } - - /// @brief The concrete implementation of get_buffer_view() for UTF-32 encoded inputs. - /// @return View into the UTF-8 encoded input buffer contents. 
- str_view get_buffer_view_utf32() { - FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); - - int shift_bits[4] {0, 0, 0, 0}; - if (m_encode_type == utf_encode_t::UTF_32BE) { - shift_bits[0] = 24; - shift_bits[1] = 16; - shift_bits[2] = 8; - } - else { // m_encode_type == utf_encode_t::UTF_32LE - shift_bits[1] = 8; - shift_bits[2] = 16; - shift_bits[3] = 24; } - char chars[4] = {0, 0, 0, 0}; - std::array utf8_buffer {{0, 0, 0, 0}}; - uint32_t utf8_buf_size {0}; - - do { - m_istream->read(&chars[0], 4); - const std::streamsize size = m_istream->gcount(); - if FK_YAML_UNLIKELY (size != 4) { - break; - } - - const auto utf32 = static_cast( - (static_cast(chars[0]) << shift_bits[0]) | (static_cast(chars[1]) << shift_bits[1]) | - (static_cast(chars[2]) << shift_bits[2]) | (static_cast(chars[3]) << shift_bits[3])); - - if FK_YAML_LIKELY (utf32 != char32_t(0x0000000Du)) { - utf8::from_utf32(utf32, utf8_buffer, utf8_buf_size); - m_buffer.append(reinterpret_cast(utf8_buffer.data()), utf8_buf_size); - } - } while (!m_istream->eof()); - return str_view {m_buffer.begin(), m_buffer.end()}; } private: - /// A pointer to the input stream object. - std::istream* m_istream {nullptr}; + /// The iterator at the beginning of input. + IterType m_begin {}; + /// The iterator at the end of input. + IterType m_end {}; /// The encoding type for this input adapter. - utf_encode_t m_encode_type {utf_encode_t::UTF_8}; - /// The normalized owned buffer. - std::string m_buffer; -}; - -///////////////////////////////// -// input_adapter providers // -///////////////////////////////// - -/// @brief A concrete factory method for iterator_input_adapter objects with iterators. -/// @tparam ItrType An iterator type. -/// @param begin The beginning of iterators. -/// @param end The end of iterators. -/// @param is_contiguous Whether iterators refer to a contiguous byte array. -/// @return An iterator_input_adapter object for the target iterator type. 
-template -inline iterator_input_adapter create_iterator_input_adapter(ItrType begin, ItrType end, bool is_contiguous) { - const utf_encode_t encode_type = utf_encode_detector::detect(begin, end); - return iterator_input_adapter(begin, end, encode_type, is_contiguous); -} - -/// @brief A factory method for iterator_input_adapter objects with iterator values. -/// @tparam ItrType An iterator type. -/// @param begin The beginning of iterators. -/// @param end The end of iterators. -/// @return iterator_input_adapter An iterator_input_adapter object for the target iterator type. -template -inline iterator_input_adapter input_adapter(ItrType begin, ItrType end) { - bool is_contiguous = true; - const auto size = std::distance(begin, end); - - // Check if `begin` & `end` are contiguous iterators. - // Getting distance between begin and (end - 1) avoids dereferencing an invalid sentinel. - if FK_YAML_LIKELY (size > 0) { - using char_ptr_t = remove_cvref_t::pointer>; - char_ptr_t p_begin = &*begin; - char_ptr_t p_second_last = &*std::next(begin, size - 1); - is_contiguous = (p_second_last - p_begin == size); - } - return create_iterator_input_adapter(begin, end, is_contiguous); -} - -/// @brief A factory method for iterator_input_adapter objects with C-style arrays. -/// @tparam T A type of arrayed objects. -/// @tparam N A size of an array. -/// @return decltype(input_adapter(array, array + N)) An iterator_input_adapter object for the target array. -template -inline auto input_adapter(T (&array)[N]) -> decltype(create_iterator_input_adapter(array, array + (N - 1), true)) { - return create_iterator_input_adapter(array, array + (N - 1), true); -} - -/// @brief A namespace to implement container_input_adapter_factory for internal use. -namespace input_adapter_factory { - -using std::begin; -using std::end; - -/// @brief A factory of input adapters for containers. -/// @tparam ContainerType A container type. 
-/// @tparam typename N/A -template -struct container_input_adapter_factory {}; - -/// @brief A partial specialization of container_input_adapter_factory if begin()/end() are available for ContainerType. -/// @tparam ContainerType A container type. -template -struct container_input_adapter_factory< - ContainerType, void_t()), end(std::declval()))>> { - /// A type for resulting input adapter object. - using adapter_type = - decltype(input_adapter(begin(std::declval()), end(std::declval()))); - - /// @brief A factory method of input adapter objects for the target container objects. - /// @param container A container-like input object. - /// @return adapter_type An iterator_input_adapter object. - static adapter_type create(const ContainerType& container) { - return input_adapter(begin(container), end(container)); - } -}; - -} // namespace input_adapter_factory - -/// @brief A factory method for iterator_input_adapter objects with containers. -/// @tparam ContainerType A container type. -/// @param container A container object. -/// @return input_adapter_factory::container_input_adapter_factory::adapter_type -template -inline typename input_adapter_factory::container_input_adapter_factory::adapter_type input_adapter( - const ContainerType& container) { - return input_adapter_factory::container_input_adapter_factory::create(container); -} + utf_encode_t m_encode_type {utf_encode_t::UTF_16BE}; + /// The normalized owned buffer. + std::string m_buffer; + /// Whether ItrType is a contiguous iterator. + bool m_is_contiguous {false}; +}; -/// @brief A factory method for file_input_adapter objects with C-style file handles. -/// @param file A file handle. -/// @return file_input_adapter A file_input_adapter object. -inline file_input_adapter input_adapter(std::FILE* file) { - if FK_YAML_UNLIKELY (!file) { - throw fkyaml::exception("Invalid FILE object pointer."); +/// @brief An input adapter for iterators of type char32_t. +/// @tparam IterType An iterator type. 
+template +class iterator_input_adapter::value>> { +public: + /// @brief Construct a new iterator_input_adapter object. + iterator_input_adapter() = default; + + /// @brief Construct a new iterator_input_adapter object. + /// @param begin The beginning of iterators. + /// @param end The end of iterators. + /// @param encode_type The encoding type for this input adapter. + /// @param is_contiguous Whether iterators are contiguous or not. + iterator_input_adapter(IterType begin, IterType end, utf_encode_t encode_type, bool is_contiguous) noexcept + : m_begin(begin), + m_end(end), + m_encode_type(encode_type), + m_is_contiguous(is_contiguous) { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); } - const utf_encode_t encode_type = file_utf_encode_detector::detect(file); - return file_input_adapter(file, encode_type); -} + // allow only move construct/assignment like other input adapters. + iterator_input_adapter(const iterator_input_adapter&) = delete; + iterator_input_adapter(iterator_input_adapter&& rhs) = default; + iterator_input_adapter& operator=(const iterator_input_adapter&) = delete; + iterator_input_adapter& operator=(iterator_input_adapter&&) = default; + ~iterator_input_adapter() = default; -/// @brief A factory method for stream_input_adapter objects with std::istream objects. -/// @param stream An input stream. -/// @return stream_input_adapter A stream_input_adapter object. -inline stream_input_adapter input_adapter(std::istream& stream) { - if FK_YAML_UNLIKELY (!stream.good()) { - throw fkyaml::exception("Invalid stream."); - } + /// @brief Get view into the input buffer contents. + /// @return View into the input buffer contents. 
+ str_view get_buffer_view() { + if FK_YAML_UNLIKELY (m_begin == m_end) { + return {}; + } - const utf_encode_t encode_type = stream_utf_encode_detector::detect(stream); - return stream_input_adapter(stream, encode_type); -} + int shift_bits[4] {0, 0, 0, 0}; + if (m_encode_type == utf_encode_t::UTF_32LE) { + shift_bits[0] = 24; + shift_bits[1] = 8; + shift_bits[2] = 8; + shift_bits[3] = 24; + } -FK_YAML_DETAIL_NAMESPACE_END + std::array utf8_buffer {{0, 0, 0, 0}}; + uint32_t utf8_buf_size {0}; -#endif /* FK_YAML_DETAIL_INPUT_INPUT_ADAPTER_HPP */ + // Assume the input characters are all ASCII characters. + // That's the most probably the case. + m_buffer.reserve(std::distance(m_begin, m_end)); -// #include -// _______ __ __ __ _____ __ __ __ -// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 -// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML -// -// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani -// SPDX-License-Identifier: MIT + IterType current = m_begin; + while (current != m_end) { + const char32_t tmp = *current; + ++current; + const auto utf32 = static_cast( + ((tmp & 0xFF000000u) >> shift_bits[0]) | ((tmp & 0x00FF0000u) >> shift_bits[1]) | + ((tmp & 0x0000FF00u) << shift_bits[2]) | ((tmp & 0x000000FFu) << shift_bits[3])); -#ifndef FK_YAML_DETAIL_ITERATOR_HPP -#define FK_YAML_DETAIL_ITERATOR_HPP + if FK_YAML_UNLIKELY (utf32 != static_cast(0x0000000Du)) { + utf8::from_utf32(utf32, utf8_buffer, utf8_buf_size); + m_buffer.append(reinterpret_cast(utf8_buffer.data()), utf8_buf_size); + } + } -#include -#include + return str_view {m_buffer.begin(), m_buffer.end()}; + } -// #include +private: + /// The iterator at the beginning of input. + IterType m_begin {}; + /// The iterator at the end of input. + IterType m_end {}; + /// The encoding type for this input adapter. + utf_encode_t m_encode_type {utf_encode_t::UTF_32BE}; + /// The normalized owned buffer. 
+ std::string m_buffer; + /// Whether ItrType is a contiguous iterator. + bool m_is_contiguous {false}; +}; -// #include +/// @brief An input adapter for C-style file handles. +class file_input_adapter { +public: + /// @brief Construct a new file_input_adapter object. + file_input_adapter() = default; -// #include + /// @brief Construct a new file_input_adapter object. + /// @note + /// This class doesn't call fopen() nor fclose(). + /// It's user's responsibility to call those functions. + /// @param file A file handle for this adapter. (A non-null pointer is assumed.) + /// @param encode_type The encoding type for this input adapter. + explicit file_input_adapter(std::FILE* file, utf_encode_t encode_type) noexcept + : m_file(file), + m_encode_type(encode_type) { + } + + // allow only move construct/assignment + file_input_adapter(const file_input_adapter&) = delete; + file_input_adapter(file_input_adapter&& rhs) = default; + file_input_adapter& operator=(const file_input_adapter&) = delete; + file_input_adapter& operator=(file_input_adapter&&) = default; + ~file_input_adapter() = default; + /// @brief Get view into the input buffer contents. + /// @return View into the input buffer contents. + str_view get_buffer_view() { + switch (m_encode_type) { + case utf_encode_t::UTF_8: + return get_buffer_view_utf8(); + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + return get_buffer_view_utf16(); + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + return get_buffer_view_utf32(); + default: // LCOV_EXCL_LINE + detail::unreachable(); // LCOV_EXCL_LINE + } + } -FK_YAML_DETAIL_NAMESPACE_BEGIN +private: + /// @brief The concrete implementation of get_buffer_view() for UTF-8 encoded inputs. + /// @return View into the UTF-8 encoded input buffer contents. 
+ str_view get_buffer_view_utf8() { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); -/// @brief The template definitions of type information used in @ref Iterator class -/// @tparam ValueType The type of iterated elements. -template -struct iterator_traits { - /// A type of iterated elements. - using value_type = typename ValueType::value_type; - /// A type to represent difference between iterators. - using difference_type = typename ValueType::difference_type; - /// A type of an element pointer. - using pointer = typename ValueType::pointer; - /// A type of reference to an element. - using reference = typename ValueType::reference; -}; + m_buffer.clear(); + char tmp_buf[256] {}; + constexpr std::size_t buf_size = sizeof(tmp_buf) / sizeof(tmp_buf[0]); + std::size_t read_size = 0; + while ((read_size = std::fread(&tmp_buf[0], sizeof(char), buf_size, m_file)) > 0) { + char* p_current = &tmp_buf[0]; + char* p_end = p_current + read_size; -/// @brief A specialization of @ref iterator_traits for constant value types. -/// @tparam ValueType The type of iterated elements. -template -struct iterator_traits { - /// A type of iterated elements. - using value_type = typename ValueType::value_type; - /// A type to represent difference between iterators. - using difference_type = typename ValueType::difference_type; - /// A type of a constant element pointer. - using pointer = typename ValueType::const_pointer; - /// A type of constant reference to an element. - using reference = typename ValueType::const_reference; -}; + // copy tmp_buf to m_buffer, dropping CRs. + char* p_cr = p_current; + do { + if FK_YAML_UNLIKELY (*p_cr == '\r') { + m_buffer.append(p_current, p_cr); + p_current = p_cr + 1; + } + ++p_cr; + } while (p_cr != p_end); -/// @brief Definitions of iterator types for iterators internally held. -enum class iterator_t : std::uint8_t { - SEQUENCE, //!< sequence iterator type. - MAPPING, //!< mapping iterator type. 
-}; + m_buffer.append(p_current, p_end); + } -/// @brief The actual storage for iterators internally held in iterator. -template -struct iterator_holder { - static_assert( - is_basic_node::value, - "iterator_holder class only accepts a basic_node as its template parameter."); + if FK_YAML_UNLIKELY (m_buffer.empty()) { + return {}; + } - /// A sequence iterator object. - typename BasicNodeType::sequence_type::iterator sequence_iterator {}; - /// A mapping iterator object. - typename BasicNodeType::mapping_type::iterator mapping_iterator {}; -}; + auto current = m_buffer.begin(); + auto end = m_buffer.end(); + while (current != end) { + const auto first = static_cast(*current++); + const uint32_t num_bytes = utf8::get_num_bytes(first); -/// @brief A class which holds iterators either of sequence or mapping type -/// @tparam ValueType The type of iterated elements. -template -class iterator { - /// @brief The iterator type with ValueType of different const-ness. - using other_iterator_type = typename std::conditional< - std::is_const::value, iterator::type>, - iterator>::type; + switch (num_bytes) { + case 1: + break; + case 2: { + const auto second = static_cast(*current++); + const bool is_valid = utf8::validate(first, second); + if FK_YAML_UNLIKELY (!is_valid) { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", {first, second}); + } + break; + } + case 3: { + const auto second = static_cast(*current++); + const auto third = static_cast(*current++); + const bool is_valid = utf8::validate(first, second, third); + if FK_YAML_UNLIKELY (!is_valid) { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", {first, second, third}); + } + break; + } + case 4: { + const auto second = static_cast(*current++); + const auto third = static_cast(*current++); + const auto fourth = static_cast(*current++); + const bool is_valid = utf8::validate(first, second, third, fourth); + if FK_YAML_UNLIKELY (!is_valid) { + throw fkyaml::invalid_encoding("Invalid UTF-8 
encoding.", {first, second, third, fourth}); + } + break; + } + default: // LCOV_EXCL_LINE + unreachable(); // LCOV_EXCL_LINE + } + } - friend other_iterator_type; + return str_view {m_buffer.begin(), m_buffer.end()}; + } -public: - /// A type for iterator traits of instantiated @Iterator template class. - using iterator_traits_type = iterator_traits; + /// @brief The concrete implementation of get_buffer_view() for UTF-16 encoded inputs. + /// @return View into the UTF-8 encoded input buffer contents. + str_view get_buffer_view_utf16() { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); - /// A type for iterator category tag. - using iterator_category = std::bidirectional_iterator_tag; - /// A type of iterated element. - using value_type = typename iterator_traits_type::value_type; - /// A type to represent differences between iterators. - using difference_type = typename iterator_traits_type::difference_type; - /// A type of an element pointer. - using pointer = typename iterator_traits_type::pointer; - /// A type of reference to an element. - using reference = typename iterator_traits_type::reference; + int shift_bits[2] {0, 0}; + if (m_encode_type == utf_encode_t::UTF_16BE) { + shift_bits[0] = 8; + } + else { // m_encode_type == utf_encode_t::UTF_16LE + shift_bits[1] = 8; + } - static_assert(is_basic_node::value, "iterator class only accepts a basic_node as its value type."); + char chars[2] = {0, 0}; + std::array encoded_buffer {{0, 0}}; + uint32_t encoded_buf_size {0}; + std::array utf8_buffer {{0, 0, 0, 0}}; + uint32_t utf8_buf_size {0}; - /// @brief Constructs an iterator object. 
- iterator() = default; + while (std::feof(m_file) == 0) { + while (encoded_buf_size < 2 && std::fread(&chars[0], sizeof(char), 2, m_file) == 2) { + const auto utf16 = static_cast( + (static_cast(chars[0]) << shift_bits[0]) | + (static_cast(chars[1]) << shift_bits[1])); + if FK_YAML_LIKELY (utf16 != static_cast(0x000Du)) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-constant-array-index) + encoded_buffer[encoded_buf_size++] = utf16; + } + } - /// @brief Construct a new iterator object with sequence iterator object. - /// @param[in] itr An sequence iterator object. - iterator(const typename value_type::sequence_type::iterator& itr) noexcept { - m_iterator_holder.sequence_iterator = itr; - } + uint32_t consumed_size = 0; + utf8::from_utf16(encoded_buffer, utf8_buffer, consumed_size, utf8_buf_size); - /// @brief Construct a new iterator object with mapping iterator object. - /// @param[in] itr An mapping iterator object. - iterator(const typename value_type::mapping_type::iterator& itr) noexcept - : m_inner_iterator_type(iterator_t::MAPPING) { - m_iterator_holder.mapping_iterator = itr; - } + if FK_YAML_LIKELY (consumed_size == 1) { + encoded_buffer[0] = encoded_buffer[1]; + } + encoded_buf_size -= consumed_size; - /// @brief Copy constructs an iterator. - iterator(const iterator&) = default; + m_buffer.append(reinterpret_cast(utf8_buffer.data()), utf8_buf_size); + } - /// @brief Copy constructs an iterator from another iterator with different const-ness in ValueType. - /// @note This copy constructor is not defined if ValueType is not const to avoid const removal from ValueType. - /// @tparam OtherIterator The iterator type to copy from. - /// @param other An iterator to copy from with different const-ness in ValueType. 
- template < - typename OtherIterator, - enable_if_t< - conjunction, std::is_const>::value, int> = 0> - iterator(const OtherIterator& other) noexcept - : m_inner_iterator_type(other.m_inner_iterator_type), - m_iterator_holder(other.m_iterator_holder) { + return str_view {m_buffer.begin(), m_buffer.end()}; } - /// @brief A copy assignment operator of the iterator class. - iterator& operator=(const iterator&) = default; + /// @brief The concrete implementation of get_buffer_view() for UTF-32 encoded inputs. + /// @return View into the UTF-8 encoded input buffer contents. + str_view get_buffer_view_utf32() { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); - template < - typename OtherIterator, - enable_if_t< - conjunction, std::is_const>::value, int> = 0> - iterator& operator=(const OtherIterator& other) noexcept { - m_inner_iterator_type = other.m_inner_iterator_type; - m_iterator_holder = other.m_iterator_holder; - return *this; - } + int shift_bits[4] {0, 0, 0, 0}; + if (m_encode_type == utf_encode_t::UTF_32BE) { + shift_bits[0] = 24; + shift_bits[1] = 16; + shift_bits[2] = 8; + } + else { // m_encode_type == utf_encode_t::UTF_32LE + shift_bits[1] = 8; + shift_bits[2] = 16; + shift_bits[3] = 24; + } - /// @brief Move constructs an iterator. - iterator(iterator&&) = default; + char chars[4] = {0, 0, 0, 0}; + std::array utf8_buffer {{0, 0, 0, 0}}; + uint32_t utf8_buf_size {0}; - /// @brief A move assignment operator of the iterator class. - iterator& operator=(iterator&&) = default; + while (std::feof(m_file) == 0) { + const std::size_t size = std::fread(&chars[0], sizeof(char), 4, m_file); + if (size != 4) { + break; + } - /// @brief Destroys an iterator. 
- ~iterator() = default; + const auto utf32 = static_cast( + (static_cast(chars[0]) << shift_bits[0]) | (static_cast(chars[1]) << shift_bits[1]) | + (static_cast(chars[2]) << shift_bits[2]) | (static_cast(chars[3]) << shift_bits[3])); - /// @brief An arrow operator of the iterator class. - /// @return pointer A pointer to the BasicNodeType object internally referenced by the actual iterator object. - pointer operator->() noexcept { - if (m_inner_iterator_type == iterator_t::SEQUENCE) { - return &(*(m_iterator_holder.sequence_iterator)); + if FK_YAML_LIKELY (utf32 != char32_t(0x0000000Du)) { + utf8::from_utf32(utf32, utf8_buffer, utf8_buf_size); + m_buffer.append(reinterpret_cast(utf8_buffer.data()), utf8_buf_size); + } } - // m_inner_iterator_type == iterator_t::MAPPING: - return &(m_iterator_holder.mapping_iterator->second); + return str_view {m_buffer.begin(), m_buffer.end()}; } - /// @brief A dereference operator of the iterator class. - /// @return reference Reference to the Node object internally referenced by the actual iterator object. - reference operator*() const noexcept { - if (m_inner_iterator_type == iterator_t::SEQUENCE) { - return *(m_iterator_holder.sequence_iterator); - } +private: + /// A pointer to the input file handle. + std::FILE* m_file {nullptr}; + /// The encoding type for this input adapter. + utf_encode_t m_encode_type {utf_encode_t::UTF_8}; + /// The normalized owned buffer. + std::string m_buffer; +}; - // m_inner_iterator_type == iterator_t::MAPPING: - return m_iterator_holder.mapping_iterator->second; - } +/// @brief An input adapter for streams +class stream_input_adapter { +public: + /// @brief Construct a new stream_input_adapter object. + stream_input_adapter() = default; - /// @brief A compound assignment operator by sum of the Iterator class. - /// @param i The difference from this Iterator object with which it moves forward. - /// @return Iterator& Reference to this Iterator object. 
- iterator& operator+=(difference_type i) noexcept { - switch (m_inner_iterator_type) { - case iterator_t::SEQUENCE: - std::advance(m_iterator_holder.sequence_iterator, i); - break; - case iterator_t::MAPPING: - std::advance(m_iterator_holder.mapping_iterator, i); - break; - } - return *this; + /// @brief Construct a new stream_input_adapter object. + /// @param is A reference to the target input stream. + /// @param encode_type The encoding type for this input adapter. + explicit stream_input_adapter(std::istream& is, utf_encode_t encode_type) noexcept + : m_istream(&is), + m_encode_type(encode_type) { } - /// @brief A plus operator of the iterator class. - /// @param i The difference from this iterator object. - /// @return iterator An iterator object which has been added @a i. - iterator operator+(difference_type i) const noexcept { - auto result = *this; - result += i; - return result; - } + // allow only move construct/assignment + stream_input_adapter(const stream_input_adapter&) = delete; + stream_input_adapter& operator=(const stream_input_adapter&) = delete; + stream_input_adapter(stream_input_adapter&&) = default; + stream_input_adapter& operator=(stream_input_adapter&&) = default; + ~stream_input_adapter() = default; - /// @brief An pre-increment operator of the iterator class. - /// @return iterator& Reference to this iterator object. - iterator& operator++() noexcept { - switch (m_inner_iterator_type) { - case iterator_t::SEQUENCE: - std::advance(m_iterator_holder.sequence_iterator, 1); - break; - case iterator_t::MAPPING: - std::advance(m_iterator_holder.mapping_iterator, 1); - break; + /// @brief Get view into the input buffer contents. + /// @return View into the input buffer contents. 
+ str_view get_buffer_view() { + switch (m_encode_type) { + case utf_encode_t::UTF_8: + return get_buffer_view_utf8(); + case utf_encode_t::UTF_16BE: + case utf_encode_t::UTF_16LE: + return get_buffer_view_utf16(); + case utf_encode_t::UTF_32BE: + case utf_encode_t::UTF_32LE: + return get_buffer_view_utf32(); + default: // LCOV_EXCL_LINE + detail::unreachable(); // LCOV_EXCL_LINE } - return *this; - } - - /// @brief A post-increment operator of the iterator class. - /// @return iterator An iterator object which has been incremented. - iterator operator++(int) & noexcept { - auto result = *this; - ++(*this); - return result; } - /// @brief A compound assignment operator by difference of the iterator class. - /// @param i The difference from this iterator object with which it moves backward. - /// @return iterator& Reference to this iterator object. - iterator& operator-=(difference_type i) noexcept { - return operator+=(-i); - } +private: + /// @brief The concrete implementation of get_buffer_view() for UTF-8 encoded inputs. + /// @return View into the UTF-8 encoded input buffer contents. + str_view get_buffer_view_utf8() { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_8); - /// @brief A minus operator of the iterator class. - /// @param i The difference from this iterator object. - /// @return iterator An iterator object from which has been subtracted @ i. - iterator operator-(difference_type i) const noexcept { - auto result = *this; - result -= i; - return result; - } + m_buffer.clear(); + char tmp_buf[256] {}; + do { + m_istream->read(&tmp_buf[0], 256); + const auto read_size = static_cast(m_istream->gcount()); + if FK_YAML_UNLIKELY (read_size == 0) { + break; + } - /// @brief A pre-decrement operator of the iterator class. - /// @return iterator& Reference to this iterator object. 
- iterator& operator--() noexcept { - switch (m_inner_iterator_type) { - case iterator_t::SEQUENCE: - std::advance(m_iterator_holder.sequence_iterator, -1); - break; - case iterator_t::MAPPING: - std::advance(m_iterator_holder.mapping_iterator, -1); - break; - } - return *this; - } + char* p_current = &tmp_buf[0]; + char* p_end = p_current + read_size; - /// @brief A post-decrement operator of the iterator class - /// @return iterator An iterator object which has been decremented. - iterator operator--(int) & noexcept { - auto result = *this; - --(*this); - return result; - } + // copy tmp_buf to m_buffer, dropping CRs. + char* p_cr = p_current; + do { + if FK_YAML_UNLIKELY (*p_cr == '\r') { + m_buffer.append(p_current, p_cr); + p_current = p_cr + 1; + } + ++p_cr; + } while (p_cr != p_end); - /// @brief An equal-to operator of the iterator class. - /// @param rhs An iterator object to be compared with this iterator object. - /// @return true This iterator object is equal to the other. - /// @return false This iterator object is not equal to the other. 
- template < - typename Iterator, - enable_if_t< - disjunction, std::is_same>::value, int> = 0> - bool operator==(const Iterator& rhs) const { - if FK_YAML_UNLIKELY (m_inner_iterator_type != rhs.m_inner_iterator_type) { - throw fkyaml::exception("Cannot compare iterators of different container types."); + m_buffer.append(p_current, p_end); + } while (!m_istream->eof()); + + if FK_YAML_UNLIKELY (m_buffer.empty()) { + return {}; } - if (m_inner_iterator_type == iterator_t::SEQUENCE) { - return (m_iterator_holder.sequence_iterator == rhs.m_iterator_holder.sequence_iterator); + auto current = m_buffer.begin(); + auto end = m_buffer.end(); + while (current != end) { + const auto first = static_cast(*current++); + const uint32_t num_bytes = utf8::get_num_bytes(first); + + switch (num_bytes) { + case 1: + break; + case 2: { + const auto second = static_cast(*current++); + const bool is_valid = utf8::validate(first, second); + if FK_YAML_UNLIKELY (!is_valid) { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", {first, second}); + } + break; + } + case 3: { + const auto second = static_cast(*current++); + const auto third = static_cast(*current++); + const bool is_valid = utf8::validate(first, second, third); + if FK_YAML_UNLIKELY (!is_valid) { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", {first, second, third}); + } + break; + } + case 4: { + const auto second = static_cast(*current++); + const auto third = static_cast(*current++); + const auto fourth = static_cast(*current++); + const bool is_valid = utf8::validate(first, second, third, fourth); + if FK_YAML_UNLIKELY (!is_valid) { + throw fkyaml::invalid_encoding("Invalid UTF-8 encoding.", {first, second, third, fourth}); + } + break; + } + default: // LCOV_EXCL_LINE + unreachable(); // LCOV_EXCL_LINE + } } - // m_inner_iterator_type == iterator_t::MAPPING - return (m_iterator_holder.mapping_iterator == rhs.m_iterator_holder.mapping_iterator); + return str_view {m_buffer.begin(), m_buffer.end()}; } 
- /// @brief An not-equal-to operator of the iterator class. - /// @param rhs An iterator object to be compared with this iterator object. - /// @return true This iterator object is not equal to the other. - /// @return false This iterator object is equal to the other. - template < - typename Iterator, - enable_if_t< - disjunction, std::is_same>::value, int> = 0> - bool operator!=(const Iterator& rhs) const { - return !operator==(rhs); - } + /// @brief The concrete implementation of get_buffer_view() for UTF-16 encoded inputs. + /// @return View into the UTF-8 encoded input buffer contents. + str_view get_buffer_view_utf16() { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_16BE || m_encode_type == utf_encode_t::UTF_16LE); - /// @brief A less-than operator of the iterator class. - /// @param rhs An iterator object to be compared with this iterator object. - /// @return true This iterator object is less than the other. - /// @return false This iterator object is not less than the other. - template < - typename Iterator, - enable_if_t< - disjunction, std::is_same>::value, int> = 0> - bool operator<(const Iterator& rhs) const { - if FK_YAML_UNLIKELY (m_inner_iterator_type != rhs.m_inner_iterator_type) { - throw fkyaml::exception("Cannot compare iterators of different container types."); + int shift_bits[2] {0, 0}; + if (m_encode_type == utf_encode_t::UTF_16BE) { + shift_bits[0] = 8; } - - if FK_YAML_UNLIKELY (m_inner_iterator_type == iterator_t::MAPPING) { - throw fkyaml::exception("Cannot compare order of iterators of the mapping container type"); + else { // m_encode_type == utf_encode_t::UTF_16LE + shift_bits[1] = 8; } - return (m_iterator_holder.sequence_iterator < rhs.m_iterator_holder.sequence_iterator); - } + char chars[2] = {0, 0}; + std::array encoded_buffer {{0, 0}}; + uint32_t encoded_buf_size {0}; + std::array utf8_buffer {{0, 0, 0, 0}}; + uint32_t utf8_buf_size {0}; - /// @brief A less-than-or-equal-to operator of the iterator class. 
- /// @param rhs An iterator object to be compared with this iterator object. - /// @return true This iterator object is either less than or equal to the other. - /// @return false This iterator object is neither less than nor equal to the other. - template < - typename Iterator, - enable_if_t< - disjunction, std::is_same>::value, int> = 0> - bool operator<=(const Iterator& rhs) const { - return !rhs.operator<(*this); - } + do { + while (encoded_buf_size < 2) { + m_istream->read(&chars[0], 2); + const std::streamsize size = m_istream->gcount(); + if FK_YAML_UNLIKELY (size != 2) { + break; + } - /// @brief A greater-than operator of the iterator class. - /// @param rhs An iterator object to be compared with this iterator object. - /// @return true This iterator object is greater than the other. - /// @return false This iterator object is not greater than the other. - template < - typename Iterator, - enable_if_t< - disjunction, std::is_same>::value, int> = 0> - bool operator>(const Iterator& rhs) const { - return !operator<=(rhs); - } + const auto utf16 = static_cast( + (static_cast(chars[0]) << shift_bits[0]) | + (static_cast(chars[1]) << shift_bits[1])); - /// @brief A greater-than-or-equal-to operator of the iterator class. - /// @param rhs An iterator object to be compared with this iterator object. - /// @return true This iterator object is either greater than or equal to the other. - /// @return false This iterator object is neither greater than nor equal to the other. - template < - typename Iterator, - enable_if_t< - disjunction, std::is_same>::value, int> = 0> - bool operator>=(const Iterator& rhs) const { - return !operator<(rhs); - } + if FK_YAML_LIKELY (utf16 != static_cast(0x000Du)) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-constant-array-index) + encoded_buffer[encoded_buf_size++] = utf16; + } + } -public: - /// @brief Get the type of the internal iterator implementation. 
- /// @return iterator_t The type of the internal iterator implementation. - iterator_t type() const noexcept { - return m_inner_iterator_type; - } + uint32_t consumed_size = 0; + utf8::from_utf16(encoded_buffer, utf8_buffer, consumed_size, utf8_buf_size); - /// @brief Get the mapping key node of the current iterator. - /// @return The mapping key node of the current iterator. - const typename value_type::mapping_type::key_type& key() const { - if FK_YAML_UNLIKELY (m_inner_iterator_type == iterator_t::SEQUENCE) { - throw fkyaml::exception("Cannot retrieve key from non-mapping iterators."); - } + if FK_YAML_LIKELY (consumed_size == 1) { + encoded_buffer[0] = encoded_buffer[1]; + } + encoded_buf_size -= consumed_size; - return m_iterator_holder.mapping_iterator->first; - } + m_buffer.append(reinterpret_cast(utf8_buffer.data()), utf8_buf_size); + } while (!m_istream->eof()); - /// @brief Get reference to the YAML node of the current iterator. - /// @return Reference to the YAML node of the current iterator. - reference value() const noexcept { - return operator*(); + return str_view {m_buffer.begin(), m_buffer.end()}; } -private: - /// A type of the internally-held iterator. - iterator_t m_inner_iterator_type {iterator_t::SEQUENCE}; - /// A holder of actual iterators. - iterator_holder m_iterator_holder {}; -}; + /// @brief The concrete implementation of get_buffer_view() for UTF-32 encoded inputs. + /// @return View into the UTF-8 encoded input buffer contents. + str_view get_buffer_view_utf32() { + FK_YAML_ASSERT(m_encode_type == utf_encode_t::UTF_32BE || m_encode_type == utf_encode_t::UTF_32LE); -/// @brief Get reference to a mapping key node. -/// @tparam ValueType The iterator value type. -/// @tparam I The element index. -/// @param i An iterator object. -/// @return Reference to a mapping key node. 
-template = 0> -inline auto get(const iterator& i) -> decltype(i.key()) { - return i.key(); -} + int shift_bits[4] {0, 0, 0, 0}; + if (m_encode_type == utf_encode_t::UTF_32BE) { + shift_bits[0] = 24; + shift_bits[1] = 16; + shift_bits[2] = 8; + } + else { // m_encode_type == utf_encode_t::UTF_32LE + shift_bits[1] = 8; + shift_bits[2] = 16; + shift_bits[3] = 24; + } -/// @brief Get reference to a mapping value node. -/// @tparam ValueType The iterator value type. -/// @tparam I The element index -/// @param i An iterator object. -/// @return Reference to a mapping value node. -template = 0> -inline auto get(const iterator& i) -> decltype(i.value()) { - return i.value(); -} + char chars[4] = {0, 0, 0, 0}; + std::array utf8_buffer {{0, 0, 0, 0}}; + uint32_t utf8_buf_size {0}; -FK_YAML_DETAIL_NAMESPACE_END + do { + m_istream->read(&chars[0], 4); + const std::streamsize size = m_istream->gcount(); + if FK_YAML_UNLIKELY (size != 4) { + break; + } -namespace std { + const auto utf32 = static_cast( + (static_cast(chars[0]) << shift_bits[0]) | (static_cast(chars[1]) << shift_bits[1]) | + (static_cast(chars[2]) << shift_bits[2]) | (static_cast(chars[3]) << shift_bits[3])); -#ifdef __clang__ -// clang emits warnings against mixed usage of class/struct for tuple_size/tuple_element. -// see also: https://groups.google.com/a/isocpp.org/g/std-discussion/c/QC-AMb5oO1w -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wmismatched-tags" -#endif + if FK_YAML_LIKELY (utf32 != char32_t(0x0000000Du)) { + utf8::from_utf32(utf32, utf8_buffer, utf8_buf_size); + m_buffer.append(reinterpret_cast(utf8_buffer.data()), utf8_buf_size); + } + } while (!m_istream->eof()); -/// @brief Partial specialization of std::tuple_size for iterator class. -/// @tparam ValueType The iterator value type. 
-template -// NOLINTNEXTLINE(cert-dcl58-cpp) -struct tuple_size<::fkyaml::detail::iterator> : integral_constant {}; + return str_view {m_buffer.begin(), m_buffer.end()}; + } -/// @brief Partial specialization of std::tuple_element for iterator class. -/// @tparam ValueType The iterator value type. -/// @tparam I The element index. -template -// NOLINTNEXTLINE(cert-dcl58-cpp) -struct tuple_element> { - using type = decltype(get(std::declval<::fkyaml::detail::iterator>())); +private: + /// A pointer to the input stream object. + std::istream* m_istream {nullptr}; + /// The encoding type for this input adapter. + utf_encode_t m_encode_type {utf_encode_t::UTF_8}; + /// The normalized owned buffer. + std::string m_buffer; }; -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - -} // namespace std +///////////////////////////////// +// input_adapter providers // +///////////////////////////////// -#endif /* FK_YAML_DETAIL_ITERATOR_HPP */ +/// @brief A concrete factory method for iterator_input_adapter objects with iterators. +/// @tparam ItrType An iterator type. +/// @param begin The beginning of iterators. +/// @param end The end of iterators. +/// @param is_contiguous Whether iterators refer to a contiguous byte array. +/// @return An iterator_input_adapter object for the target iterator type. 
+template +inline iterator_input_adapter create_iterator_input_adapter(ItrType begin, ItrType end, bool is_contiguous) { + const utf_encode_t encode_type = utf_encode_detector::detect(begin, end); + return iterator_input_adapter(begin, end, encode_type, is_contiguous); +} -// #include -// _______ __ __ __ _____ __ __ __ -// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 -// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML -// -// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani -// SPDX-License-Identifier: MIT +/// @brief A factory method for iterator_input_adapter objects with iterator values. +/// @tparam ItrType An iterator type. +/// @param begin The beginning of iterators. +/// @param end The end of iterators. +/// @return iterator_input_adapter An iterator_input_adapter object for the target iterator type. +template +inline iterator_input_adapter input_adapter(ItrType begin, ItrType end) { + bool is_contiguous = true; + const auto size = std::distance(begin, end); -#ifndef FK_YAML_DETAIL_MAP_RANGE_PROXY_HPP -#define FK_YAML_DETAIL_MAP_RANGE_PROXY_HPP + // Check if `begin` & `end` are contiguous iterators. + // Getting distance between begin and (end - 1) avoids dereferencing an invalid sentinel. + if FK_YAML_LIKELY (size > 0) { + using char_ptr_t = remove_cvref_t::pointer>; + char_ptr_t p_begin = &*begin; + char_ptr_t p_second_last = &*std::next(begin, size - 1); + is_contiguous = (p_second_last - p_begin == size); + } + return create_iterator_input_adapter(begin, end, is_contiguous); +} -// #include +/// @brief A factory method for iterator_input_adapter objects with C-style arrays. +/// @tparam T A type of arrayed objects. +/// @tparam N A size of an array. +/// @return decltype(input_adapter(array, array + N)) An iterator_input_adapter object for the target array. 
+template +inline auto input_adapter(T (&array)[N]) -> decltype(create_iterator_input_adapter(array, array + (N - 1), true)) { + return create_iterator_input_adapter(array, array + (N - 1), true); +} -// #include +/// @brief A namespace to implement container_input_adapter_factory for internal use. +namespace input_adapter_factory { +using std::begin; +using std::end; -FK_YAML_DETAIL_NAMESPACE_BEGIN +/// @brief A factory of input adapters for containers. +/// @tparam ContainerType A container type. +/// @tparam typename N/A +template +struct container_input_adapter_factory {}; -/// @brief A helper iterator class which wraps a mapping iterator object. -/// @tparam Iterator The base iterator type. -template -class map_iterator_proxy { -public: - /// @brief The type of the pointed-to elements by base iterators. - using value_type = Iterator; +/// @brief A partial specialization of container_input_adapter_factory if begin()/end() are available for ContainerType. +/// @tparam ContainerType A container type. +template +struct container_input_adapter_factory< + ContainerType, void_t()), end(std::declval()))>> { + /// A type for resulting input adapter object. + using adapter_type = + decltype(input_adapter(begin(std::declval()), end(std::declval()))); - /// @brief The type to represent difference between the pointed-to elements by base iterators. - using difference_type = std::ptrdiff_t; + /// @brief A factory method of input adapter objects for the target container objects. + /// @param container A container-like input object. + /// @return adapter_type An iterator_input_adapter object. + static adapter_type create(const ContainerType& container) { + return input_adapter(begin(container), end(container)); + } +}; - /// @brief The type of the pointed-to element references by base iterators. - using reference = value_type&; +} // namespace input_adapter_factory - /// @brief The type of the pointed-to element pointers by base iterators. 
- using pointer = value_type*; +/// @brief A factory method for iterator_input_adapter objects with containers. +/// @tparam ContainerType A container type. +/// @param container A container object. +/// @return input_adapter_factory::container_input_adapter_factory::adapter_type +template +inline typename input_adapter_factory::container_input_adapter_factory::adapter_type input_adapter( + const ContainerType& container) { + return input_adapter_factory::container_input_adapter_factory::create(container); +} - /// @brief The iterator category. - using iterator_category = std::forward_iterator_tag; +/// @brief A factory method for file_input_adapter objects with C-style file handles. +/// @param file A file handle. +/// @return file_input_adapter A file_input_adapter object. +inline file_input_adapter input_adapter(std::FILE* file) { + if FK_YAML_UNLIKELY (!file) { + throw fkyaml::exception("Invalid FILE object pointer."); + } - /// @brief Constructs a map_iterator_proxy object. - map_iterator_proxy() = default; + const utf_encode_t encode_type = file_utf_encode_detector::detect(file); + return file_input_adapter(file, encode_type); +} - /// @brief Constructs a map_iterator_proxy object with an Iterator object. - /// @param i A base iterator object. - map_iterator_proxy(const Iterator& i) noexcept - : m_base_iterator(i) { +/// @brief A factory method for stream_input_adapter objects with std::istream objects. +/// @param stream An input stream. +/// @return stream_input_adapter A stream_input_adapter object. +inline stream_input_adapter input_adapter(std::istream& stream) { + if FK_YAML_UNLIKELY (!stream.good()) { + throw fkyaml::exception("Invalid stream."); } - /// @brief Copy constructs a map_iterator_proxy object. - map_iterator_proxy(const map_iterator_proxy&) = default; - - /// @brief Copy assigns a map_iterator_proxy object. 
- map_iterator_proxy& operator=(const map_iterator_proxy&) = default; + const utf_encode_t encode_type = stream_utf_encode_detector::detect(stream); + return stream_input_adapter(stream, encode_type); +} - /// @brief Move constructs a map_iterator_proxy object. - map_iterator_proxy(map_iterator_proxy&&) = default; +FK_YAML_DETAIL_NAMESPACE_END - /// @brief Move assigns a map_iterator_proxy object. - map_iterator_proxy& operator=(map_iterator_proxy&&) = default; +#endif /* FK_YAML_DETAIL_INPUT_INPUT_ADAPTER_HPP */ - /// @brief Destructs a map_iterator_proxy object. - ~map_iterator_proxy() = default; +// #include +// _______ __ __ __ _____ __ __ __ +// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library +// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 +// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML +// +// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani +// SPDX-License-Identifier: MIT - /// @brief Get reference to the base iterator object. - /// @return Reference to the base iterator object. - reference operator*() noexcept { - return m_base_iterator; - } +#ifndef FK_YAML_DETAIL_ITERATOR_HPP +#define FK_YAML_DETAIL_ITERATOR_HPP - /// @brief Get pointer to the base iterator object. - /// @return Pointer to the base iterator object. - pointer operator->() noexcept { - return &m_base_iterator; - } +#include +#include - /// @brief Pre-increments the base iterator object. - /// @return Reference to this map_iterator_proxy object. - map_iterator_proxy& operator++() noexcept { - ++m_base_iterator; - return *this; - } +// #include - /// @brief Post-increments the base iterator object. - /// @return A map_iterator_proxy object with its base iterator incremented. - map_iterator_proxy operator++(int) & noexcept { - auto result = *this; - ++(*this); - return result; - } +// #include - /// @brief Check equality between map_iterator_proxy objects. - /// @param rhs A map_iterator_proxy object to compare with. 
- /// @return true if this map_iterator_proxy object is equal to `rhs`, false otherwise. - bool operator==(const map_iterator_proxy& rhs) const noexcept { - return m_base_iterator == rhs.m_base_iterator; - } +// #include - /// @brief Check inequality between map_iterator_proxy objects. - /// @param rhs A map_iterator_proxy object to compare with. - /// @return true if this map_iterator_proxy object is not equal to `rhs`, false otherwise. - bool operator!=(const map_iterator_proxy& rhs) const noexcept { - return m_base_iterator != rhs.m_base_iterator; - } - /// @brief Get the mapping key node pointed by the base iterator. - /// @return Reference to the mapping key node. - typename Iterator::reference key() const { - return m_base_iterator.key(); - } +FK_YAML_DETAIL_NAMESPACE_BEGIN - /// @brief Get the mapping value node pointed by the base iterator. - /// @return Reference to the mapping value node. - typename Iterator::reference value() const noexcept { - return m_base_iterator.value(); - } +/// @brief The template definitions of type information used in @ref Iterator class +/// @tparam ValueType The type of iterated elements. +template +struct iterator_traits { + /// A type of iterated elements. + using value_type = typename ValueType::value_type; + /// A type to represent difference between iterators. + using difference_type = typename ValueType::difference_type; + /// A type of an element pointer. + using pointer = typename ValueType::pointer; + /// A type of reference to an element. + using reference = typename ValueType::reference; +}; -private: - /// The base iterator object. - Iterator m_base_iterator {}; +/// @brief A specialization of @ref iterator_traits for constant value types. +/// @tparam ValueType The type of iterated elements. +template +struct iterator_traits { + /// A type of iterated elements. + using value_type = typename ValueType::value_type; + /// A type to represent difference between iterators. 
+ using difference_type = typename ValueType::difference_type; + /// A type of a constant element pointer. + using pointer = typename ValueType::const_pointer; + /// A type of constant reference to an element. + using reference = typename ValueType::const_reference; }; -/// @brief A helper struct which allows accessing node iterator member functions in range-based for loops. -/// @tparam BasicNodeType A basic_node template instance type. +/// @brief Definitions of iterator types for iterators internally held. +enum class iterator_t : std::uint8_t { + SEQUENCE, //!< sequence iterator type. + MAPPING, //!< mapping iterator type. +}; + +/// @brief The actual storage for iterators internally held in iterator. template -class map_range_proxy { +struct iterator_holder { static_assert( is_basic_node::value, - "map_range_proxy only accepts a basic_node type as its template parameter."); - -public: - /// @brief The type of non-const iterators. - using iterator = map_iterator_proxy::value, typename BasicNodeType::const_iterator, - typename BasicNodeType::iterator>::type>; + "iterator_holder class only accepts a basic_node as its template parameter."); - /// @brief The type of const iterators. - using const_iterator = map_iterator_proxy; + /// A sequence iterator object. + typename BasicNodeType::sequence_type::iterator sequence_iterator {}; + /// A mapping iterator object. + typename BasicNodeType::mapping_type::iterator mapping_iterator {}; +}; - /// @brief Constructs a map_range_proxy object with a BasicNodeType object. - /// @param map A mapping node object. - map_range_proxy(BasicNodeType& map) noexcept - : mp_map(&map) { - } +/// @brief A class which holds iterators either of sequence or mapping type +/// @tparam ValueType The type of iterated elements. +template +class iterator { + /// @brief The iterator type with ValueType of different const-ness. 
+ using other_iterator_type = typename std::conditional< + std::is_const::value, iterator::type>, + iterator>::type; - /// @brief Copy constructs a map_range_proxy object. - map_range_proxy(const map_range_proxy&) = default; + friend other_iterator_type; - /// @brief Copy assigns a map_range_proxy object. - /// @return Reference to this map_range_proxy object. - map_range_proxy& operator=(const map_range_proxy&) = default; +public: + /// A type for iterator traits of instantiated @Iterator template class. + using iterator_traits_type = iterator_traits; - /// @brief Move constructs a map_range_proxy object. - map_range_proxy(map_range_proxy&&) = default; + /// A type for iterator category tag. + using iterator_category = std::bidirectional_iterator_tag; + /// A type of iterated element. + using value_type = typename iterator_traits_type::value_type; + /// A type to represent differences between iterators. + using difference_type = typename iterator_traits_type::difference_type; + /// A type of an element pointer. + using pointer = typename iterator_traits_type::pointer; + /// A type of reference to an element. + using reference = typename iterator_traits_type::reference; - /// @brief Move assigns a map_range_proxy object. - /// @return Reference to this map_range_proxy object. - map_range_proxy& operator=(map_range_proxy&&) = default; + static_assert(is_basic_node::value, "iterator class only accepts a basic_node as its value type."); - /// @brief Destructs a map_range_proxy object. - ~map_range_proxy() = default; + /// @brief Constructs an iterator object. + iterator() = default; - /// @brief Get an iterator to the first element. - /// @return An iterator to the first element. - iterator begin() noexcept { - return {mp_map->begin()}; + /// @brief Construct a new iterator object with sequence iterator object. + /// @param[in] itr An sequence iterator object. 
+ iterator(const typename value_type::sequence_type::iterator& itr) noexcept { + m_iterator_holder.sequence_iterator = itr; } - /// @brief Get a const iterator to the first element. - /// @return A const iterator to the first element. - const_iterator begin() const noexcept { - return {mp_map->cbegin()}; + /// @brief Construct a new iterator object with mapping iterator object. + /// @param[in] itr An mapping iterator object. + iterator(const typename value_type::mapping_type::iterator& itr) noexcept + : m_inner_iterator_type(iterator_t::MAPPING) { + m_iterator_holder.mapping_iterator = itr; } - /// @brief Get an iterator to the past-the-last element. - /// @return An iterator to the past-the-last element. - iterator end() noexcept { - return {mp_map->end()}; + /// @brief Copy constructs an iterator. + iterator(const iterator&) = default; + + /// @brief Copy constructs an iterator from another iterator with different const-ness in ValueType. + /// @note This copy constructor is not defined if ValueType is not const to avoid const removal from ValueType. + /// @tparam OtherIterator The iterator type to copy from. + /// @param other An iterator to copy from with different const-ness in ValueType. + template < + typename OtherIterator, + enable_if_t< + conjunction, std::is_const>::value, int> = 0> + iterator(const OtherIterator& other) noexcept + : m_inner_iterator_type(other.m_inner_iterator_type), + m_iterator_holder(other.m_iterator_holder) { } - /// @brief Get a const iterator to the past-the-last element. - /// @return A const iterator to the past-the-last element. - const_iterator end() const noexcept { - return {mp_map->cend()}; + /// @brief A copy assignment operator of the iterator class. 
+ iterator& operator=(const iterator&) = default; + + template < + typename OtherIterator, + enable_if_t< + conjunction, std::is_const>::value, int> = 0> + iterator& operator=(const OtherIterator& other) noexcept { + m_inner_iterator_type = other.m_inner_iterator_type; + m_iterator_holder = other.m_iterator_holder; + return *this; } -private: - /// Pointer to the mapping node object. (non-null) - BasicNodeType* mp_map {nullptr}; -}; + /// @brief Move constructs an iterator. + iterator(iterator&&) = default; -FK_YAML_DETAIL_NAMESPACE_END + /// @brief A move assignment operator of the iterator class. + iterator& operator=(iterator&&) = default; -#endif /* FK_YAML_DETAIL_MAP_RANGE_PROXY_HPP */ + /// @brief Destroys an iterator. + ~iterator() = default; -// #include + /// @brief An arrow operator of the iterator class. + /// @return pointer A pointer to the BasicNodeType object internally referenced by the actual iterator object. + pointer operator->() noexcept { + if (m_inner_iterator_type == iterator_t::SEQUENCE) { + return &(*(m_iterator_holder.sequence_iterator)); + } -// #include + // m_inner_iterator_type == iterator_t::MAPPING: + return &(m_iterator_holder.mapping_iterator->second); + } -// #include + /// @brief A dereference operator of the iterator class. + /// @return reference Reference to the Node object internally referenced by the actual iterator object. + reference operator*() const noexcept { + if (m_inner_iterator_type == iterator_t::SEQUENCE) { + return *(m_iterator_holder.sequence_iterator); + } -// #include + // m_inner_iterator_type == iterator_t::MAPPING: + return m_iterator_holder.mapping_iterator->second; + } -// #include + /// @brief A compound assignment operator by sum of the Iterator class. + /// @param i The difference from this Iterator object with which it moves forward. + /// @return Iterator& Reference to this Iterator object. 
+ iterator& operator+=(difference_type i) noexcept { + switch (m_inner_iterator_type) { + case iterator_t::SEQUENCE: + std::advance(m_iterator_holder.sequence_iterator, i); + break; + case iterator_t::MAPPING: + std::advance(m_iterator_holder.mapping_iterator, i); + break; + } + return *this; + } -// #include -// _______ __ __ __ _____ __ __ __ -// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 -// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML -// -// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani -// SPDX-License-Identifier: MIT + /// @brief A plus operator of the iterator class. + /// @param i The difference from this iterator object. + /// @return iterator An iterator object which has been added @a i. + iterator operator+(difference_type i) const noexcept { + auto result = *this; + result += i; + return result; + } -#ifndef FK_YAML_DETAIL_NODE_REF_STORAGE_HPP -#define FK_YAML_DETAIL_NODE_REF_STORAGE_HPP + /// @brief An pre-increment operator of the iterator class. + /// @return iterator& Reference to this iterator object. + iterator& operator++() noexcept { + switch (m_inner_iterator_type) { + case iterator_t::SEQUENCE: + std::advance(m_iterator_holder.sequence_iterator, 1); + break; + case iterator_t::MAPPING: + std::advance(m_iterator_holder.mapping_iterator, 1); + break; + } + return *this; + } -#include -#include -#include + /// @brief A post-increment operator of the iterator class. + /// @return iterator An iterator object which has been incremented. + iterator operator++(int) & noexcept { + auto result = *this; + ++(*this); + return result; + } -// #include + /// @brief A compound assignment operator by difference of the iterator class. + /// @param i The difference from this iterator object with which it moves backward. + /// @return iterator& Reference to this iterator object. 
+ iterator& operator-=(difference_type i) noexcept { + return operator+=(-i); + } + + /// @brief A minus operator of the iterator class. + /// @param i The difference from this iterator object. + /// @return iterator An iterator object from which has been subtracted @ i. + iterator operator-(difference_type i) const noexcept { + auto result = *this; + result -= i; + return result; + } + + /// @brief A pre-decrement operator of the iterator class. + /// @return iterator& Reference to this iterator object. + iterator& operator--() noexcept { + switch (m_inner_iterator_type) { + case iterator_t::SEQUENCE: + std::advance(m_iterator_holder.sequence_iterator, -1); + break; + case iterator_t::MAPPING: + std::advance(m_iterator_holder.mapping_iterator, -1); + break; + } + return *this; + } + + /// @brief A post-decrement operator of the iterator class + /// @return iterator An iterator object which has been decremented. + iterator operator--(int) & noexcept { + auto result = *this; + --(*this); + return result; + } -// #include + /// @brief An equal-to operator of the iterator class. + /// @param rhs An iterator object to be compared with this iterator object. + /// @return true This iterator object is equal to the other. + /// @return false This iterator object is not equal to the other. 
+ template < + typename Iterator, + enable_if_t< + disjunction, std::is_same>::value, int> = 0> + bool operator==(const Iterator& rhs) const { + if FK_YAML_UNLIKELY (m_inner_iterator_type != rhs.m_inner_iterator_type) { + throw fkyaml::exception("Cannot compare iterators of different container types."); + } -// #include + if (m_inner_iterator_type == iterator_t::SEQUENCE) { + return (m_iterator_holder.sequence_iterator == rhs.m_iterator_holder.sequence_iterator); + } + // m_inner_iterator_type == iterator_t::MAPPING + return (m_iterator_holder.mapping_iterator == rhs.m_iterator_holder.mapping_iterator); + } -FK_YAML_DETAIL_NAMESPACE_BEGIN + /// @brief An not-equal-to operator of the iterator class. + /// @param rhs An iterator object to be compared with this iterator object. + /// @return true This iterator object is not equal to the other. + /// @return false This iterator object is equal to the other. + template < + typename Iterator, + enable_if_t< + disjunction, std::is_same>::value, int> = 0> + bool operator!=(const Iterator& rhs) const { + return !operator==(rhs); + } -/// @brief A temporal storage for basic_node class objects. -/// @note This class makes it easier to handle lvalue basic_node objects in basic_node ctor with std::initializer_list. -/// @tparam BasicNodeType A basic_node template instance type. -template -class node_ref_storage { - static_assert(is_basic_node::value, "node_ref_storage only accepts basic_node<...>"); + /// @brief A less-than operator of the iterator class. + /// @param rhs An iterator object to be compared with this iterator object. + /// @return true This iterator object is less than the other. + /// @return false This iterator object is not less than the other. 
+ template < + typename Iterator, + enable_if_t< + disjunction, std::is_same>::value, int> = 0> + bool operator<(const Iterator& rhs) const { + if FK_YAML_UNLIKELY (m_inner_iterator_type != rhs.m_inner_iterator_type) { + throw fkyaml::exception("Cannot compare iterators of different container types."); + } - using node_type = BasicNodeType; + if FK_YAML_UNLIKELY (m_inner_iterator_type == iterator_t::MAPPING) { + throw fkyaml::exception("Cannot compare order of iterators of the mapping container type"); + } -public: - /// @brief Construct a new node ref storage object with an rvalue basic_node object. - /// @param n An rvalue basic_node object. - explicit node_ref_storage(node_type&& n) noexcept(std::is_nothrow_move_constructible::value) - : m_owned_value(std::move(n)) { + return (m_iterator_holder.sequence_iterator < rhs.m_iterator_holder.sequence_iterator); } - /// @brief Construct a new node ref storage object with an lvalue basic_node object. - /// @param n An lvalue basic_node object. - explicit node_ref_storage(const node_type& n) noexcept - : m_value_ref(&n) { + /// @brief A less-than-or-equal-to operator of the iterator class. + /// @param rhs An iterator object to be compared with this iterator object. + /// @return true This iterator object is either less than or equal to the other. + /// @return false This iterator object is neither less than nor equal to the other. + template < + typename Iterator, + enable_if_t< + disjunction, std::is_same>::value, int> = 0> + bool operator<=(const Iterator& rhs) const { + return !rhs.operator<(*this); } - /// @brief Construct a new node ref storage object with a std::initializer_list object. - /// @param init A std::initializer_list object. - node_ref_storage(std::initializer_list init) - : m_owned_value(init) { + /// @brief A greater-than operator of the iterator class. + /// @param rhs An iterator object to be compared with this iterator object. + /// @return true This iterator object is greater than the other. 
+ /// @return false This iterator object is not greater than the other. + template < + typename Iterator, + enable_if_t< + disjunction, std::is_same>::value, int> = 0> + bool operator>(const Iterator& rhs) const { + return !operator<=(rhs); } - /// @brief Construct a new node ref storage object with variadic template arguments - /// @tparam Args Types of arguments to construct a basic_node object. - /// @param args Arguments to construct a basic_node object. - template ::value, int> = 0> - node_ref_storage(Args&&... args) - : m_owned_value(std::forward(args)...) { + /// @brief A greater-than-or-equal-to operator of the iterator class. + /// @param rhs An iterator object to be compared with this iterator object. + /// @return true This iterator object is either greater than or equal to the other. + /// @return false This iterator object is neither greater than nor equal to the other. + template < + typename Iterator, + enable_if_t< + disjunction, std::is_same>::value, int> = 0> + bool operator>=(const Iterator& rhs) const { + return !operator<(rhs); } - // allow only move construct/assignment - node_ref_storage(const node_ref_storage&) = delete; - node_ref_storage(node_ref_storage&&) = default; - node_ref_storage& operator=(const node_ref_storage&) = delete; - node_ref_storage& operator=(node_ref_storage&&) = default; +public: + /// @brief Get the type of the internal iterator implementation. + /// @return iterator_t The type of the internal iterator implementation. + iterator_t type() const noexcept { + return m_inner_iterator_type; + } - ~node_ref_storage() = default; + /// @brief Get the mapping key node of the current iterator. + /// @return The mapping key node of the current iterator. 
+ const typename value_type::mapping_type::key_type& key() const { + if FK_YAML_UNLIKELY (m_inner_iterator_type == iterator_t::SEQUENCE) { + throw fkyaml::exception("Cannot retrieve key from non-mapping iterators."); + } -public: - /// @brief An arrow operator for node_ref_storage objects. - /// @return const node_type* A constant pointer to a basic_node object. - const node_type* operator->() const noexcept { - return m_value_ref ? m_value_ref : &m_owned_value; + return m_iterator_holder.mapping_iterator->first; } - /// @brief Releases a basic_node object internally held. - /// @return node_type A basic_node object internally held. - node_type release() const noexcept { - return m_value_ref ? *m_value_ref : std::move(m_owned_value); + /// @brief Get reference to the YAML node of the current iterator. + /// @return Reference to the YAML node of the current iterator. + reference value() const noexcept { + return operator*(); } private: - /// A storage for a basic_node object given with rvalue reference. - mutable node_type m_owned_value = nullptr; - /// A pointer to a basic_node object given with lvalue reference. - const node_type* m_value_ref = nullptr; + /// A type of the internally-held iterator. + iterator_t m_inner_iterator_type {iterator_t::SEQUENCE}; + /// A holder of actual iterators. + iterator_holder m_iterator_holder {}; }; +/// @brief Get reference to a mapping key node. +/// @tparam ValueType The iterator value type. +/// @tparam I The element index. +/// @param i An iterator object. +/// @return Reference to a mapping key node. +template = 0> +inline auto get(const iterator& i) -> decltype(i.key()) { + return i.key(); +} + +/// @brief Get reference to a mapping value node. +/// @tparam ValueType The iterator value type. +/// @tparam I The element index +/// @param i An iterator object. +/// @return Reference to a mapping value node. 
+template = 0> +inline auto get(const iterator& i) -> decltype(i.value()) { + return i.value(); +} + FK_YAML_DETAIL_NAMESPACE_END -#endif /* FK_YAML_DETAIL_NODE_REF_STORAGE_HPP */ +namespace std { -// #include -// _______ __ __ __ _____ __ __ __ -// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 -// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML -// -// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani -// SPDX-License-Identifier: MIT +#ifdef __clang__ +// clang emits warnings against mixed usage of class/struct for tuple_size/tuple_element. +// see also: https://groups.google.com/a/isocpp.org/g/std-discussion/c/QC-AMb5oO1w +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wmismatched-tags" +#endif -#ifndef FK_YAML_DETAIL_OUTPUT_SERIALIZER_HPP -#define FK_YAML_DETAIL_OUTPUT_SERIALIZER_HPP +/// @brief Partial specialization of std::tuple_size for iterator class. +/// @tparam ValueType The iterator value type. +template +// NOLINTNEXTLINE(cert-dcl58-cpp) +struct tuple_size<::fkyaml::detail::iterator> : integral_constant {}; -#include -#include -#include -#include +/// @brief Partial specialization of std::tuple_element for iterator class. +/// @tparam ValueType The iterator value type. +/// @tparam I The element index. 
+template +// NOLINTNEXTLINE(cert-dcl58-cpp) +struct tuple_element> { + using type = decltype(get(std::declval<::fkyaml::detail::iterator>())); +}; -// #include +#ifdef __clang__ +#pragma clang diagnostic pop +#endif -// #include +} // namespace std + +#endif /* FK_YAML_DETAIL_ITERATOR_HPP */ + +// #include // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library // | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 @@ -10660,471 +10701,295 @@ FK_YAML_DETAIL_NAMESPACE_END // SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani // SPDX-License-Identifier: MIT -#ifndef FK_YAML_DETAIL_CONVERSIONS_TO_STRING_HPP -#define FK_YAML_DETAIL_CONVERSIONS_TO_STRING_HPP - -#include -#include -#include -#include -#include +#ifndef FK_YAML_DETAIL_MAP_RANGE_PROXY_HPP +#define FK_YAML_DETAIL_MAP_RANGE_PROXY_HPP // #include -// #include - -// #include - - -FK_YAML_DETAIL_NAMESPACE_BEGIN - -/// @brief Converts a ValueType object to a string YAML token. -/// @tparam ValueType A source value type. -/// @tparam CharType The type of characters for the conversion result. -/// @param s A resulting output string. -/// @param v A source value. -template -inline void to_string(ValueType v, std::basic_string& s) noexcept; - -/// @brief Specialization of to_string() for null values. -/// @param s A resulting string YAML token. -/// @param (unused) nullptr -template <> -inline void to_string(std::nullptr_t /*unused*/, std::string& s) noexcept { - s = "null"; -} - -/// @brief Specialization of to_string() for booleans. -/// @param s A resulting string YAML token. -/// @param v A boolean source value. -template <> -inline void to_string(bool v, std::string& s) noexcept { - s = v ? "true" : "false"; -} - -/// @brief Specialization of to_string() for integers. -/// @tparam IntegerType An integer type. -/// @param s A resulting string YAML token. -/// @param i An integer source value. 
-template -inline enable_if_t::value> to_string(IntegerType v, std::string& s) noexcept { - s = std::to_string(v); -} +// #include -/// @brief Specialization of to_string() for floating point numbers. -/// @tparam FloatType A floating point number type. -/// @param s A resulting string YAML token. -/// @param f A floating point number source value. -template -inline enable_if_t::value> to_string(FloatType v, std::string& s) noexcept { - if (std::isnan(v)) { - s = ".nan"; - return; - } - if (std::isinf(v)) { - if (v == std::numeric_limits::infinity()) { - s = ".inf"; - } - else { - s = "-.inf"; - } - return; - } +FK_YAML_DETAIL_NAMESPACE_BEGIN - std::ostringstream oss; - oss << v; - s = oss.str(); +/// @brief A helper iterator class which wraps a mapping iterator object. +/// @tparam Iterator The base iterator type. +template +class map_iterator_proxy { +public: + /// @brief The type of the pointed-to elements by base iterators. + using value_type = Iterator; - // If `v` is actually an integer and no scientific notation is used for serialization, ".0" must be appended. - // The result would cause a roundtrip issue otherwise. https://github.com/fktn-k/fkYAML/issues/405 - const std::size_t pos = s.find_first_of(".e"); - if (pos == std::string::npos) { - s += ".0"; - } -} + /// @brief The type to represent difference between the pointed-to elements by base iterators. + using difference_type = std::ptrdiff_t; -FK_YAML_DETAIL_NAMESPACE_END + /// @brief The type of the pointed-to element references by base iterators. + using reference = value_type&; -#endif /* FK_YAML_DETAIL_CONVERSIONS_TO_STRING_HPP */ + /// @brief The type of the pointed-to element pointers by base iterators. + using pointer = value_type*; -// #include + /// @brief The iterator category. + using iterator_category = std::forward_iterator_tag; -// #include + /// @brief Constructs a map_iterator_proxy object. 
+ map_iterator_proxy() = default; -// #include + /// @brief Constructs a map_iterator_proxy object with an Iterator object. + /// @param i A base iterator object. + map_iterator_proxy(const Iterator& i) noexcept + : m_base_iterator(i) { + } -// #include + /// @brief Copy constructs a map_iterator_proxy object. + map_iterator_proxy(const map_iterator_proxy&) = default; -// #include + /// @brief Copy assigns a map_iterator_proxy object. + map_iterator_proxy& operator=(const map_iterator_proxy&) = default; -// #include + /// @brief Move constructs a map_iterator_proxy object. + map_iterator_proxy(map_iterator_proxy&&) = default; + /// @brief Move assigns a map_iterator_proxy object. + map_iterator_proxy& operator=(map_iterator_proxy&&) = default; -FK_YAML_DETAIL_NAMESPACE_BEGIN + /// @brief Destructs a map_iterator_proxy object. + ~map_iterator_proxy() = default; -/// @brief A basic implementation of serialization feature for YAML nodes. -/// @tparam BasicNodeType A BasicNode template class instantiation. -template -class basic_serializer { - static_assert(detail::is_basic_node::value, "basic_serializer only accepts basic_node<...>"); + /// @brief Get reference to the base iterator object. + /// @return Reference to the base iterator object. + reference operator*() noexcept { + return m_base_iterator; + } -public: - /// @brief Construct a new basic_serializer object. - basic_serializer() = default; + /// @brief Get pointer to the base iterator object. + /// @return Pointer to the base iterator object. + pointer operator->() noexcept { + return &m_base_iterator; + } - /// @brief Serialize the given Node value. - /// @param node A Node object to be serialized. - /// @return std::string A serialization result of the given Node value. - std::string serialize(const BasicNodeType& node) { - std::string str {}; - serialize_document(node, str); - return str; - } // LCOV_EXCL_LINE + /// @brief Pre-increments the base iterator object. 
+ /// @return Reference to this map_iterator_proxy object. + map_iterator_proxy& operator++() noexcept { + ++m_base_iterator; + return *this; + } - std::string serialize_docs(const std::vector& docs) { - std::string str {}; + /// @brief Post-increments the base iterator object. + /// @return A map_iterator_proxy object with its base iterator incremented. + map_iterator_proxy operator++(int) & noexcept { + auto result = *this; + ++(*this); + return result; + } - const auto size = static_cast(docs.size()); - for (uint32_t i = 0; i < size; i++) { - serialize_document(docs[i], str); - if (i + 1 < size) { - // Append the end-of-document marker for the next document. - str += "...\n"; - } - } + /// @brief Check equality between map_iterator_proxy objects. + /// @param rhs A map_iterator_proxy object to compare with. + /// @return true if this map_iterator_proxy object is equal to `rhs`, false otherwise. + bool operator==(const map_iterator_proxy& rhs) const noexcept { + return m_base_iterator == rhs.m_base_iterator; + } - return str; - } // LCOV_EXCL_LINE + /// @brief Check inequality between map_iterator_proxy objects. + /// @param rhs A map_iterator_proxy object to compare with. + /// @return true if this map_iterator_proxy object is not equal to `rhs`, false otherwise. + bool operator!=(const map_iterator_proxy& rhs) const noexcept { + return m_base_iterator != rhs.m_base_iterator; + } + + /// @brief Get the mapping key node pointed by the base iterator. + /// @return Reference to the mapping key node. + typename Iterator::reference key() const { + return m_base_iterator.key(); + } + + /// @brief Get the mapping value node pointed by the base iterator. + /// @return Reference to the mapping value node. 
+ typename Iterator::reference value() const noexcept { + return m_base_iterator.value(); + } private: - void serialize_document(const BasicNodeType& node, std::string& str) { - const bool dirs_serialized = serialize_directives(node, str); + /// The base iterator object. + Iterator m_base_iterator {}; +}; - // the root node cannot be an alias node. - const bool root_has_props = node.is_anchor() || node.has_tag_name(); +/// @brief A helper struct which allows accessing node iterator member functions in range-based for loops. +/// @tparam BasicNodeType A basic_node template instance type. +template +class map_range_proxy { + static_assert( + is_basic_node::value, + "map_range_proxy only accepts a basic_node type as its template parameter."); - if (root_has_props) { - if (dirs_serialized) { - str.back() = ' '; // replace the last LF with a white space - } - bool is_anchor_appended = try_append_anchor(node, false, str); - try_append_tag(node, is_anchor_appended, str); - str += "\n"; - } - serialize_node(node, 0, str); +public: + /// @brief The type of non-const iterators. + using iterator = map_iterator_proxy::value, typename BasicNodeType::const_iterator, + typename BasicNodeType::iterator>::type>; + + /// @brief The type of const iterators. + using const_iterator = map_iterator_proxy; + + /// @brief Constructs a map_range_proxy object with a BasicNodeType object. + /// @param map A mapping node object. + map_range_proxy(BasicNodeType& map) noexcept + : mp_map(&map) { } - /// @brief Serialize the directives if any is applied to the node. - /// @param node The target node. - /// @param str A string to hold serialization result. - /// @return bool true if any directive is serialized, false otherwise. - bool serialize_directives(const BasicNodeType& node, std::string& str) { - const auto& p_meta = node.mp_meta; - bool needs_directive_end = false; + /// @brief Copy constructs a map_range_proxy object. 
+ map_range_proxy(const map_range_proxy&) = default; - if (p_meta->is_version_specified) { - str += "%YAML "; - switch (p_meta->version) { - case yaml_version_type::VERSION_1_1: - str += "1.1\n"; - break; - case yaml_version_type::VERSION_1_2: - str += "1.2\n"; - break; - } - needs_directive_end = true; - } + /// @brief Copy assigns a map_range_proxy object. + /// @return Reference to this map_range_proxy object. + map_range_proxy& operator=(const map_range_proxy&) = default; - if (!p_meta->primary_handle_prefix.empty()) { - str += "%TAG ! "; - str += p_meta->primary_handle_prefix; - str += "\n"; - needs_directive_end = true; - } + /// @brief Move constructs a map_range_proxy object. + map_range_proxy(map_range_proxy&&) = default; - if (!p_meta->secondary_handle_prefix.empty()) { - str += "%TAG !! "; - str += p_meta->secondary_handle_prefix; - str += "\n"; - needs_directive_end = true; - } + /// @brief Move assigns a map_range_proxy object. + /// @return Reference to this map_range_proxy object. + map_range_proxy& operator=(map_range_proxy&&) = default; - if (!p_meta->named_handle_map.empty()) { - for (const auto& itr : p_meta->named_handle_map) { - str += "%TAG "; - str += itr.first; - str += " "; - str += itr.second; - str += "\n"; - } - needs_directive_end = true; - } + /// @brief Destructs a map_range_proxy object. + ~map_range_proxy() = default; - if (needs_directive_end) { - str += "---\n"; - } + /// @brief Get an iterator to the first element. + /// @return An iterator to the first element. + iterator begin() noexcept { + return {mp_map->begin()}; + } + + /// @brief Get a const iterator to the first element. + /// @return A const iterator to the first element. + const_iterator begin() const noexcept { + return {mp_map->cbegin()}; + } - return needs_directive_end; + /// @brief Get an iterator to the past-the-last element. + /// @return An iterator to the past-the-last element. 
+ iterator end() noexcept { + return {mp_map->end()}; } - /// @brief Recursively serialize each Node object. - /// @param node A Node object to be serialized. - /// @param cur_indent The current indent width - /// @param str A string to hold serialization result. - void serialize_node(const BasicNodeType& node, const uint32_t cur_indent, std::string& str) { - switch (node.get_type()) { - case node_type::SEQUENCE: - if (node.size() == 0) { - str += "[]\n"; - return; - } - for (const auto& seq_item : node) { - insert_indentation(cur_indent, str); - str += "-"; + /// @brief Get a const iterator to the past-the-last element. + /// @return A const iterator to the past-the-last element. + const_iterator end() const noexcept { + return {mp_map->cend()}; + } - const bool is_appended = try_append_alias(seq_item, true, str); - if (is_appended) { - str += "\n"; - continue; - } +private: + /// Pointer to the mapping node object. (non-null) + BasicNodeType* mp_map {nullptr}; +}; - try_append_anchor(seq_item, true, str); - try_append_tag(seq_item, true, str); +FK_YAML_DETAIL_NAMESPACE_END - const bool is_scalar = seq_item.is_scalar(); - if (is_scalar) { - str += " "; - serialize_node(seq_item, cur_indent, str); - str += "\n"; - continue; - } +#endif /* FK_YAML_DETAIL_MAP_RANGE_PROXY_HPP */ - const bool is_empty = seq_item.empty(); - if (!is_empty) { - str += "\n"; - serialize_node(seq_item, cur_indent + 2, str); - continue; - } +// #include - // an empty sequence or mapping - if (seq_item.is_sequence()) { - str += " []\n"; - } - else /*seq_item.is_mapping()*/ { - str += " {}\n"; - } - } - break; - case node_type::MAPPING: - if (node.size() == 0) { - str += "{}\n"; - return; - } - for (auto itr : node.map_items()) { - insert_indentation(cur_indent, str); +// #include - // serialize a mapping key node. 
- const auto& key_node = itr.key(); +// #include - bool is_appended = try_append_alias(key_node, false, str); - if (is_appended) { - // The trailing white space is necessary since anchor names can contain a colon (:) at its end. - str += " "; - } - else { - const bool is_anchor_appended = try_append_anchor(key_node, false, str); - const bool is_tag_appended = try_append_tag(key_node, is_anchor_appended, str); - if (is_anchor_appended || is_tag_appended) { - str += " "; - } +// #include - const bool is_container = !key_node.is_scalar(); - if (is_container) { - str += "? "; - } - const auto indent = static_cast(get_cur_indent(str)); - serialize_node(key_node, indent, str); - if (is_container) { - // a newline code is already inserted in the above serialize_node() call. - insert_indentation(indent - 2, str); - } - } +// #include - str += ":"; +// #include +// _______ __ __ __ _____ __ __ __ +// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library +// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 +// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML +// +// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani +// SPDX-License-Identifier: MIT - // serialize a mapping value node. 
- const auto& value_node = itr.value(); +#ifndef FK_YAML_DETAIL_NODE_REF_STORAGE_HPP +#define FK_YAML_DETAIL_NODE_REF_STORAGE_HPP - is_appended = try_append_alias(value_node, true, str); - if (is_appended) { - str += "\n"; - continue; - } +#include +#include +#include - try_append_anchor(value_node, true, str); - try_append_tag(value_node, true, str); +// #include - const bool is_scalar = itr->is_scalar(); - if (is_scalar) { - str += " "; - serialize_node(value_node, cur_indent, str); - str += "\n"; - continue; - } +// #include - const bool is_empty = itr->empty(); - if (is_empty) { - str += " "; - } - else { - str += "\n"; - } - serialize_node(value_node, cur_indent + 2, str); - } - break; - case node_type::NULL_OBJECT: - to_string(nullptr, m_tmp_str_buff); - str += m_tmp_str_buff; - break; - case node_type::BOOLEAN: - to_string(node.template get_value(), m_tmp_str_buff); - str += m_tmp_str_buff; - break; - case node_type::INTEGER: - to_string(node.template get_value(), m_tmp_str_buff); - str += m_tmp_str_buff; - break; - case node_type::FLOAT: - to_string(node.template get_value(), m_tmp_str_buff); - str += m_tmp_str_buff; - break; - case node_type::STRING: { - bool is_escaped = false; - auto str_val = get_string_node_value(node, is_escaped); +// #include - if (is_escaped) { - // There's no other token type with escapes than strings. - // Also, escapes must be in double-quoted strings. - str += '\"'; - str += str_val; - str += '\"'; - break; - } - // The next line is intentionally excluded from the LCOV coverage target since the next line is somehow - // misrecognized as it has a binary branch. Possibly begin() or end() has some conditional branch(es) - // internally. Confirmed with LCOV 1.14 on Ubuntu22.04. 
- const node_type type_if_plain = - scalar_scanner::scan(str_val.c_str(), str_val.c_str() + str_val.size()); // LCOV_EXCL_LINE +FK_YAML_DETAIL_NAMESPACE_BEGIN - if (type_if_plain != node_type::STRING) { - // Surround a string value with double quotes to keep semantic equality. - // Without them, serialized values will become non-string. (e.g., "1" -> 1) - str += '\"'; - str += str_val; - str += '\"'; - } - else { - str += str_val; - } - break; - } - } - } +/// @brief A temporal storage for basic_node class objects. +/// @note This class makes it easier to handle lvalue basic_node objects in basic_node ctor with std::initializer_list. +/// @tparam BasicNodeType A basic_node template instance type. +template +class node_ref_storage { + static_assert(is_basic_node::value, "node_ref_storage only accepts basic_node<...>"); - /// @brief Get the current indentation width. - /// @param s The target string object. - /// @return The current indentation width. - std::size_t get_cur_indent(const std::string& s) const noexcept { - const bool is_empty = s.empty(); - if (is_empty) { - return 0; - } + using node_type = BasicNodeType; - const std::size_t last_lf_pos = s.rfind('\n'); - return (last_lf_pos != std::string::npos) ? s.size() - last_lf_pos - 1 : s.size(); +public: + /// @brief Construct a new node ref storage object with an rvalue basic_node object. + /// @param n An rvalue basic_node object. + explicit node_ref_storage(node_type&& n) noexcept(std::is_nothrow_move_constructible::value) + : m_owned_value(std::move(n)) { } - /// @brief Insert indentation to the serialization result. - /// @param indent The indent width to be inserted. - /// @param str A string to hold serialization result. - void insert_indentation(const uint32_t indent, std::string& str) const noexcept { - if (indent == 0) { - return; - } - - str.append(indent - get_cur_indent(str), ' '); + /// @brief Construct a new node ref storage object with an lvalue basic_node object. 
+ /// @param n An lvalue basic_node object. + explicit node_ref_storage(const node_type& n) noexcept + : m_value_ref(&n) { } - /// @brief Append an anchor property if it's available. Do nothing otherwise. - /// @param node The target node which is possibly an anchor node. - /// @param prepends_space Whether to prepend a space before an anchor property. - /// @param str A string to hold serialization result. - /// @return true if an anchor property has been appended, false otherwise. - bool try_append_anchor(const BasicNodeType& node, bool prepends_space, std::string& str) const { - if (node.is_anchor()) { - if (prepends_space) { - str += " "; - } - str += "&" + node.get_anchor_name(); - return true; - } - return false; + /// @brief Construct a new node ref storage object with a std::initializer_list object. + /// @param init A std::initializer_list object. + node_ref_storage(std::initializer_list init) + : m_owned_value(init) { } - /// @brief Append an alias property if it's available. Do nothing otherwise. - /// @param node The target node which is possibly an alias node. - /// @param prepends_space Whether to prepend a space before an alias property. - /// @param str A string to hold serialization result. - /// @return true if an alias property has been appended, false otherwise. - bool try_append_alias(const BasicNodeType& node, bool prepends_space, std::string& str) const { - if (node.is_alias()) { - if (prepends_space) { - str += " "; - } - str += "*" + node.get_anchor_name(); - return true; - } - return false; + /// @brief Construct a new node ref storage object with variadic template arguments + /// @tparam Args Types of arguments to construct a basic_node object. + /// @param args Arguments to construct a basic_node object. + template ::value, int> = 0> + node_ref_storage(Args&&... args) + : m_owned_value(std::forward(args)...) { } - /// @brief Append a tag name if it's available. Do nothing otherwise. 
- /// @param[in] node The target node which possibly has a tag name. - /// @param[out] str A string to hold serialization result. - /// @return true if a tag name has been appended, false otherwise. - bool try_append_tag(const BasicNodeType& node, bool prepends_space, std::string& str) const { - if (node.has_tag_name()) { - if (prepends_space) { - str += " "; - } - str += node.get_tag_name(); - return true; - } - return false; - } + // allow only move construct/assignment + node_ref_storage(const node_ref_storage&) = delete; + node_ref_storage(node_ref_storage&&) = default; + node_ref_storage& operator=(const node_ref_storage&) = delete; + node_ref_storage& operator=(node_ref_storage&&) = default; - /// @brief Get a string value from the given node and, if necessary, escape its contents. - /// @param[in] node The target string YAML node. - /// @param[out] is_escaped Whether the contents of an output string has been escaped. - /// @return The (escaped) string node value. - typename BasicNodeType::string_type get_string_node_value(const BasicNodeType& node, bool& is_escaped) { - FK_YAML_ASSERT(node.is_string()); + ~node_ref_storage() = default; - const auto& s = node.as_str(); - return yaml_escaper::escape(s.c_str(), s.c_str() + s.size(), is_escaped); - } // LCOV_EXCL_LINE +public: + /// @brief An arrow operator for node_ref_storage objects. + /// @return const node_type* A constant pointer to a basic_node object. + const node_type* operator->() const noexcept { + return m_value_ref ? m_value_ref : &m_owned_value; + } + + /// @brief Releases a basic_node object internally held. + /// @return node_type A basic_node object internally held. + node_type release() const noexcept { + return m_value_ref ? *m_value_ref : std::move(m_owned_value); + } private: - /// A temporal buffer for conversion from a scalar to a string. - std::string m_tmp_str_buff; + /// A storage for a basic_node object given with rvalue reference. 
+ mutable node_type m_owned_value = nullptr; + /// A pointer to a basic_node object given with lvalue reference. + const node_type* m_value_ref = nullptr; }; FK_YAML_DETAIL_NAMESPACE_END -#endif /* FK_YAML_DETAIL_OUTPUT_SERIALIZER_HPP */ +#endif /* FK_YAML_DETAIL_NODE_REF_STORAGE_HPP */ -// #include +// #include // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library // | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 @@ -11133,313 +10998,490 @@ FK_YAML_DETAIL_NAMESPACE_END // SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani // SPDX-License-Identifier: MIT -#ifndef FK_YAML_DETAIL_REVERSE_ITERATOR_HPP -#define FK_YAML_DETAIL_REVERSE_ITERATOR_HPP +#ifndef FK_YAML_DETAIL_OUTPUT_SERIALIZER_HPP +#define FK_YAML_DETAIL_OUTPUT_SERIALIZER_HPP -#include +#include +#include +#include +#include // #include -// #include +// #include +// _______ __ __ __ _____ __ __ __ +// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library +// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 +// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML +// +// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani +// SPDX-License-Identifier: MIT +#ifndef FK_YAML_DETAIL_CONVERSIONS_TO_STRING_HPP +#define FK_YAML_DETAIL_CONVERSIONS_TO_STRING_HPP -FK_YAML_DETAIL_NAMESPACE_BEGIN +#include +#include +#include +#include +#include -/// @brief An iterator adapter class that reverses the direction of a given node iterator. -/// @tparam Iterator The base iterator type. -template -class reverse_iterator { - static_assert( - is_basic_node::value, - "reverse_iterator only accepts a basic_node type as the underlying iterator's value type"); +// #include -public: - /// @brief The base iterator type. - using iterator_type = Iterator; +// #include - /// @brief The base iterator category. 
- using iterator_category = typename Iterator::iterator_category; +// #include - /// @brief The type of the pointed-to elements by base iterators. - using value_type = typename Iterator::value_type; - /// @brief The type to represent differences between the pointed-to elements by the base iterators. - using difference_type = typename Iterator::difference_type; +FK_YAML_DETAIL_NAMESPACE_BEGIN - /// @brief The type of the pointed-to element pointers by base iterators. - using pointer = typename Iterator::pointer; +/// @brief Converts a ValueType object to a string YAML token. +/// @tparam ValueType A source value type. +/// @tparam CharType The type of characters for the conversion result. +/// @param s A resulting output string. +/// @param v A source value. +template +inline void to_string(ValueType v, std::basic_string& s) noexcept; - /// @brief The type of the pointed-to element references by base iterators. - using reference = typename Iterator::reference; +/// @brief Specialization of to_string() for null values. +/// @param s A resulting string YAML token. +/// @param (unused) nullptr +template <> +inline void to_string(std::nullptr_t /*unused*/, std::string& s) noexcept { + s = "null"; +} - /// @brief Constructs a reverse_iterator object. - reverse_iterator() = default; +/// @brief Specialization of to_string() for booleans. +/// @param s A resulting string YAML token. +/// @param v A boolean source value. +template <> +inline void to_string(bool v, std::string& s) noexcept { + s = v ? "true" : "false"; +} - /// @brief Copy constructs a reverse_iterator object. - reverse_iterator(const reverse_iterator&) = default; +/// @brief Specialization of to_string() for integers. +/// @tparam IntegerType An integer type. +/// @param s A resulting string YAML token. +/// @param i An integer source value. 
+template +inline enable_if_t::value> to_string(IntegerType v, std::string& s) noexcept { + s = std::to_string(v); +} - /// @brief Copy assignments a reverse_iterator object. - reverse_iterator& operator=(const reverse_iterator&) = default; +/// @brief Specialization of to_string() for floating point numbers. +/// @tparam FloatType A floating point number type. +/// @param s A resulting string YAML token. +/// @param f A floating point number source value. +template +inline enable_if_t::value> to_string(FloatType v, std::string& s) noexcept { + if (std::isnan(v)) { + s = ".nan"; + return; + } - /// @brief Move constructs a reverse_iterator object. - reverse_iterator(reverse_iterator&&) = default; + if (std::isinf(v)) { + if (v == std::numeric_limits::infinity()) { + s = ".inf"; + } + else { + s = "-.inf"; + } + return; + } - /// @brief Move assignments a reverse_iterator object. - reverse_iterator& operator=(reverse_iterator&&) = default; + std::ostringstream oss; + oss << v; + s = oss.str(); - /// @brief Constructs a reverse_iterator object with an underlying iterator object. - /// @param i A base iterator object. - reverse_iterator(const Iterator& i) noexcept - : m_current(i) { + // If `v` is actually an integer and no scientific notation is used for serialization, ".0" must be appended. + // The result would cause a roundtrip issue otherwise. https://github.com/fktn-k/fkYAML/issues/405 + const std::size_t pos = s.find_first_of(".e"); + if (pos == std::string::npos) { + s += ".0"; } +} - /// @brief Copy constructs a reverse_iterator object with a compatible reverse_iterator object. - /// @tparam U A compatible iterator type with Iterator. - /// @param other A compatible reverse_iterator object. - template >::value, int> = 0> - reverse_iterator(const reverse_iterator& other) noexcept - : m_current(other.base()) { - } +FK_YAML_DETAIL_NAMESPACE_END - /// @brief Copy assigns a reverse_iterator object with a compatible reverse_iterator object. 
- /// @tparam U A compatible iterator type with Iterator. - /// @param other A compatible reverse_iterator object. - /// @return Reference to this reverse_iterator object. - template >::value, int> = 0> - reverse_iterator& operator=(const reverse_iterator& other) noexcept { - m_current = other.base(); - return *this; - } +#endif /* FK_YAML_DETAIL_CONVERSIONS_TO_STRING_HPP */ - /// @brief Destructs a reverse_iterator object. - ~reverse_iterator() = default; +// #include - /// @brief Accesses the underlying iterator object. - /// @return The underlying iterator object. - Iterator base() const noexcept { - return m_current; +// #include + +// #include + +// #include + +// #include + +// #include + + +FK_YAML_DETAIL_NAMESPACE_BEGIN + +/// @brief A basic implementation of serialization feature for YAML nodes. +/// @tparam BasicNodeType A BasicNode template class instantiation. +template +class basic_serializer { + static_assert(detail::is_basic_node::value, "basic_serializer only accepts basic_node<...>"); + +public: + /// @brief Construct a new basic_serializer object. + basic_serializer() = default; + + /// @brief Serialize the given Node value. + /// @param node A Node object to be serialized. + /// @return std::string A serialization result of the given Node value. + std::string serialize(const BasicNodeType& node) { + std::string str {}; + serialize_document(node, str); + return str; + } // LCOV_EXCL_LINE + + std::string serialize_docs(const std::vector& docs) { + std::string str {}; + + const auto size = static_cast(docs.size()); + for (uint32_t i = 0; i < size; i++) { + serialize_document(docs[i], str); + if (i + 1 < size) { + // Append the end-of-document marker for the next document. + str += "...\n"; + } + } + + return str; + } // LCOV_EXCL_LINE + +private: + void serialize_document(const BasicNodeType& node, std::string& str) { + const bool dirs_serialized = serialize_directives(node, str); + + // the root node cannot be an alias node. 
+ const bool root_has_props = node.is_anchor() || node.has_tag_name(); + + if (root_has_props) { + if (dirs_serialized) { + str.back() = ' '; // replace the last LF with a white space + } + bool is_anchor_appended = try_append_anchor(node, false, str); + try_append_tag(node, is_anchor_appended, str); + str += "\n"; + } + serialize_node(node, 0, str); } - /// @brief Get reference to the pointed-to element. - /// @return Reference to the pointed-to element. - reference operator*() const noexcept { - Iterator tmp = m_current; - return *--tmp; - } + /// @brief Serialize the directives if any is applied to the node. + /// @param node The target node. + /// @param str A string to hold serialization result. + /// @return bool true if any directive is serialized, false otherwise. + bool serialize_directives(const BasicNodeType& node, std::string& str) { + const auto& p_meta = node.mp_meta; + bool needs_directive_end = false; - /// @brief Get pointer to the pointed-to element. - /// @return Pointer to the pointed-to element. - pointer operator->() const noexcept { - return &(operator*()); - } + if (p_meta->is_version_specified) { + str += "%YAML "; + switch (p_meta->version) { + case yaml_version_type::VERSION_1_1: + str += "1.1\n"; + break; + case yaml_version_type::VERSION_1_2: + str += "1.2\n"; + break; + } + needs_directive_end = true; + } - /// @brief Pre-increments the underlying iterator object. - /// @return Reference to this reverse_iterator object with its underlying iterator incremented. - reverse_iterator& operator++() noexcept { - --m_current; - return *this; - } + if (!p_meta->primary_handle_prefix.empty()) { + str += "%TAG ! "; + str += p_meta->primary_handle_prefix; + str += "\n"; + needs_directive_end = true; + } - /// @brief Post-increments the underlying iterator object. - /// @return A reverse_iterator object with the underlying iterator as-is. 
- reverse_iterator operator++(int) & noexcept { - auto result = *this; - --m_current; - return result; - } + if (!p_meta->secondary_handle_prefix.empty()) { + str += "%TAG !! "; + str += p_meta->secondary_handle_prefix; + str += "\n"; + needs_directive_end = true; + } - /// @brief Pre-decrements the underlying iterator object. - /// @return Reference to this reverse_iterator with its underlying iterator decremented. - reverse_iterator& operator--() noexcept { - ++m_current; - return *this; - } + if (!p_meta->named_handle_map.empty()) { + for (const auto& itr : p_meta->named_handle_map) { + str += "%TAG "; + str += itr.first; + str += " "; + str += itr.second; + str += "\n"; + } + needs_directive_end = true; + } - /// @brief Post-decrements the underlying iterator object. - /// @return A reverse_iterator object with the underlying iterator as-is. - reverse_iterator operator--(int) & noexcept { - auto result = *this; - ++m_current; - return result; - } + if (needs_directive_end) { + str += "---\n"; + } - /// @brief Advances the underlying iterator object by `n`. - /// @param n The distance by which the underlying iterator is advanced. - /// @return A reverse_iterator object with the underlying iterator advanced by `n`. - reverse_iterator operator+(difference_type n) const noexcept { - return reverse_iterator(m_current - n); + return needs_directive_end; } - /// @brief Advances the underlying iterator object by `n`. - /// @param n The distance by which the underlying iterator is advanced. - /// @return Reference to this reverse_iterator object with the underlying iterator advanced by `n`. - reverse_iterator& operator+=(difference_type n) noexcept { - m_current -= n; - return *this; - } + /// @brief Recursively serialize each Node object. + /// @param node A Node object to be serialized. + /// @param cur_indent The current indent width + /// @param str A string to hold serialization result. 
+ void serialize_node(const BasicNodeType& node, const uint32_t cur_indent, std::string& str) { + switch (node.get_type()) { + case node_type::SEQUENCE: + if (node.size() == 0) { + str += "[]\n"; + return; + } + for (const auto& seq_item : node) { + insert_indentation(cur_indent, str); + str += "-"; - /// @brief Decrements the underlying iterator object by `n`. - /// @param n The distance by which the underlying iterator is decremented. - /// @return A reverse_iterator object with the underlying iterator decremented by `n`. - reverse_iterator operator-(difference_type n) const noexcept { - return reverse_iterator(m_current + n); - } + const bool is_appended = try_append_alias(seq_item, true, str); + if (is_appended) { + str += "\n"; + continue; + } - /// @brief Decrements the underlying iterator object by `n`. - /// @param n The distance by which the underlying iterator is decremented. - /// @return Reference to this reverse_iterator object with the underlying iterator decremented by `n`. - reverse_iterator& operator-=(difference_type n) noexcept { - m_current += n; - return *this; - } + try_append_anchor(seq_item, true, str); + try_append_tag(seq_item, true, str); - /// @brief Get the mapping key node of the underlying iterator. - /// @return The mapping key node of the underlying iterator. - auto key() const -> decltype(std::declval().key()) { - Iterator itr = --(base()); - return itr.key(); - } + const bool is_scalar = seq_item.is_scalar(); + if (is_scalar) { + str += " "; + serialize_node(seq_item, cur_indent, str); + str += "\n"; + continue; + } - /// @brief Get reference to the underlying iterator's value. - /// @return Reference to the underlying iterator's value. 
- reference value() noexcept { - Iterator itr = --(base()); - return *itr; - } + const bool is_empty = seq_item.empty(); + if (!is_empty) { + str += "\n"; + serialize_node(seq_item, cur_indent + 2, str); + continue; + } -private: - /// - Iterator m_current; -}; + // an empty sequence or mapping + if (seq_item.is_sequence()) { + str += " []\n"; + } + else /*seq_item.is_mapping()*/ { + str += " {}\n"; + } + } + break; + case node_type::MAPPING: + if (node.size() == 0) { + str += "{}\n"; + return; + } + for (auto itr : node.map_items()) { + insert_indentation(cur_indent, str); -/// @brief Check equality between reverse_iterator objects. -/// @tparam IteratorL Base iterator type for `lhs`. -/// @tparam IteratorR Base iterator type for `rhs`. -/// @param lhs A reverse_iterator object. -/// @param rhs A reverse_iterator object. -/// @return true if the two reverse_iterator objects are equal, false otherwise. -template -inline bool operator==(const reverse_iterator& lhs, const reverse_iterator& rhs) { - return lhs.base() == rhs.base(); -} + // serialize a mapping key node. + const auto& key_node = itr.key(); -/// @brief Check inequality between reverse_iterator objects. -/// @tparam IteratorL Base iterator type for `lhs`. -/// @tparam IteratorR Base iterator type for `rhs`. -/// @param lhs A reverse_iterator object. -/// @param rhs A reverse_iterator object. -/// @return true if the two reverse_iterator objects are not equal, false otherwise. -template -inline bool operator!=(const reverse_iterator& lhs, const reverse_iterator& rhs) { - return lhs.base() != rhs.base(); -} + bool is_appended = try_append_alias(key_node, false, str); + if (is_appended) { + // The trailing white space is necessary since anchor names can contain a colon (:) at its end. 
+ str += " "; + } + else { + const bool is_anchor_appended = try_append_anchor(key_node, false, str); + const bool is_tag_appended = try_append_tag(key_node, is_anchor_appended, str); + if (is_anchor_appended || is_tag_appended) { + str += " "; + } -/// @brief Check if `lhs` is less than `rhs`. -/// @tparam IteratorL Base iterator type for `lhs`. -/// @tparam IteratorR Base iterator type for `rhs`. -/// @param lhs A reverse_iterator object. -/// @param rhs A reverse_iterator object. -/// @return true if `lhs` is less than `rhs`, false otherwise. -template -inline bool operator<(const reverse_iterator& lhs, const reverse_iterator& rhs) { - return lhs.base() > rhs.base(); -} + const bool is_container = !key_node.is_scalar(); + if (is_container) { + str += "? "; + } + const auto indent = static_cast(get_cur_indent(str)); + serialize_node(key_node, indent, str); + if (is_container) { + // a newline code is already inserted in the above serialize_node() call. + insert_indentation(indent - 2, str); + } + } -/// @brief Check if `lhs` is less than or equal to `rhs`. -/// @tparam IteratorL Base iterator type for `lhs`. -/// @tparam IteratorR Base iterator type for `rhs`. -/// @param lhs A reverse_iterator object. -/// @param rhs A reverse_iterator object. -/// @return true if `lhs` is less than or equal to `rhs`, false otherwise. -template -inline bool operator<=(const reverse_iterator& lhs, const reverse_iterator& rhs) { - return lhs.base() >= rhs.base(); -} + str += ":"; -/// @brief Check if `lhs` is greater than `rhs`. -/// @tparam IteratorL Base iterator type for `lhs`. -/// @tparam IteratorR Base iterator type for `rhs`. -/// @param lhs A reverse_iterator object. -/// @param rhs A reverse_iterator object. -/// @return true if `lhs` is greater than `rhs`, false otherwise. -template -inline bool operator>(const reverse_iterator& lhs, const reverse_iterator& rhs) { - return lhs.base() < rhs.base(); -} + // serialize a mapping value node. 
+ const auto& value_node = itr.value(); -/// @brief Check if `lhs` is greater than or equal to `rhs`. -/// @tparam IteratorL Base iterator type for `lhs`. -/// @tparam IteratorR Base iterator type for `rhs`. -/// @param lhs A reverse_iterator object. -/// @param rhs A reverse_iterator object. -/// @return true if `lhs` is greater than or equal to `rhs`, false otherwise. -template -inline bool operator>=(const reverse_iterator& lhs, const reverse_iterator& rhs) { - return lhs.base() <= rhs.base(); -} + is_appended = try_append_alias(value_node, true, str); + if (is_appended) { + str += "\n"; + continue; + } -FK_YAML_DETAIL_NAMESPACE_END + try_append_anchor(value_node, true, str); + try_append_tag(value_node, true, str); -#endif /* FK_YAML_DETAIL_REVERSE_ITERATOR_HPP */ + const bool is_scalar = itr->is_scalar(); + if (is_scalar) { + str += " "; + serialize_node(value_node, cur_indent, str); + str += "\n"; + continue; + } -// #include + const bool is_empty = itr->empty(); + if (is_empty) { + str += " "; + } + else { + str += "\n"; + } + serialize_node(value_node, cur_indent + 2, str); + } + break; + case node_type::NULL_OBJECT: + to_string(nullptr, m_tmp_str_buff); + str += m_tmp_str_buff; + break; + case node_type::BOOLEAN: + to_string(node.template get_value(), m_tmp_str_buff); + str += m_tmp_str_buff; + break; + case node_type::INTEGER: + to_string(node.template get_value(), m_tmp_str_buff); + str += m_tmp_str_buff; + break; + case node_type::FLOAT: + to_string(node.template get_value(), m_tmp_str_buff); + str += m_tmp_str_buff; + break; + case node_type::STRING: { + bool is_escaped = false; + auto str_val = get_string_node_value(node, is_escaped); -// #include -// _______ __ __ __ _____ __ __ __ -// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 -// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML -// -// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani -// 
SPDX-License-Identifier: MIT + if (is_escaped) { + // There's no other token type with escapes than strings. + // Also, escapes must be in double-quoted strings. + str += '\"'; + str += str_val; + str += '\"'; + break; + } -#ifndef FK_YAML_DETAIL_TYPES_YAML_VERSION_T_HPP -#define FK_YAML_DETAIL_TYPES_YAML_VERSION_T_HPP + // The next line is intentionally excluded from the LCOV coverage target since the next line is somehow + // misrecognized as it has a binary branch. Possibly begin() or end() has some conditional branch(es) + // internally. Confirmed with LCOV 1.14 on Ubuntu22.04. + const node_type type_if_plain = + scalar_scanner::scan(str_val.c_str(), str_val.c_str() + str_val.size()); // LCOV_EXCL_LINE -#include + if (type_if_plain != node_type::STRING) { + // Surround a string value with double quotes to keep semantic equality. + // Without them, serialized values will become non-string. (e.g., "1" -> 1) + str += '\"'; + str += str_val; + str += '\"'; + } + else { + str += str_val; + } + break; + } + } + } -// #include + /// @brief Get the current indentation width. + /// @param s The target string object. + /// @return The current indentation width. + std::size_t get_cur_indent(const std::string& s) const noexcept { + const bool is_empty = s.empty(); + if (is_empty) { + return 0; + } -// #include + const std::size_t last_lf_pos = s.rfind('\n'); + return (last_lf_pos != std::string::npos) ? s.size() - last_lf_pos - 1 : s.size(); + } + /// @brief Insert indentation to the serialization result. + /// @param indent The indent width to be inserted. + /// @param str A string to hold serialization result. + void insert_indentation(const uint32_t indent, std::string& str) const noexcept { + if (indent == 0) { + return; + } -FK_YAML_DETAIL_NAMESPACE_BEGIN + str.append(indent - get_cur_indent(str), ' '); + } -/// @brief Definition of YAML version types. 
-enum class yaml_version_t : std::uint8_t { - VER_1_1, //!< YAML version 1.1 - VER_1_2, //!< YAML version 1.2 -}; + /// @brief Append an anchor property if it's available. Do nothing otherwise. + /// @param node The target node which is possibly an anchor node. + /// @param prepends_space Whether to prepend a space before an anchor property. + /// @param str A string to hold serialization result. + /// @return true if an anchor property has been appended, false otherwise. + bool try_append_anchor(const BasicNodeType& node, bool prepends_space, std::string& str) const { + if (node.is_anchor()) { + if (prepends_space) { + str += " "; + } + str += "&" + node.get_anchor_name(); + return true; + } + return false; + } -inline yaml_version_t convert_from_yaml_version_type(yaml_version_type t) noexcept { - switch (t) { - case yaml_version_type::VERSION_1_1: - return yaml_version_t::VER_1_1; - case yaml_version_type::VERSION_1_2: - default: - return yaml_version_t::VER_1_2; + /// @brief Append an alias property if it's available. Do nothing otherwise. + /// @param node The target node which is possibly an alias node. + /// @param prepends_space Whether to prepend a space before an alias property. + /// @param str A string to hold serialization result. + /// @return true if an alias property has been appended, false otherwise. + bool try_append_alias(const BasicNodeType& node, bool prepends_space, std::string& str) const { + if (node.is_alias()) { + if (prepends_space) { + str += " "; + } + str += "*" + node.get_anchor_name(); + return true; + } + return false; } -} -inline yaml_version_type convert_to_yaml_version_type(yaml_version_t t) noexcept { - switch (t) { - case yaml_version_t::VER_1_1: - return yaml_version_type::VERSION_1_1; - case yaml_version_t::VER_1_2: - default: - return yaml_version_type::VERSION_1_2; + /// @brief Append a tag name if it's available. Do nothing otherwise. + /// @param[in] node The target node which possibly has a tag name. 
+ /// @param[out] str A string to hold serialization result. + /// @return true if a tag name has been appended, false otherwise. + bool try_append_tag(const BasicNodeType& node, bool prepends_space, std::string& str) const { + if (node.has_tag_name()) { + if (prepends_space) { + str += " "; + } + str += node.get_tag_name(); + return true; + } + return false; } -} -FK_YAML_DETAIL_NAMESPACE_END + /// @brief Get a string value from the given node and, if necessary, escape its contents. + /// @param[in] node The target string YAML node. + /// @param[out] is_escaped Whether the contents of an output string has been escaped. + /// @return The (escaped) string node value. + typename BasicNodeType::string_type get_string_node_value(const BasicNodeType& node, bool& is_escaped) { + FK_YAML_ASSERT(node.is_string()); -#endif /* FK_YAML_DETAIL_TYPES_YAML_VERSION_T_HPP */ + const auto& s = node.as_str(); + return yaml_escaper::escape(s.c_str(), s.c_str() + s.size(), is_escaped); + } // LCOV_EXCL_LINE -// #include +private: + /// A temporal buffer for conversion from a scalar to a string. 
+ std::string m_tmp_str_buff; +}; -// #include +FK_YAML_DETAIL_NAMESPACE_END -// #include +#endif /* FK_YAML_DETAIL_OUTPUT_SERIALIZER_HPP */ + +// #include // _______ __ __ __ _____ __ __ __ // | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library // | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 @@ -11448,990 +11490,991 @@ FK_YAML_DETAIL_NAMESPACE_END // SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani // SPDX-License-Identifier: MIT -#ifndef FK_YAML_NODE_VALUE_CONVERTER_HPP -#define FK_YAML_NODE_VALUE_CONVERTER_HPP +#ifndef FK_YAML_DETAIL_REVERSE_ITERATOR_HPP +#define FK_YAML_DETAIL_REVERSE_ITERATOR_HPP -#include +#include // #include -// #include -// _______ __ __ __ _____ __ __ __ -// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 -// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML -// -// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani -// SPDX-License-Identifier: MIT +// #include -#ifndef FK_YAML_DETAIL_CONVERSIONS_FROM_NODE_HPP -#define FK_YAML_DETAIL_CONVERSIONS_FROM_NODE_HPP -#include -#include -#include -#include -#include -#include +FK_YAML_DETAIL_NAMESPACE_BEGIN -// #include +/// @brief An iterator adapter class that reverses the direction of a given node iterator. +/// @tparam Iterator The base iterator type. +template +class reverse_iterator { + static_assert( + is_basic_node::value, + "reverse_iterator only accepts a basic_node type as the underlying iterator's value type"); -// #include +public: + /// @brief The base iterator type. + using iterator_type = Iterator; -// #include + /// @brief The base iterator category. + using iterator_category = typename Iterator::iterator_category; -// #include + /// @brief The type of the pointed-to elements by base iterators. + using value_type = typename Iterator::value_type; + + /// @brief The type to represent differences between the pointed-to elements by the base iterators. 
+ using difference_type = typename Iterator::difference_type; + + /// @brief The type of the pointed-to element pointers by base iterators. + using pointer = typename Iterator::pointer; + + /// @brief The type of the pointed-to element references by base iterators. + using reference = typename Iterator::reference; + + /// @brief Constructs a reverse_iterator object. + reverse_iterator() = default; -// #include + /// @brief Copy constructs a reverse_iterator object. + reverse_iterator(const reverse_iterator&) = default; -// #include + /// @brief Copy assignments a reverse_iterator object. + reverse_iterator& operator=(const reverse_iterator&) = default; + /// @brief Move constructs a reverse_iterator object. + reverse_iterator(reverse_iterator&&) = default; -#ifdef FK_YAML_HAS_CXX_17 -#include -#endif + /// @brief Move assignments a reverse_iterator object. + reverse_iterator& operator=(reverse_iterator&&) = default; -FK_YAML_DETAIL_NAMESPACE_BEGIN + /// @brief Constructs a reverse_iterator object with an underlying iterator object. + /// @param i A base iterator object. + reverse_iterator(const Iterator& i) noexcept + : m_current(i) { + } -/////////////////// -// from_node // -/////////////////// + /// @brief Copy constructs a reverse_iterator object with a compatible reverse_iterator object. + /// @tparam U A compatible iterator type with Iterator. + /// @param other A compatible reverse_iterator object. + template >::value, int> = 0> + reverse_iterator(const reverse_iterator& other) noexcept + : m_current(other.base()) { + } -// utility type traits and functors + /// @brief Copy assigns a reverse_iterator object with a compatible reverse_iterator object. + /// @tparam U A compatible iterator type with Iterator. + /// @param other A compatible reverse_iterator object. + /// @return Reference to this reverse_iterator object. 
+ template >::value, int> = 0> + reverse_iterator& operator=(const reverse_iterator& other) noexcept { + m_current = other.base(); + return *this; + } -/// @brief Utility traits type alias to detect constructible associative container types from a mapping node, e.g., -/// std::map or std::unordered_map. -/// @tparam T A target type for detection. -template -using is_constructible_mapping_type = - conjunction, detect::has_mapped_type, detect::has_value_type>; + /// @brief Destructs a reverse_iterator object. + ~reverse_iterator() = default; -/// @brief Utility traits type alias to detect constructible container types from a sequence node, e.g., std::vector or -/// std::list. -/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam T A target type for detection. -template -using is_constructible_sequence_type = conjunction< - negation>, detect::has_iterator, detect::is_iterator_traits, - detect::has_begin_end, negation>, - negation>>; + /// @brief Accesses the underlying iterator object. + /// @return The underlying iterator object. + Iterator base() const noexcept { + return m_current; + } -/// @brief Utility traits type alias to detect a sequence container adapter type, e.g., std::stack or std::queue. -/// @tparam T A target type for detection. -template -using is_sequence_container_adapter = conjunction< - negation>, detect::has_container_type, detect::has_value_type, - negation>>; + /// @brief Get reference to the pointed-to element. + /// @return Reference to the pointed-to element. + reference operator*() const noexcept { + Iterator tmp = m_current; + return *--tmp; + } -/// @brief Helper struct for reserve() member function call switch for types which do not have reserve function. -/// @tparam ContainerType A container type. -template -struct call_reserve_if_available { - /// @brief Do nothing since ContainerType does not have reserve function. 
- static void call(ContainerType& /*unused*/, typename ContainerType::size_type /*unused*/) { + /// @brief Get pointer to the pointed-to element. + /// @return Pointer to the pointed-to element. + pointer operator->() const noexcept { + return &(operator*()); } -}; -/// @brief Helper struct for reserve() member function call switch for types which have reserve function. -/// @tparam ContainerType A container type. -template -struct call_reserve_if_available::value>> { - /// @brief Call reserve function on the ContainerType object with a given size. - /// @param c A container object. - /// @param n A size to reserve. - static void call(ContainerType& c, typename ContainerType::size_type n) { - c.reserve(n); + /// @brief Pre-increments the underlying iterator object. + /// @return Reference to this reverse_iterator object with its underlying iterator incremented. + reverse_iterator& operator++() noexcept { + --m_current; + return *this; } -}; -// from_node() implementations + /// @brief Post-increments the underlying iterator object. + /// @return A reverse_iterator object with the underlying iterator as-is. + reverse_iterator operator++(int) & noexcept { + auto result = *this; + --m_current; + return result; + } -/// @brief from_node function for C-style 1D arrays whose element type must be a basic_node template instance type or a -/// compatible type. -/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam T Element type of C-style 1D array. -/// @tparam N Size of the array. -/// @param n A basic_node object. -/// @param array An array object. -template -inline auto from_node(const BasicNodeType& n, T (&array)[N]) - -> decltype(n.get_value_inplace(std::declval()), void()) { - if FK_YAML_UNLIKELY (!n.is_sequence()) { - throw type_error("The target node value type is not sequence type.", n.get_type()); + /// @brief Pre-decrements the underlying iterator object. 
+ /// @return Reference to this reverse_iterator with its underlying iterator decremented. + reverse_iterator& operator--() noexcept { + ++m_current; + return *this; } - // call get_value_inplace(), not get_value(), since the storage to fill the result into is already created. - for (std::size_t i = 0; i < N; i++) { - n.at(i).get_value_inplace(array[i]); + /// @brief Post-decrements the underlying iterator object. + /// @return A reverse_iterator object with the underlying iterator as-is. + reverse_iterator operator--(int) & noexcept { + auto result = *this; + ++m_current; + return result; } -} -/// @brief from_node function for C-style 2D arrays whose element type must be a basic_node template instance type or a -/// compatible type. -/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam T Element type of C-style 2D array. -/// @tparam N0 Size of the outer dimension. -/// @tparam N1 Size of the inner dimension. -/// @param n A basic_node object. -/// @param array An array object. -template -inline auto from_node(const BasicNodeType& n, T (&array)[N0][N1]) - -> decltype(n.get_value_inplace(std::declval()), void()) { - if FK_YAML_UNLIKELY (!n.is_sequence()) { - throw type_error("The target node value type is not sequence type.", n.get_type()); + /// @brief Advances the underlying iterator object by `n`. + /// @param n The distance by which the underlying iterator is advanced. + /// @return A reverse_iterator object with the underlying iterator advanced by `n`. + reverse_iterator operator+(difference_type n) const noexcept { + return reverse_iterator(m_current - n); } - // call get_value_inplace(), not get_value(), since the storage to fill the result into is already created. - for (std::size_t i0 = 0; i0 < N0; i0++) { - for (std::size_t i1 = 0; i1 < N1; i1++) { - n.at(i0).at(i1).get_value_inplace(array[i0][i1]); - } + /// @brief Advances the underlying iterator object by `n`. 
+ /// @param n The distance by which the underlying iterator is advanced. + /// @return Reference to this reverse_iterator object with the underlying iterator advanced by `n`. + reverse_iterator& operator+=(difference_type n) noexcept { + m_current -= n; + return *this; } -} -/// @brief from_node function for C-style 2D arrays whose element type must be a basic_node template instance type or a -/// compatible type. -/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam T Element type of C-style 2D array. -/// @tparam N0 Size of the outermost dimension. -/// @tparam N1 Size of the middle dimension. -/// @tparam N2 Size of the innermost dimension. -/// @param n A basic_node object. -/// @param array An array object. -template -inline auto from_node(const BasicNodeType& n, T (&array)[N0][N1][N2]) - -> decltype(n.get_value_inplace(std::declval()), void()) { - if FK_YAML_UNLIKELY (!n.is_sequence()) { - throw type_error("The target node value type is not sequence type.", n.get_type()); + /// @brief Decrements the underlying iterator object by `n`. + /// @param n The distance by which the underlying iterator is decremented. + /// @return A reverse_iterator object with the underlying iterator decremented by `n`. + reverse_iterator operator-(difference_type n) const noexcept { + return reverse_iterator(m_current + n); } - // call get_value_inplace(), not get_value(), since the storage to fill the result into is already created. - for (std::size_t i0 = 0; i0 < N0; i0++) { - for (std::size_t i1 = 0; i1 < N1; i1++) { - for (std::size_t i2 = 0; i2 < N2; i2++) { - n.at(i0).at(i1).at(i2).get_value_inplace(array[i0][i1][i2]); - } - } + /// @brief Decrements the underlying iterator object by `n`. + /// @param n The distance by which the underlying iterator is decremented. + /// @return Reference to this reverse_iterator object with the underlying iterator decremented by `n`. 
+ reverse_iterator& operator-=(difference_type n) noexcept { + m_current += n; + return *this; } -} -/// @brief from_node function for std::array objects whose element type must be a basic_node template instance type or a -/// compatible type. This function is necessary since insert function is not implemented for std::array. -/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam T Element type of std::array. -/// @tparam N Size of std::array. -/// @param n A basic_node object. -/// @param arr A std::array object. -template -inline auto from_node(const BasicNodeType& n, std::array& arr) - -> decltype(n.get_value_inplace(std::declval()), void()) { - if FK_YAML_UNLIKELY (!n.is_sequence()) { - throw type_error("The target node value type is not sequence type.", n.get_type()); + /// @brief Get the mapping key node of the underlying iterator. + /// @return The mapping key node of the underlying iterator. + auto key() const -> decltype(std::declval().key()) { + Iterator itr = --(base()); + return itr.key(); } - for (std::size_t i = 0; i < N; i++) { - // call get_value_inplace(), not get_value(), since the storage to fill the result into is already created. - n.at(i).get_value_inplace(arr.at(i)); + /// @brief Get reference to the underlying iterator's value. + /// @return Reference to the underlying iterator's value. + reference value() noexcept { + Iterator itr = --(base()); + return *itr; } + +private: + /// + Iterator m_current; +}; + +/// @brief Check equality between reverse_iterator objects. +/// @tparam IteratorL Base iterator type for `lhs`. +/// @tparam IteratorR Base iterator type for `rhs`. +/// @param lhs A reverse_iterator object. +/// @param rhs A reverse_iterator object. +/// @return true if the two reverse_iterator objects are equal, false otherwise. 
+template +inline bool operator==(const reverse_iterator& lhs, const reverse_iterator& rhs) { + return lhs.base() == rhs.base(); } -/// @brief from_node function for std::valarray objects whose element type must be a basic_node template instance type -/// or a compatible type. This function is necessary since insert function is not implemented for std::valarray. -/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam T Element type of std::valarray. -/// @param n A basic_node object. -/// @param va A std::valarray object. -template -inline auto from_node(const BasicNodeType& n, std::valarray& va) - -> decltype(n.get_value_inplace(std::declval()), void()) { - if FK_YAML_UNLIKELY (!n.is_sequence()) { - throw type_error("The target node value type is not sequence type.", n.get_type()); - } +/// @brief Check inequality between reverse_iterator objects. +/// @tparam IteratorL Base iterator type for `lhs`. +/// @tparam IteratorR Base iterator type for `rhs`. +/// @param lhs A reverse_iterator object. +/// @param rhs A reverse_iterator object. +/// @return true if the two reverse_iterator objects are not equal, false otherwise. +template +inline bool operator!=(const reverse_iterator& lhs, const reverse_iterator& rhs) { + return lhs.base() != rhs.base(); +} - std::size_t count = n.size(); - va.resize(count); - for (std::size_t i = 0; i < count; i++) { - // call get_value_inplace(), not get_value(), since the storage to fill the result into is already created. - n.at(i).get_value_inplace(va[i]); - } +/// @brief Check if `lhs` is less than `rhs`. +/// @tparam IteratorL Base iterator type for `lhs`. +/// @tparam IteratorR Base iterator type for `rhs`. +/// @param lhs A reverse_iterator object. +/// @param rhs A reverse_iterator object. +/// @return true if `lhs` is less than `rhs`, false otherwise. 
+template +inline bool operator<(const reverse_iterator& lhs, const reverse_iterator& rhs) { + return lhs.base() > rhs.base(); } -/// @brief from_node function for std::forward_list objects whose element type must be a basic_node template instance -/// type or a compatible type. This function is necessary since insert function is not implemented for -/// std::forward_list. -/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam T Element type of std::forward_list. -/// @tparam Alloc Allocator type of std::forward_list. -/// @param n A basic_node object. -/// @param fl A std::forward_list object. -template -inline auto from_node(const BasicNodeType& n, std::forward_list& fl) - -> decltype(n.template get_value(), void()) { - if FK_YAML_UNLIKELY (!n.is_sequence()) { - throw type_error("The target node value is not sequence type.", n.get_type()); - } +/// @brief Check if `lhs` is less than or equal to `rhs`. +/// @tparam IteratorL Base iterator type for `lhs`. +/// @tparam IteratorR Base iterator type for `rhs`. +/// @param lhs A reverse_iterator object. +/// @param rhs A reverse_iterator object. +/// @return true if `lhs` is less than or equal to `rhs`, false otherwise. +template +inline bool operator<=(const reverse_iterator& lhs, const reverse_iterator& rhs) { + return lhs.base() >= rhs.base(); +} - fl.clear(); +/// @brief Check if `lhs` is greater than `rhs`. +/// @tparam IteratorL Base iterator type for `lhs`. +/// @tparam IteratorR Base iterator type for `rhs`. +/// @param lhs A reverse_iterator object. +/// @param rhs A reverse_iterator object. +/// @return true if `lhs` is greater than `rhs`, false otherwise. +template +inline bool operator>(const reverse_iterator& lhs, const reverse_iterator& rhs) { + return lhs.base() < rhs.base(); +} - // std::forward_list does not have insert function. 
- auto insert_pos_itr = fl.before_begin(); - for (const auto& elem : n) { - insert_pos_itr = fl.emplace_after(insert_pos_itr, elem.template get_value()); - } +/// @brief Check if `lhs` is greater than or equal to `rhs`. +/// @tparam IteratorL Base iterator type for `lhs`. +/// @tparam IteratorR Base iterator type for `rhs`. +/// @param lhs A reverse_iterator object. +/// @param rhs A reverse_iterator object. +/// @return true if `lhs` is greater than or equal to `rhs`, false otherwise. +template +inline bool operator>=(const reverse_iterator& lhs, const reverse_iterator& rhs) { + return lhs.base() <= rhs.base(); } -/// @brief from_node function for container objects of only keys or values, e.g., std::vector or std::set, whose element -/// type must be a basic_node template instance type or a compatible type. -/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam CompatSeqType A container type. -/// @param n A basic_node object. -/// @param s A container object. -template < - typename BasicNodeType, typename CompatSeqType, - enable_if_t< - conjunction< - is_basic_node, is_constructible_sequence_type, - negation>>::value, - int> = 0> -inline auto from_node(const BasicNodeType& n, CompatSeqType& s) - -> decltype(n.template get_value(), void()) { - if FK_YAML_UNLIKELY (!n.is_sequence()) { - throw type_error("The target node value is not sequence type.", n.get_type()); - } +FK_YAML_DETAIL_NAMESPACE_END - s.clear(); +#endif /* FK_YAML_DETAIL_REVERSE_ITERATOR_HPP */ - // call reserve function first if it's available (like std::vector). - call_reserve_if_available::call(s, n.size()); +// #include - // transform a sequence node into a destination type object by calling insert function. 
- using std::end; - std::transform(n.begin(), n.end(), std::inserter(s, end(s)), [](const BasicNodeType& elem) { - return elem.template get_value(); - }); -} +// #include +// _______ __ __ __ _____ __ __ __ +// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library +// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 +// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML +// +// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani +// SPDX-License-Identifier: MIT -/// @brief from_node function for sequence container adapter objects, e.g., std::stack or std::queue, whose element type -/// must be either a basic_node template instance type or a compatible type. -/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam SeqContainerAdapter A sequence container adapter type. -/// @param n A node object. -/// @param ca A sequence container adapter object. -template < - typename BasicNodeType, typename SeqContainerAdapter, - enable_if_t< - conjunction, is_sequence_container_adapter>::value, int> = 0> -inline auto from_node(const BasicNodeType& n, SeqContainerAdapter& ca) - -> decltype(n.template get_value(), ca.push(std::declval()), void()) { - if FK_YAML_UNLIKELY (!n.is_sequence()) { - throw type_error("The target node value is not sequence type.", n.get_type()); - } +#ifndef FK_YAML_DETAIL_TYPES_YAML_VERSION_T_HPP +#define FK_YAML_DETAIL_TYPES_YAML_VERSION_T_HPP - // clear existing elements manually since clear function is not implemented for container adapter classes. - while (!ca.empty()) { - ca.pop(); - } +#include - for (const auto& elem : n) { - // container adapter classes commonly have push function. - // emplace function cannot be used in case SeqContainerAdapter::container_type is std::vector in C++11. - ca.push(elem.template get_value()); - } -} +// #include -/// @brief from_node function for mappings whose key and value are of both compatible types. 
-/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam CompatibleKeyType Mapping key type compatible with BasicNodeType. -/// @tparam CompatibleValueType Mapping value type compatible with BasicNodeType. -/// @tparam Compare Comparator type for mapping keys. -/// @tparam Allocator Allocator type for destination mapping object. -/// @param n A node object. -/// @param m Mapping container object to store converted key/value objects. -template ::value, int> = 0> -inline auto from_node(const BasicNodeType& n, CompatMapType& m) - -> decltype( - std::declval().template get_value(), - std::declval().template get_value(), - m.emplace(std::declval(), std::declval()), - void()) { - if FK_YAML_UNLIKELY (!n.is_mapping()) { - throw type_error("The target node value type is not mapping type.", n.get_type()); - } +// #include - m.clear(); - call_reserve_if_available::call(m, n.size()); - for (const auto& pair : n.as_map()) { - m.emplace( - pair.first.template get_value(), - pair.second.template get_value()); - } -} +FK_YAML_DETAIL_NAMESPACE_BEGIN -/// @brief from_node function for nullptr. -/// @tparam BasicNodeType A basic_node template instance type. -/// @param n A node object. -/// @param null Storage for a null value. -template ::value, int> = 0> -inline void from_node(const BasicNodeType& n, std::nullptr_t& null) { - // to ensure the target node value type is null. - if FK_YAML_UNLIKELY (!n.is_null()) { - throw type_error("The target node value type is not null type.", n.get_type()); +/// @brief Definition of YAML version types. 
+enum class yaml_version_t : std::uint8_t { + VER_1_1, //!< YAML version 1.1 + VER_1_2, //!< YAML version 1.2 +}; + +inline yaml_version_t convert_from_yaml_version_type(yaml_version_type t) noexcept { + switch (t) { + case yaml_version_type::VERSION_1_1: + return yaml_version_t::VER_1_1; + case yaml_version_type::VERSION_1_2: + default: + return yaml_version_t::VER_1_2; } - null = nullptr; } -/// @brief from_node function for booleans. -/// @tparam BasicNodeType A basic_node template instance type. -/// @param n A node object. -/// @param b Storage for a boolean value. -template ::value, int> = 0> -inline void from_node(const BasicNodeType& n, bool& b) { - switch (n.get_type()) { - case node_type::NULL_OBJECT: - // nullptr is converted to false just as C++ implicitly does. - b = false; - break; - case node_type::BOOLEAN: - b = static_cast(n.as_bool()); - break; - case node_type::INTEGER: - // true: non-zero, false: zero - b = (n.as_int() != 0); - break; - case node_type::FLOAT: - // true: non-zero, false: zero - using float_type = typename BasicNodeType::float_number_type; - b = (n.as_float() != static_cast(0.)); - break; - case node_type::SEQUENCE: - case node_type::MAPPING: - case node_type::STRING: +inline yaml_version_type convert_to_yaml_version_type(yaml_version_t t) noexcept { + switch (t) { + case yaml_version_t::VER_1_1: + return yaml_version_type::VERSION_1_1; + case yaml_version_t::VER_1_2: default: - throw type_error("The target node value type is not compatible with boolean type.", n.get_type()); + return yaml_version_type::VERSION_1_2; } } -/// @brief Helper struct for node-to-int conversion. -/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam IntType Target integer value type (same as BasicNodeType::integer_type) -template < - typename BasicNodeType, typename IntType, bool = std::is_same::value> -struct from_node_int_helper { - /// @brief Convert node's integer value to the target integer type. - /// @param n A node object. 
- /// @return An integer value converted from the node's integer value. - static IntType convert(const BasicNodeType& n) { - return n.as_int(); - } -}; +FK_YAML_DETAIL_NAMESPACE_END + +#endif /* FK_YAML_DETAIL_TYPES_YAML_VERSION_T_HPP */ + +// #include + +// #include + +// #include +// _______ __ __ __ _____ __ __ __ +// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library +// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 +// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML +// +// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani +// SPDX-License-Identifier: MIT + +#ifndef FK_YAML_NODE_VALUE_CONVERTER_HPP +#define FK_YAML_NODE_VALUE_CONVERTER_HPP + +#include + +// #include + +// #include +// _______ __ __ __ _____ __ __ __ +// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library +// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 +// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML +// +// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani +// SPDX-License-Identifier: MIT + +#ifndef FK_YAML_DETAIL_CONVERSIONS_FROM_NODE_HPP +#define FK_YAML_DETAIL_CONVERSIONS_FROM_NODE_HPP + +#include +#include +#include +#include +#include +#include -/// @brief Helper struct for node-to-int conversion if IntType is not the node's integer value type. -/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam IntType Target integer value type (different from BasicNodeType::integer_type) -template -struct from_node_int_helper { - /// @brief Convert node's integer value to non-uint64_t integer types. - /// @param n A node object. - /// @return An integer value converted from the node's integer value. - static IntType convert(const BasicNodeType& n) { - using node_int_type = typename BasicNodeType::integer_type; - const node_int_type tmp_int = n.as_int(); +// #include - // under/overflow check. 
- if (std::is_same::value) { - if FK_YAML_UNLIKELY (tmp_int < 0) { - throw exception("Integer value underflow detected."); - } - } - else { - if FK_YAML_UNLIKELY (tmp_int < static_cast(std::numeric_limits::min())) { - throw exception("Integer value underflow detected."); - } - if FK_YAML_UNLIKELY (static_cast(std::numeric_limits::max()) < tmp_int) { - throw exception("Integer value overflow detected."); - } - } +// #include - return static_cast(tmp_int); - } -}; +// #include -/// @brief from_node function for integers. -/// @note If node's value is null, boolean, or float, such a value is converted into an integer internally. -/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam IntegerType An integer value type. -/// @param n A node object. -/// @param i Storage for an integer value. -template < - typename BasicNodeType, typename IntegerType, - enable_if_t, is_non_bool_integral>::value, int> = 0> -inline void from_node(const BasicNodeType& n, IntegerType& i) { - switch (n.get_type()) { - case node_type::NULL_OBJECT: - // nullptr is interpreted as 0 - i = static_cast(0); - break; - case node_type::BOOLEAN: - i = static_cast(n.as_bool()) ? static_cast(1) : static_cast(0); - break; - case node_type::INTEGER: - i = from_node_int_helper::convert(n); - break; - case node_type::FLOAT: { - // int64_t should be safe to express the integer part of possible floating point types. - const auto tmp_int = static_cast(n.as_float()); +// #include - // under/overflow check. 
- if (std::is_same::value) { - if FK_YAML_UNLIKELY (tmp_int < 0) { - throw exception("Integer value underflow detected."); - } - } - else { - if FK_YAML_UNLIKELY (tmp_int < static_cast(std::numeric_limits::min())) { - throw exception("Integer value underflow detected."); - } - if FK_YAML_UNLIKELY (static_cast(std::numeric_limits::max()) < tmp_int) { - throw exception("Integer value overflow detected."); - } - } +// #include - i = static_cast(tmp_int); - break; - } - case node_type::SEQUENCE: - case node_type::MAPPING: - case node_type::STRING: - default: - throw type_error("The target node value type is not compatible with integer type.", n.get_type()); - } -} +// #include -/// @brief Helper struct for node-to-float conversion if FloatType is the node's floating point value type. -/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam FloatType Target floating point value type (same as the BasicNodeType::float_number_type) -template < - typename BasicNodeType, typename FloatType, - bool = std::is_same::value> -struct from_node_float_helper { - /// @brief Convert node's floating point value to the target floating point type. - /// @param n A node object. - /// @return A floating point value converted from the node's floating point value. - static FloatType convert(const BasicNodeType& n) { - return n.as_float(); - } -}; -/// @brief Helper struct for node-to-float conversion if IntType is not the node's floating point value type. -/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam FloatType Target floating point value type (different from BasicNodeType::float_number_type) -template -struct from_node_float_helper { - /// @brief Convert node's floating point value to the target floating point type. - /// @param n A node object. - /// @return A floating point value converted from the node's floating point value. 
- static FloatType convert(const BasicNodeType& n) { - using node_float_type = typename BasicNodeType::float_number_type; - auto tmp_float = n.as_float(); +#ifdef FK_YAML_HAS_CXX_17 +#include +#endif - // check if the value is an infinite number (either positive or negative) - if (std::isinf(tmp_float)) { - if (tmp_float == std::numeric_limits::infinity()) { - return std::numeric_limits::infinity(); - } +FK_YAML_DETAIL_NAMESPACE_BEGIN - return static_cast(-1.) * std::numeric_limits::infinity(); - } +/////////////////// +// from_node // +/////////////////// - // check if the value is not a number - if (std::isnan(tmp_float)) { - return std::numeric_limits::quiet_NaN(); - } +// utility type traits and functors - // check if the value is expressible as FloatType. - if FK_YAML_UNLIKELY (tmp_float < std::numeric_limits::lowest()) { - throw exception("Floating point value underflow detected."); - } - if FK_YAML_UNLIKELY (std::numeric_limits::max() < tmp_float) { - throw exception("Floating point value overflow detected."); - } +/// @brief Utility traits type alias to detect constructible associative container types from a mapping node, e.g., +/// std::map or std::unordered_map. +/// @tparam T A target type for detection. +template +using is_constructible_mapping_type = + conjunction, detect::has_mapped_type, detect::has_value_type>; - return static_cast(tmp_float); +/// @brief Utility traits type alias to detect constructible container types from a sequence node, e.g., std::vector or +/// std::list. +/// @tparam BasicNodeType A basic_node template instance type. +/// @tparam T A target type for detection. +template +using is_constructible_sequence_type = conjunction< + negation>, detect::has_iterator, detect::is_iterator_traits, + detect::has_begin_end, negation>, + negation>>; + +/// @brief Utility traits type alias to detect a sequence container adapter type, e.g., std::stack or std::queue. +/// @tparam T A target type for detection. 
+template +using is_sequence_container_adapter = conjunction< + negation>, detect::has_container_type, detect::has_value_type, + negation>>; + +/// @brief Helper struct for reserve() member function call switch for types which do not have reserve function. +/// @tparam ContainerType A container type. +template +struct call_reserve_if_available { + /// @brief Do nothing since ContainerType does not have reserve function. + static void call(ContainerType& /*unused*/, typename ContainerType::size_type /*unused*/) { } }; -/// @brief from_node function for floating point values. -/// @note If node's value is null, boolean, or integer, such a value is converted into a floating point internally. -/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam FloatType A floating point value type. -/// @param n A node object. -/// @param f Storage for a float point value. -template < - typename BasicNodeType, typename FloatType, - enable_if_t, std::is_floating_point>::value, int> = 0> -inline void from_node(const BasicNodeType& n, FloatType& f) { - switch (n.get_type()) { - case node_type::NULL_OBJECT: - // nullptr is interpreted as 0.0 - f = static_cast(0.); - break; - case node_type::BOOLEAN: - f = static_cast(n.as_bool()) ? static_cast(1.) : static_cast(0.); - break; - case node_type::INTEGER: - f = static_cast(n.as_int()); - break; - case node_type::FLOAT: - f = from_node_float_helper::convert(n); - break; - case node_type::SEQUENCE: - case node_type::MAPPING: - case node_type::STRING: - default: - throw type_error("The target node value type is not compatible with float number type.", n.get_type()); +/// @brief Helper struct for reserve() member function call switch for types which have reserve function. +/// @tparam ContainerType A container type. +template +struct call_reserve_if_available::value>> { + /// @brief Call reserve function on the ContainerType object with a given size. + /// @param c A container object. + /// @param n A size to reserve. 
+ static void call(ContainerType& c, typename ContainerType::size_type n) { + c.reserve(n); } -} +}; -/// @brief from_node function for BasicNodeType::string_type objects. +// from_node() implementations + +/// @brief from_node function for C-style 1D arrays whose element type must be a basic_node template instance type or a +/// compatible type. /// @tparam BasicNodeType A basic_node template instance type. +/// @tparam T Element type of C-style 1D array. +/// @tparam N Size of the array. /// @param n A basic_node object. -/// @param s A string node value object. -template ::value, int> = 0> -inline void from_node(const BasicNodeType& n, typename BasicNodeType::string_type& s) { - if FK_YAML_UNLIKELY (!n.is_string()) { - throw type_error("The target node value type is not string type.", n.get_type()); +/// @param array An array object. +template +inline auto from_node(const BasicNodeType& n, T (&array)[N]) + -> decltype(n.get_value_inplace(std::declval()), void()) { + if FK_YAML_UNLIKELY (!n.is_sequence()) { + throw type_error("The target node value type is not sequence type.", n.get_type()); } - s = n.as_str(); -} -/// @brief from_node function for compatible string type. -/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam CompatibleStringType A compatible string type. -/// @param n A basic_node object. -/// @param s A compatible string object. -template < - typename BasicNodeType, typename CompatibleStringType, - enable_if_t< - conjunction< - is_basic_node, - negation>, - disjunction< - std::is_constructible, - std::is_assignable>>::value, - int> = 0> -inline void from_node(const BasicNodeType& n, CompatibleStringType& s) { - if FK_YAML_UNLIKELY (!n.is_string()) { - throw type_error("The target node value type is not string type.", n.get_type()); + // call get_value_inplace(), not get_value(), since the storage to fill the result into is already created. 
+ for (std::size_t i = 0; i < N; i++) { + n.at(i).get_value_inplace(array[i]); } - s = n.as_str(); } -/// @brief from_node function for std::pair objects whose element types must be either a basic_node template instance -/// type or a compatible type. +/// @brief from_node function for C-style 2D arrays whose element type must be a basic_node template instance type or a +/// compatible type. /// @tparam BasicNodeType A basic_node template instance type. -/// @tparam T The first type of the std::pair. -/// @tparam U The second type of the std::pair. +/// @tparam T Element type of C-style 2D array. +/// @tparam N0 Size of the outer dimension. +/// @tparam N1 Size of the inner dimension. /// @param n A basic_node object. -/// @param p A std::pair object. -template ::value, int> = 0> -inline auto from_node(const BasicNodeType& n, std::pair& p) - -> decltype(std::declval().template get_value(), std::declval().template get_value(), void()) { +/// @param array An array object. +template +inline auto from_node(const BasicNodeType& n, T (&array)[N0][N1]) + -> decltype(n.get_value_inplace(std::declval()), void()) { if FK_YAML_UNLIKELY (!n.is_sequence()) { throw type_error("The target node value type is not sequence type.", n.get_type()); } // call get_value_inplace(), not get_value(), since the storage to fill the result into is already created. - n.at(0).get_value_inplace(p.first); - n.at(1).get_value_inplace(p.second); + for (std::size_t i0 = 0; i0 < N0; i0++) { + for (std::size_t i1 = 0; i1 < N1; i1++) { + n.at(i0).at(i1).get_value_inplace(array[i0][i1]); + } + } } -/// @brief concrete implementation of from_node function for std::tuple objects. +/// @brief from_node function for C-style 2D arrays whose element type must be a basic_node template instance type or a +/// compatible type. /// @tparam BasicNodeType A basic_node template instance type. -/// @tparam ...Types The value types of std::tuple. -/// @tparam ...Idx Index sequence values for std::tuples value types. 
-/// @param n A basic_node object -/// @param _ Index sequence values (unused). -/// @return A std::tuple object converted from the sequence node values. -template -inline std::tuple from_node_tuple_impl(const BasicNodeType& n, index_sequence /*unused*/) { - return std::make_tuple(n.at(Idx).template get_value()...); +/// @tparam T Element type of C-style 2D array. +/// @tparam N0 Size of the outermost dimension. +/// @tparam N1 Size of the middle dimension. +/// @tparam N2 Size of the innermost dimension. +/// @param n A basic_node object. +/// @param array An array object. +template +inline auto from_node(const BasicNodeType& n, T (&array)[N0][N1][N2]) + -> decltype(n.get_value_inplace(std::declval()), void()) { + if FK_YAML_UNLIKELY (!n.is_sequence()) { + throw type_error("The target node value type is not sequence type.", n.get_type()); + } + + // call get_value_inplace(), not get_value(), since the storage to fill the result into is already created. + for (std::size_t i0 = 0; i0 < N0; i0++) { + for (std::size_t i1 = 0; i1 < N1; i1++) { + for (std::size_t i2 = 0; i2 < N2; i2++) { + n.at(i0).at(i1).at(i2).get_value_inplace(array[i0][i1][i2]); + } + } + } } -/// @brief from_node function for std::tuple objects whose value types must all be either a basic_node template instance -/// type or a compatible type +/// @brief from_node function for std::array objects whose element type must be a basic_node template instance type or a +/// compatible type. This function is necessary since insert function is not implemented for std::array. /// @tparam BasicNodeType A basic_node template instance type. -/// @tparam ...Types Value types of std::tuple. +/// @tparam T Element type of std::array. +/// @tparam N Size of std::array. /// @param n A basic_node object. -/// @param t A std::tuple object. -template ::value, int> = 0> -inline void from_node(const BasicNodeType& n, std::tuple& t) { +/// @param arr A std::array object. 
+template +inline auto from_node(const BasicNodeType& n, std::array& arr) + -> decltype(n.get_value_inplace(std::declval()), void()) { if FK_YAML_UNLIKELY (!n.is_sequence()) { throw type_error("The target node value type is not sequence type.", n.get_type()); } - // Types... must be explicitly specified; the return type would otherwise be std::tuple with no value types. - t = from_node_tuple_impl(n, index_sequence_for {}); + for (std::size_t i = 0; i < N; i++) { + // call get_value_inplace(), not get_value(), since the storage to fill the result into is already created. + n.at(i).get_value_inplace(arr.at(i)); + } } -#ifdef FK_YAML_HAS_CXX_17 - -/// @brief from_node function for std::optional objects whose value type must be either a basic_node template instance -/// type or a compatible type. +/// @brief from_node function for std::valarray objects whose element type must be a basic_node template instance type +/// or a compatible type. This function is necessary since insert function is not implemented for std::valarray. /// @tparam BasicNodeType A basic_node template instance type. -/// @tparam T A value type of the std::optional. +/// @tparam T Element type of std::valarray. /// @param n A basic_node object. -/// @param o A std::optional object. -template ::value, int> = 0> -inline auto from_node(const BasicNodeType& n, std::optional& o) -> decltype(n.template get_value(), void()) { - try { - o.emplace(n.template get_value()); +/// @param va A std::valarray object. 
+template +inline auto from_node(const BasicNodeType& n, std::valarray& va) + -> decltype(n.get_value_inplace(std::declval()), void()) { + if FK_YAML_UNLIKELY (!n.is_sequence()) { + throw type_error("The target node value type is not sequence type.", n.get_type()); } - catch (const std::exception& /*unused*/) { - // Any exception derived from std::exception is interpreted as a conversion failure in some way - // since user-defined from_node function may throw a different object from a fkyaml::type_error. - // and std::exception is usually the base class of user-defined exception types. - o = std::nullopt; + + std::size_t count = n.size(); + va.resize(count); + for (std::size_t i = 0; i < count; i++) { + // call get_value_inplace(), not get_value(), since the storage to fill the result into is already created. + n.at(i).get_value_inplace(va[i]); } } -#endif // defined(FK_YAML_HAS_CXX_17) - -/// @brief A function object to call from_node functions. -/// @note User-defined specialization is available by providing implementation **OUTSIDE** fkyaml namespace. -struct from_node_fn { - /// @brief Call from_node function suitable for the given T type. - /// @tparam BasicNodeType A basic_node template instance type. - /// @tparam T A target value type assigned from the basic_node object. - /// @param n A basic_node object. - /// @param val A target object assigned from the basic_node object. - /// @return decltype(from_node(n, std::forward(val))) void by default. User can set it to some other type. - template - auto operator()(const BasicNodeType& n, T&& val) const - noexcept(noexcept(from_node(n, std::forward(val)))) -> decltype(from_node(n, std::forward(val))) { - return from_node(n, std::forward(val)); +/// @brief from_node function for std::forward_list objects whose element type must be a basic_node template instance +/// type or a compatible type. This function is necessary since insert function is not implemented for +/// std::forward_list. 
+/// @tparam BasicNodeType A basic_node template instance type. +/// @tparam T Element type of std::forward_list. +/// @tparam Alloc Allocator type of std::forward_list. +/// @param n A basic_node object. +/// @param fl A std::forward_list object. +template +inline auto from_node(const BasicNodeType& n, std::forward_list& fl) + -> decltype(n.template get_value(), void()) { + if FK_YAML_UNLIKELY (!n.is_sequence()) { + throw type_error("The target node value is not sequence type.", n.get_type()); } -}; - -FK_YAML_DETAIL_NAMESPACE_END - -FK_YAML_NAMESPACE_BEGIN - -#ifndef FK_YAML_HAS_CXX_17 -// anonymous namespace to hold `from_node` functor. -// see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4381.html for why it's needed. -namespace // NOLINT(cert-dcl59-cpp,fuchsia-header-anon-namespaces,google-build-namespaces) -{ -#endif - -/// @brief A global object to represent ADL friendly from_node functor. -// NOLINTNEXTLINE(misc-definitions-in-headers) -FK_YAML_INLINE_VAR constexpr const auto& from_node = detail::static_const::value; - -#ifndef FK_YAML_HAS_CXX_17 -} // namespace -#endif - -FK_YAML_NAMESPACE_END - -#endif /* FK_YAML_DETAIL_CONVERSIONS_FROM_NODE_HPP */ -// #include -// _______ __ __ __ _____ __ __ __ -// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library -// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2 -// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML -// -// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani -// SPDX-License-Identifier: MIT - -#ifndef FK_YAML_DETAIL_CONVERSIONS_TO_NODE_HPP -#define FK_YAML_DETAIL_CONVERSIONS_TO_NODE_HPP - -#include - -// #include - -// #include - -// #include + fl.clear(); -// #include + // std::forward_list does not have insert function. 
+ auto insert_pos_itr = fl.before_begin(); + for (const auto& elem : n) { + insert_pos_itr = fl.emplace_after(insert_pos_itr, elem.template get_value()); + } +} -// #include +/// @brief from_node function for container objects of only keys or values, e.g., std::vector or std::set, whose element +/// type must be a basic_node template instance type or a compatible type. +/// @tparam BasicNodeType A basic_node template instance type. +/// @tparam CompatSeqType A container type. +/// @param n A basic_node object. +/// @param s A container object. +template < + typename BasicNodeType, typename CompatSeqType, + enable_if_t< + conjunction< + is_basic_node, is_constructible_sequence_type, + negation>>::value, + int> = 0> +inline auto from_node(const BasicNodeType& n, CompatSeqType& s) + -> decltype(n.template get_value(), void()) { + if FK_YAML_UNLIKELY (!n.is_sequence()) { + throw type_error("The target node value is not sequence type.", n.get_type()); + } -// #include + s.clear(); -// #include + // call reserve function first if it's available (like std::vector). + call_reserve_if_available::call(s, n.size()); + // transform a sequence node into a destination type object by calling insert function. + using std::end; + std::transform(n.begin(), n.end(), std::inserter(s, end(s)), [](const BasicNodeType& elem) { + return elem.template get_value(); + }); +} -FK_YAML_DETAIL_NAMESPACE_BEGIN +/// @brief from_node function for sequence container adapter objects, e.g., std::stack or std::queue, whose element type +/// must be either a basic_node template instance type or a compatible type. +/// @tparam BasicNodeType A basic_node template instance type. +/// @tparam SeqContainerAdapter A sequence container adapter type. +/// @param n A node object. +/// @param ca A sequence container adapter object. 
+template < + typename BasicNodeType, typename SeqContainerAdapter, + enable_if_t< + conjunction, is_sequence_container_adapter>::value, int> = 0> +inline auto from_node(const BasicNodeType& n, SeqContainerAdapter& ca) + -> decltype(n.template get_value(), ca.push(std::declval()), void()) { + if FK_YAML_UNLIKELY (!n.is_sequence()) { + throw type_error("The target node value is not sequence type.", n.get_type()); + } -/////////////////////////////////// -// external_node_constructor // -/////////////////////////////////// + // clear existing elements manually since clear function is not implemented for container adapter classes. + while (!ca.empty()) { + ca.pop(); + } -/// @brief The external constructor template for basic_node objects. -/// @note All the non-specialized instantiations results in compilation error since such instantiations are not -/// supported. -/// @warning All the specialization must call n.m_value.destroy() first in the construct function to avoid -/// memory leak. -/// @tparam node_type The resulting YAML node value type. -template -struct external_node_constructor { - template - static void sequence(BasicNodeType& n, Args&&... args) { - destroy(n); - n.m_attrs |= node_attr_bits::seq_bit; - n.m_value.p_seq = create_object(std::forward(args)...); + for (const auto& elem : n) { + // container adapter classes commonly have push function. + // emplace function cannot be used in case SeqContainerAdapter::container_type is std::vector in C++11. + ca.push(elem.template get_value()); } +} - template - static void mapping(BasicNodeType& n, Args&&... args) { - destroy(n); - n.m_attrs |= node_attr_bits::map_bit; - n.m_value.p_map = create_object(std::forward(args)...); +/// @brief from_node function for mappings whose key and value are of both compatible types. +/// @tparam BasicNodeType A basic_node template instance type. +/// @tparam CompatibleKeyType Mapping key type compatible with BasicNodeType. 
+/// @tparam CompatibleValueType Mapping value type compatible with BasicNodeType. +/// @tparam Compare Comparator type for mapping keys. +/// @tparam Allocator Allocator type for destination mapping object. +/// @param n A node object. +/// @param m Mapping container object to store converted key/value objects. +template ::value, int> = 0> +inline auto from_node(const BasicNodeType& n, CompatMapType& m) + -> decltype( + std::declval().template get_value(), + std::declval().template get_value(), + m.emplace(std::declval(), std::declval()), + void()) { + if FK_YAML_UNLIKELY (!n.is_mapping()) { + throw type_error("The target node value type is not mapping type.", n.get_type()); } - static void null_scalar(BasicNodeType& n, std::nullptr_t) { - destroy(n); - n.m_attrs |= node_attr_bits::null_bit; - n.m_value.p_map = nullptr; - } + m.clear(); + call_reserve_if_available::call(m, n.size()); - static void boolean_scalar(BasicNodeType& n, const typename BasicNodeType::boolean_type b) { - destroy(n); - n.m_attrs |= node_attr_bits::bool_bit; - n.m_value.boolean = b; + for (const auto& pair : n.as_map()) { + m.emplace( + pair.first.template get_value(), + pair.second.template get_value()); } +} - static void integer_scalar(BasicNodeType& n, const typename BasicNodeType::integer_type i) { - destroy(n); - n.m_attrs |= node_attr_bits::int_bit; - n.m_value.integer = i; +/// @brief from_node function for nullptr. +/// @tparam BasicNodeType A basic_node template instance type. +/// @param n A node object. +/// @param null Storage for a null value. +template ::value, int> = 0> +inline void from_node(const BasicNodeType& n, std::nullptr_t& null) { + // to ensure the target node value type is null. 
+ if FK_YAML_UNLIKELY (!n.is_null()) { + throw type_error("The target node value type is not null type.", n.get_type()); } + null = nullptr; +} - static void float_scalar(BasicNodeType& n, const typename BasicNodeType::float_number_type f) { - destroy(n); - n.m_attrs |= node_attr_bits::float_bit; - n.m_value.float_val = f; +/// @brief from_node function for booleans. +/// @tparam BasicNodeType A basic_node template instance type. +/// @param n A node object. +/// @param b Storage for a boolean value. +template ::value, int> = 0> +inline void from_node(const BasicNodeType& n, bool& b) { + switch (n.get_type()) { + case node_type::NULL_OBJECT: + // nullptr is converted to false just as C++ implicitly does. + b = false; + break; + case node_type::BOOLEAN: + b = static_cast(n.as_bool()); + break; + case node_type::INTEGER: + // true: non-zero, false: zero + b = (n.as_int() != 0); + break; + case node_type::FLOAT: + // true: non-zero, false: zero + using float_type = typename BasicNodeType::float_number_type; + b = (n.as_float() != static_cast(0.)); + break; + case node_type::SEQUENCE: + case node_type::MAPPING: + case node_type::STRING: + default: + throw type_error("The target node value type is not compatible with boolean type.", n.get_type()); } +} - template - static void string_scalar(BasicNodeType& n, Args&&... args) { - destroy(n); - n.m_attrs |= node_attr_bits::string_bit; - n.m_value.p_str = create_object(std::forward(args)...); +/// @brief Helper struct for node-to-int conversion. +/// @tparam BasicNodeType A basic_node template instance type. +/// @tparam IntType Target integer value type (same as BasicNodeType::integer_type) +template < + typename BasicNodeType, typename IntType, bool = std::is_same::value> +struct from_node_int_helper { + /// @brief Convert node's integer value to the target integer type. + /// @param n A node object. + /// @return An integer value converted from the node's integer value. 
+ static IntType convert(const BasicNodeType& n) { + return n.as_int(); } +}; -private: - static void destroy(BasicNodeType& n) { - n.m_value.destroy(n.m_attrs & node_attr_mask::value); - n.m_attrs &= ~node_attr_mask::value; +/// @brief Partial specialization for uint64_t when integer_type != uint64_t (the common int64_t case). +/// This must be declared BEFORE the generic specialization so the compiler always +/// prefers it for uint64_t. Using a hardcoded 'false' (not a value-dependent expression) avoids +/// the MSVC ambiguity that arises when std::is_same<...>::value is used as a template argument. +/// @tparam BasicNodeType A basic_node template instance type. +template +struct from_node_int_helper { + /// @brief Convert node's integer value to uint64_t via as_uint(). + /// @param n A node object. + /// @return The node value as uint64_t. + static uint64_t convert(const BasicNodeType& n) { + return n.as_uint(); } }; -///////////////// -// to_node // -///////////////// +/// @brief Helper struct for node-to-int conversion if IntType is not the node's integer value type +/// and IntType is not uint64_t (covered by the explicit specialization above). +/// @tparam BasicNodeType A basic_node template instance type. +/// @tparam IntType Target integer value type (different from BasicNodeType::integer_type, not uint64_t) +template +struct from_node_int_helper { + /// @brief Convert node's integer value to a narrower signed/unsigned integer type. + /// @param n A node object. + /// @return An integer value converted from the node's integer value. + static IntType convert(const BasicNodeType& n) { + using node_int_type = typename BasicNodeType::integer_type; + const node_int_type tmp_int = n.as_int(); -/// @brief to_node function for BasicNodeType::sequence_type objects. 
+ if FK_YAML_UNLIKELY (tmp_int < static_cast(std::numeric_limits::min())) { + throw exception("Integer value underflow detected."); + } + if FK_YAML_UNLIKELY (static_cast(std::numeric_limits::max()) < tmp_int) { + throw exception("Integer value overflow detected."); + } + + return static_cast(tmp_int); + } +}; + +/// @brief from_node function for integers. +/// @note If node's value is null, boolean, or float, such a value is converted into an integer internally. /// @tparam BasicNodeType A basic_node template instance type. -/// @tparam T A sequence node value type. -/// @param n A basic_node object. -/// @param s A sequence node value object. +/// @tparam IntegerType An integer value type. +/// @param n A node object. +/// @param i Storage for an integer value. template < - typename BasicNodeType, typename T, - enable_if_t< - conjunction< - is_basic_node, - std::is_same>>::value, - int> = 0> -inline void to_node(BasicNodeType& n, T&& s) noexcept { - external_node_constructor::sequence(n, std::forward(s)); + typename BasicNodeType, typename IntegerType, + enable_if_t, is_non_bool_integral>::value, int> = 0> +inline void from_node(const BasicNodeType& n, IntegerType& i) { + switch (n.get_type()) { + case node_type::NULL_OBJECT: + // nullptr is interpreted as 0 + i = static_cast(0); + break; + case node_type::BOOLEAN: + i = static_cast(n.as_bool()) ? static_cast(1) : static_cast(0); + break; + case node_type::INTEGER: + i = from_node_int_helper::convert(n); + break; + case node_type::FLOAT: { + // int64_t should be safe to express the integer part of possible floating point types. + const auto tmp_int = static_cast(n.as_float()); + + // under/overflow check. 
+ if (std::is_same::value) { + if FK_YAML_UNLIKELY (tmp_int < 0) { + throw exception("Integer value underflow detected."); + } + } + else { + if FK_YAML_UNLIKELY (tmp_int < static_cast(std::numeric_limits::min())) { + throw exception("Integer value underflow detected."); + } + if FK_YAML_UNLIKELY (static_cast(std::numeric_limits::max()) < tmp_int) { + throw exception("Integer value overflow detected."); + } + } + + i = static_cast(tmp_int); + break; + } + case node_type::SEQUENCE: + case node_type::MAPPING: + case node_type::STRING: + default: + throw type_error("The target node value type is not compatible with integer type.", n.get_type()); + } } -/// @brief to_node function for compatible sequence types. -/// @note This overload is enabled when -/// * both begin()/end() functions are callable on a `CompatSeqType` object -/// * CompatSeqType doesn't have `mapped_type` (mapping-like type) -/// * BasicNodeType::string_type cannot be constructed from a CompatSeqType object (string-like type) +/// @brief Helper struct for node-to-float conversion if FloatType is the node's floating point value type. /// @tparam BasicNodeType A basic_node template instance type. -/// @tparam CompatSeqType A container type. -/// @param n A basic_node object. -/// @param s A container object. 
+/// @tparam FloatType Target floating point value type (same as the BasicNodeType::float_number_type) template < - typename BasicNodeType, typename CompatSeqType, - enable_if_t< - conjunction< - is_basic_node, - negation>>, - negation>, detect::has_begin_end, - negation, detect::has_mapped_type>>, - negation>>::value, - int> = 0> -// NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward) -inline void to_node(BasicNodeType& n, CompatSeqType&& s) { - using std::begin; - using std::end; - external_node_constructor::sequence(n, begin(s), end(s)); -} + typename BasicNodeType, typename FloatType, + bool = std::is_same::value> +struct from_node_float_helper { + /// @brief Convert node's floating point value to the target floating point type. + /// @param n A node object. + /// @return A floating point value converted from the node's floating point value. + static FloatType convert(const BasicNodeType& n) { + return n.as_float(); + } +}; -/// @brief to_node function for std::pair objects. +/// @brief Helper struct for node-to-float conversion if IntType is not the node's floating point value type. /// @tparam BasicNodeType A basic_node template instance type. -/// @tparam T The first type of std::pair. -/// @tparam U The second type of std::pair. -/// @param n A basic_node object. -/// @param p A std::pair object. -template -inline void to_node(BasicNodeType& n, const std::pair& p) { - n = {p.first, p.second}; -} +/// @tparam FloatType Target floating point value type (different from BasicNodeType::float_number_type) +template +struct from_node_float_helper { + /// @brief Convert node's floating point value to the target floating point type. + /// @param n A node object. + /// @return A floating point value converted from the node's floating point value. 
+ static FloatType convert(const BasicNodeType& n) { + using node_float_type = typename BasicNodeType::float_number_type; + auto tmp_float = n.as_float(); + + // check if the value is an infinite number (either positive or negative) + if (std::isinf(tmp_float)) { + if (tmp_float == std::numeric_limits::infinity()) { + return std::numeric_limits::infinity(); + } + + return static_cast(-1.) * std::numeric_limits::infinity(); + } + + // check if the value is not a number + if (std::isnan(tmp_float)) { + return std::numeric_limits::quiet_NaN(); + } -/// @brief concrete implementation of to_node function for std::tuple objects. -/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam ...Types The value types of std::tuple. -/// @tparam ...Idx Index sequence values for std::tuple value types. -/// @param n A basic_node object. -/// @param t A std::tuple object. -/// @param _ An index sequence. (unused) -template -inline void to_node_tuple_impl(BasicNodeType& n, const std::tuple& t, index_sequence /*unused*/) { - n = {std::get(t)...}; -} + // check if the value is expressible as FloatType. + if FK_YAML_UNLIKELY (tmp_float < std::numeric_limits::lowest()) { + throw exception("Floating point value underflow detected."); + } + if FK_YAML_UNLIKELY (std::numeric_limits::max() < tmp_float) { + throw exception("Floating point value overflow detected."); + } -/// @brief to_node function for std::tuple objects with no value types. -/// @note This implementation is needed since calling `to_node_tuple_impl()` with an empty tuple creates a null node. + return static_cast(tmp_float); + } +}; + +/// @brief from_node function for floating point values. +/// @note If node's value is null, boolean, or integer, such a value is converted into a floating point internally. /// @tparam BasicNodeType A basic_node template instance type. -/// @param n A basic_node object. -/// @param _ A std::tuple object. 
(unused) -template -inline void to_node(BasicNodeType& n, const std::tuple<>& /*unused*/) { - n = BasicNodeType::sequence(); +/// @tparam FloatType A floating point value type. +/// @param n A node object. +/// @param f Storage for a float point value. +template < + typename BasicNodeType, typename FloatType, + enable_if_t, std::is_floating_point>::value, int> = 0> +inline void from_node(const BasicNodeType& n, FloatType& f) { + switch (n.get_type()) { + case node_type::NULL_OBJECT: + // nullptr is interpreted as 0.0 + f = static_cast(0.); + break; + case node_type::BOOLEAN: + f = static_cast(n.as_bool()) ? static_cast(1.) : static_cast(0.); + break; + case node_type::INTEGER: + f = static_cast(n.as_int()); + break; + case node_type::FLOAT: + f = from_node_float_helper::convert(n); + break; + case node_type::SEQUENCE: + case node_type::MAPPING: + case node_type::STRING: + default: + throw type_error("The target node value type is not compatible with float number type.", n.get_type()); + } } -/// @brief to_node function for std::tuple objects with at least one value type. +/// @brief from_node function for BasicNodeType::string_type objects. /// @tparam BasicNodeType A basic_node template instance type. -/// @tparam ...FirstType The first value types of std::tuple. -/// @tparam ...RestTypes The rest value types of std::tuple. (maybe empty) /// @param n A basic_node object. -/// @param t A std::tuple object. -template -inline void to_node(BasicNodeType& n, const std::tuple& t) { - to_node_tuple_impl(n, t, index_sequence_for {}); +/// @param s A string node value object. +template ::value, int> = 0> +inline void from_node(const BasicNodeType& n, typename BasicNodeType::string_type& s) { + if FK_YAML_UNLIKELY (!n.is_string()) { + throw type_error("The target node value type is not string type.", n.get_type()); + } + s = n.as_str(); } -/// @brief to_node function for BasicNodeType::mapping_type objects. +/// @brief from_node function for compatible string type. 
/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam T A mapping node value type. +/// @tparam CompatibleStringType A compatible string type. /// @param n A basic_node object. -/// @param m A mapping node value object. +/// @param s A compatible string object. template < - typename BasicNodeType, typename T, + typename BasicNodeType, typename CompatibleStringType, enable_if_t< conjunction< - is_basic_node, std::is_same>>::value, + is_basic_node, + negation>, + disjunction< + std::is_constructible, + std::is_assignable>>::value, int> = 0> -inline void to_node(BasicNodeType& n, T&& m) noexcept { - external_node_constructor::mapping(n, std::forward(m)); +inline void from_node(const BasicNodeType& n, CompatibleStringType& s) { + if FK_YAML_UNLIKELY (!n.is_string()) { + throw type_error("The target node value type is not string type.", n.get_type()); + } + s = n.as_str(); } -/// @brief to_node function for compatible mapping types. -/// @note This overload is enabled when -/// * both begin()/end() functions are callable on a `CompatMapType` object -/// * CompatMapType has both `key_type` and `mapped_type` +/// @brief from_node function for std::pair objects whose element types must be either a basic_node template instance +/// type or a compatible type. /// @tparam BasicNodeType A basic_node template instance type. -/// @tparam CompatMapType A container type. +/// @tparam T The first type of the std::pair. +/// @tparam U The second type of the std::pair. /// @param n A basic_node object. -/// @param m A container object. 
-template < - typename BasicNodeType, typename CompatMapType, - enable_if_t< - conjunction< - is_basic_node, negation>, - negation>>, - detect::has_begin_end, detect::has_key_type, - detect::has_mapped_type>::value, - int> = 0> -inline void to_node(BasicNodeType& n, CompatMapType&& m) { - external_node_constructor::mapping(n); - auto& map = n.as_map(); - for (const auto& pair : std::forward(m)) { - map.emplace(pair.first, pair.second); +/// @param p A std::pair object. +template ::value, int> = 0> +inline auto from_node(const BasicNodeType& n, std::pair& p) + -> decltype(std::declval().template get_value(), std::declval().template get_value(), void()) { + if FK_YAML_UNLIKELY (!n.is_sequence()) { + throw type_error("The target node value type is not sequence type.", n.get_type()); } -} -/// @brief to_node function for null objects. -/// @tparam BasicNodeType A mapping node value type. -/// @tparam NullType This must be std::nullptr_t type -template ::value, int> = 0> -inline void to_node(BasicNodeType& n, std::nullptr_t /*unused*/) { - external_node_constructor::null_scalar(n, nullptr); + // call get_value_inplace(), not get_value(), since the storage to fill the result into is already created. + n.at(0).get_value_inplace(p.first); + n.at(1).get_value_inplace(p.second); } -/// @brief to_node function for BasicNodeType::boolean_type objects. +/// @brief concrete implementation of from_node function for std::tuple objects. /// @tparam BasicNodeType A basic_node template instance type. -/// @tparam T A boolean scalar node value type. -/// @param n A basic_node object. -/// @param b A boolean scalar node value object. -template ::value, int> = 0> -inline void to_node(BasicNodeType& n, typename BasicNodeType::boolean_type b) noexcept { - external_node_constructor::boolean_scalar(n, b); +/// @tparam ...Types The value types of std::tuple. +/// @tparam ...Idx Index sequence values for std::tuples value types. 
+/// @param n A basic_node object +/// @param _ Index sequence values (unused). +/// @return A std::tuple object converted from the sequence node values. +template +inline std::tuple from_node_tuple_impl(const BasicNodeType& n, index_sequence /*unused*/) { + return std::make_tuple(n.at(Idx).template get_value()...); } -/// @brief to_node function for integers. +/// @brief from_node function for std::tuple objects whose value types must all be either a basic_node template instance +/// type or a compatible type /// @tparam BasicNodeType A basic_node template instance type. -/// @tparam T An integer type. +/// @tparam ...Types Value types of std::tuple. /// @param n A basic_node object. -/// @param i An integer object. -template < - typename BasicNodeType, typename T, - enable_if_t, is_non_bool_integral>::value, int> = 0> -inline void to_node(BasicNodeType& n, T i) noexcept { - external_node_constructor::integer_scalar(n, i); -} +/// @param t A std::tuple object. +template ::value, int> = 0> +inline void from_node(const BasicNodeType& n, std::tuple& t) { + if FK_YAML_UNLIKELY (!n.is_sequence()) { + throw type_error("The target node value type is not sequence type.", n.get_type()); + } -/// @brief to_node function for floating point numbers. -/// @tparam BasicNodeType A basic_node template instance type. -/// @tparam T A floating point number type. -/// @param n A basic_node object. -/// @param f A floating point number object. -template < - typename BasicNodeType, typename T, - enable_if_t, std::is_floating_point>::value, int> = 0> -inline void to_node(BasicNodeType& n, T f) noexcept { - external_node_constructor::float_scalar(n, f); + // Types... must be explicitly specified; the return type would otherwise be std::tuple with no value types. + t = from_node_tuple_impl(n, index_sequence_for {}); } -/// @brief to_node function for compatible strings. 
+#ifdef FK_YAML_HAS_CXX_17 + +/// @brief from_node function for std::optional objects whose value type must be either a basic_node template instance +/// type or a compatible type. /// @tparam BasicNodeType A basic_node template instance type. -/// @tparam T A compatible string type. +/// @tparam T A value type of the std::optional. /// @param n A basic_node object. -/// @param s A compatible string object. -template < - typename BasicNodeType, typename T, - enable_if_t< - conjunction< - is_basic_node, negation>, - std::is_constructible>::value, - int> = 0> -inline void to_node(BasicNodeType& n, T&& s) { - external_node_constructor::string_scalar(n, std::forward(s)); +/// @param o A std::optional object. +template ::value, int> = 0> +inline auto from_node(const BasicNodeType& n, std::optional& o) -> decltype(n.template get_value(), void()) { + try { + o.emplace(n.template get_value()); + } + catch (const std::exception& /*unused*/) { + // Any exception derived from std::exception is interpreted as a conversion failure in some way + // since user-defined from_node function may throw a different object from a fkyaml::type_error. + // and std::exception is usually the base class of user-defined exception types. + o = std::nullopt; + } } -/// @brief A function object to call to_node functions. +#endif // defined(FK_YAML_HAS_CXX_17) + +/// @brief A function object to call from_node functions. /// @note User-defined specialization is available by providing implementation **OUTSIDE** fkyaml namespace. -struct to_node_fn { - /// @brief Call to_node function suitable for the given T type. +struct from_node_fn { + /// @brief Call from_node function suitable for the given T type. /// @tparam BasicNodeType A basic_node template instance type. - /// @tparam T A target value type assigned to the basic_node object. + /// @tparam T A target value type assigned from the basic_node object. /// @param n A basic_node object. 
- /// @param val A target object assigned to the basic_node object. - /// @return decltype(to_node(n, std::forward(val))) void by default. User can set it to some other type. + /// @param val A target object assigned from the basic_node object. + /// @return decltype(from_node(n, std::forward(val))) void by default. User can set it to some other type. template - auto operator()(BasicNodeType& n, T&& val) const - noexcept(noexcept(to_node(n, std::forward(val)))) -> decltype(to_node(n, std::forward(val))) { - return to_node(n, std::forward(val)); + auto operator()(const BasicNodeType& n, T&& val) const + noexcept(noexcept(from_node(n, std::forward(val)))) -> decltype(from_node(n, std::forward(val))) { + return from_node(n, std::forward(val)); } }; @@ -12440,15 +12483,15 @@ FK_YAML_DETAIL_NAMESPACE_END FK_YAML_NAMESPACE_BEGIN #ifndef FK_YAML_HAS_CXX_17 -// anonymous namespace to hold `to_node` functor. +// anonymous namespace to hold `from_node` functor. // see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4381.html for why it's needed. namespace // NOLINT(cert-dcl59-cpp,fuchsia-header-anon-namespaces,google-build-namespaces) { #endif -/// @brief A global object to represent ADL friendly to_node functor. +/// @brief A global object to represent ADL friendly from_node functor. // NOLINTNEXTLINE(misc-definitions-in-headers) -FK_YAML_INLINE_VAR constexpr const auto& to_node = detail::static_const::value; +FK_YAML_INLINE_VAR constexpr const auto& from_node = detail::static_const::value; #ifndef FK_YAML_HAS_CXX_17 } // namespace @@ -12456,7 +12499,9 @@ FK_YAML_INLINE_VAR constexpr const auto& to_node = detail::static_const FK_YAML_NAMESPACE_BEGIN @@ -14122,6 +14167,36 @@ class basic_node { throw fkyaml::type_error("The node value is not a boolean.", get_type()); } + /// @brief Checks if the node is an integer that was parsed from a uint64_t value exceeding INT64_MAX. + /// @return true if the node holds an unsigned integer, false otherwise. 
+ bool is_uint() const noexcept { + return resolve_reference().is_uint_impl(); + } + + /// @brief Returns the integer node value as an unsigned 64-bit integer. + /// This is valid both for nodes where integer_type is unsigned and for nodes where a large + /// positive decimal scalar (> INT64_MAX) was stored with the uint_bit flag set. + /// @throw fkyaml::type_error if the node is not a compatible integer. + /// @return The node value as uint64_t. + uint64_t as_uint() const { + const basic_node& act_node = resolve_reference(); + if FK_YAML_LIKELY (act_node.is_integer_impl()) { + // When integer_type is unsigned the stored value IS the uint64_t directly. + if (std::is_unsigned::value) { + return static_cast(act_node.m_value.integer); + } + // When integer_type is signed, only uint_bit-marked nodes carry a uint64_t. + if (act_node.m_attrs & detail::node_attr_bits::uint_bit) { + return static_cast(act_node.m_value.integer); + } + // Signed values in the non-negative range can be returned safely. + if (act_node.m_value.integer >= static_cast(0)) { + return static_cast(act_node.m_value.integer); + } + } + throw fkyaml::type_error("The node value cannot be represented as an unsigned integer.", get_type()); + } + /// @brief Returns reference to the integer node value. /// @throw fkyaml::type_error The node value is not an integer. /// @return Reference to the integer node value. @@ -14129,18 +14204,30 @@ class basic_node { integer_type& as_int() { basic_node& act_node = resolve_reference(); if FK_YAML_LIKELY (act_node.is_integer_impl()) { + if FK_YAML_UNLIKELY (act_node.is_uint_impl()) { + throw fkyaml::type_error( + "The integer value exceeds INT64_MAX and cannot be returned as a signed integer. " + "Use as_uint() instead.", + get_type()); + } return act_node.m_value.integer; } throw fkyaml::type_error("The node value is not an integer.", get_type()); } /// @brief Returns reference to the integer node value. 
-    /// @throw fkyaml::type_error The node value is not an integer.
+    /// @throw fkyaml::type_error The node value is not an integer, or exceeds INT64_MAX.
     /// @return Constant reference to the integer node value.
     /// @sa https://fktn-k.github.io/fkYAML/api/basic_node/as_int/
     const integer_type& as_int() const {
         const basic_node& act_node = resolve_reference();
         if FK_YAML_LIKELY (act_node.is_integer_impl()) {
+            if FK_YAML_UNLIKELY (act_node.is_uint_impl()) {
+                throw fkyaml::type_error(
+                    "The integer value exceeds INT64_MAX and cannot be returned as a signed integer. "
+                    "Use as_uint() instead.",
+                    get_type());
+            }
             return act_node.m_value.integer;
         }
         throw fkyaml::type_error("The node value is not an integer.", get_type());
@@ -14418,6 +14505,12 @@ class basic_node {
         return m_attrs & detail::node_attr_bits::int_bit;
     }
 
+    bool is_uint_impl() const noexcept {
+        // Both int_bit and uint_bit must be set: this node stores a uint64_t value
+        // whose bit pattern was placed into the signed integer_type field.
+        return (m_attrs & detail::node_attr_bits::int_bit) && (m_attrs & detail::node_attr_bits::uint_bit);
+    }
+
     bool is_float_number_impl() const noexcept {
         return m_attrs & detail::node_attr_bits::float_bit;
     }
diff --git a/tests/unit_test/CMakeLists.txt b/tests/unit_test/CMakeLists.txt
index bca79e0e..ae2795b2 100644
--- a/tests/unit_test/CMakeLists.txt
+++ b/tests/unit_test/CMakeLists.txt
@@ -226,7 +226,9 @@ add_executable(
   test_iterator_class.cpp
   test_lexical_analyzer_class.cpp
   test_node_attrs.cpp
+  test_node_attrs_uint.cpp
   test_node_class.cpp
+  test_node_class_uint.cpp
   test_node_ref_storage_class.cpp
   test_node_type.cpp
   test_ordered_map_class.cpp
@@ -234,6 +236,7 @@ add_executable(
   test_reverse_iterator_class.cpp
   test_scalar_conv.cpp
   test_scalar_parser_class.cpp
+  test_scalar_parser_class_uint.cpp
   test_scalar_scanner_class.cpp
   test_serializer_class.cpp
   test_str_view_class.cpp
diff --git a/tests/unit_test/test_node_attrs_uint.cpp b/tests/unit_test/test_node_attrs_uint.cpp
new file mode 100644
index 00000000..b360ef35
--- /dev/null
+++ b/tests/unit_test/test_node_attrs_uint.cpp
@@ -0,0 +1,61 @@
+// _______ __ __ __ _____ __ __ __
+// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code)
+// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2
+// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML
+//
+// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani
+// SPDX-License-Identifier: MIT
+
+#include <catch2/catch.hpp>
+
+#include <fkYAML/node.hpp>
+
+TEST_CASE("NodeAttrs_UintBit_IsNonZero") {
+    REQUIRE(fkyaml::detail::node_attr_bits::uint_bit != 0u);
+}
+
+TEST_CASE("NodeAttrs_UintBit_LivesInStyleArea") {
+    // uint_bit must live in 0x00FF0000 so it is orthogonal to the value-type area.
+    REQUIRE((fkyaml::detail::node_attr_bits::uint_bit & fkyaml::detail::node_attr_mask::style) != 0u);
+    REQUIRE((fkyaml::detail::node_attr_bits::uint_bit & fkyaml::detail::node_attr_mask::value) == 0u);
+    REQUIRE((fkyaml::detail::node_attr_bits::uint_bit & fkyaml::detail::node_attr_mask::anchoring) == 0u);
+}
+
+TEST_CASE("NodeAttrs_UintBit_NotInScalarBits") {
+    // scalar_bits is a type-classification mask. uint_bit is a metadata annotation
+    // on an INTEGER node, not a separate type. It must not pollute scalar_bits.
+    REQUIRE((fkyaml::detail::node_attr_bits::uint_bit & fkyaml::detail::node_attr_bits::scalar_bits) == 0u);
+}
+
+TEST_CASE("NodeAttrs_UintBit_InteractsWithIntBit") {
+    fkyaml::detail::node_attr_t attrs = 0u;
+
+    SECTION("both bits can be set simultaneously") {
+        attrs |= fkyaml::detail::node_attr_bits::int_bit;
+        attrs |= fkyaml::detail::node_attr_bits::uint_bit;
+        REQUIRE((attrs & fkyaml::detail::node_attr_bits::int_bit) != 0u);
+        REQUIRE((attrs & fkyaml::detail::node_attr_bits::uint_bit) != 0u);
+    }
+
+    SECTION("clearing uint_bit leaves int_bit intact") {
+        attrs = fkyaml::detail::node_attr_bits::int_bit | fkyaml::detail::node_attr_bits::uint_bit;
+        attrs &= ~fkyaml::detail::node_attr_bits::uint_bit;
+        REQUIRE((attrs & fkyaml::detail::node_attr_bits::int_bit) != 0u);
+        REQUIRE((attrs & fkyaml::detail::node_attr_bits::uint_bit) == 0u);
+    }
+}
+
+TEST_CASE("NodeAttrs_UintBit_FromNodeTypeNeverSetsIt") {
+    // from_node_type has no knowledge of uint_bit; it must not accidentally set it.
+    fkyaml::detail::node_attr_t bits = fkyaml::detail::node_attr_bits::from_node_type(fkyaml::node_type::INTEGER);
+    REQUIRE(bits == fkyaml::detail::node_attr_bits::int_bit);
+    REQUIRE((bits & fkyaml::detail::node_attr_bits::uint_bit) == 0u);
+}
+
+TEST_CASE("NodeAttrs_UintBit_ToNodeTypeStillReturnsInteger") {
+    // uint_bit lives outside the value mask, so type classification must be unchanged
+    // when both int_bit and uint_bit are present.
+    const fkyaml::detail::node_attr_t attrs =
+        fkyaml::detail::node_attr_bits::int_bit | fkyaml::detail::node_attr_bits::uint_bit;
+    REQUIRE(fkyaml::detail::node_attr_bits::to_node_type(attrs) == fkyaml::node_type::INTEGER);
+}
diff --git a/tests/unit_test/test_node_class_uint.cpp b/tests/unit_test/test_node_class_uint.cpp
new file mode 100644
index 00000000..f3a50e4c
--- /dev/null
+++ b/tests/unit_test/test_node_class_uint.cpp
@@ -0,0 +1,139 @@
+// _______ __ __ __ _____ __ __ __
+// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code)
+// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2
+// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML
+//
+// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani
+// SPDX-License-Identifier: MIT
+
+#include <cstdint>
+#include <string>
+#include <utility>
+
+#include <catch2/catch.hpp>
+
+#include <fkYAML/node.hpp>
+
+TEST_CASE("Node_IsUint_ReturnsFalseForNonIntegerTypes") {
+    SECTION("null") {
+        fkyaml::node n;
+        REQUIRE(n.is_uint() == false);
+    }
+
+    SECTION("boolean") {
+        REQUIRE(fkyaml::node::deserialize("v: true")["v"].is_uint() == false);
+    }
+
+    SECTION("float") {
+        REQUIRE(fkyaml::node::deserialize("v: 3.14")["v"].is_uint() == false);
+    }
+
+    SECTION("string") {
+        REQUIRE(fkyaml::node::deserialize("v: hello")["v"].is_uint() == false);
+    }
+}
+
+TEST_CASE("Node_IsUint_ReturnsFalseWithinSignedRange") {
+    auto input = GENERATE(
+        std::string("-9223372036854775808"), // INT64_MIN
+        std::string("-1"),
+        std::string("0"),
+        std::string("1"),
+        std::string("9223372036854775807")); // INT64_MAX
+    REQUIRE(fkyaml::node::deserialize("v: " + input)["v"].is_uint() == false);
+}
+
+TEST_CASE("Node_IsUint_ReturnsTrueAboveSignedRange") {
+    auto input = GENERATE(
+        std::string("9223372036854775808"),   // INT64_MAX + 1
+        std::string("15745692345339290292"),  // xxHash value from the bug report
+        std::string("18446744073709551615")); // UINT64_MAX
+    REQUIRE(fkyaml::node::deserialize("v: " + input)["v"].is_uint() == true);
+}
+
+TEST_CASE("Node_AsUint_ReturnsCorrectValueForUInt64RangeNodes") {
+    using test_data_t = std::pair<std::string, uint64_t>;
+    auto test_data = GENERATE(
+        test_data_t {"9223372036854775808", UINT64_C(9223372036854775808)},
+        test_data_t {"15745692345339290292", UINT64_C(15745692345339290292)},
+        test_data_t {"18446744073709551615", UINT64_C(18446744073709551615)});
+    REQUIRE(fkyaml::node::deserialize("v: " + test_data.first)["v"].as_uint() == test_data.second);
+}
+
+TEST_CASE("Node_AsUint_SucceedsForNonNegativeSignedIntegers") {
+    using test_data_t = std::pair<std::string, uint64_t>;
+    auto test_data = GENERATE(
+        test_data_t {"0", 0ULL},
+        test_data_t {"100", 100ULL},
+        test_data_t {"9223372036854775807", static_cast<uint64_t>(INT64_MAX)});
+    REQUIRE(fkyaml::node::deserialize("v: " + test_data.first)["v"].as_uint() == test_data.second);
+}
+
+TEST_CASE("Node_AsUint_ThrowsForNegativeIntegers") {
+    auto input = GENERATE(std::string("-1"), std::string("-42"));
+    REQUIRE_THROWS_AS(fkyaml::node::deserialize("v: " + input)["v"].as_uint(), fkyaml::type_error);
+}
+
+TEST_CASE("Node_AsUint_ThrowsForNonIntegerNodes") {
+    SECTION("null") {
+        REQUIRE_THROWS_AS(fkyaml::node().as_uint(), fkyaml::type_error);
+    }
+
+    SECTION("boolean") {
+        REQUIRE_THROWS_AS(fkyaml::node::deserialize("v: true")["v"].as_uint(), fkyaml::type_error);
+    }
+
+    SECTION("float") {
+        REQUIRE_THROWS_AS(fkyaml::node::deserialize("v: 3.14")["v"].as_uint(), fkyaml::type_error);
+    }
+
+    SECTION("string") {
+        REQUIRE_THROWS_AS(fkyaml::node::deserialize("v: hello")["v"].as_uint(), fkyaml::type_error);
+    }
+}
+
+TEST_CASE("Node_GetValueUInt64_RoundTrips") {
+    using test_data_t = std::pair<std::string, uint64_t>;
+    auto test_data = GENERATE(
+        test_data_t {"0", 0ULL},
+        test_data_t {"15745692345339290292", UINT64_C(15745692345339290292)},
+        test_data_t {"18446744073709551615", UINT64_MAX});
+    REQUIRE(fkyaml::node::deserialize("v: " + test_data.first)["v"].get_value<uint64_t>() == test_data.second);
+}
+
+TEST_CASE("Node_GetValueUInt64_ThrowsForNegativeValue") {
+    REQUIRE_THROWS_AS(fkyaml::node::deserialize("v: -5")["v"].get_value<uint64_t>(), fkyaml::exception);
+}
+
+TEST_CASE("Node_AsInt_UnaffectedByUint64Change") {
+    // Regression: existing signed-integer behaviour must be preserved.
+    using test_data_t = std::pair<std::string, int64_t>;
+    auto test_data = GENERATE(
+        test_data_t {"-9223372036854775808", INT64_MIN},
+        test_data_t {"-42", -42},
+        test_data_t {"0", 0},
+        test_data_t {"1234", 1234},
+        test_data_t {"9223372036854775807", INT64_MAX});
+    REQUIRE(fkyaml::node::deserialize("v: " + test_data.first)["v"].as_int() == test_data.second);
+}
+
+TEST_CASE("Node_AsInt_ThrowsForUintFlaggedNode") {
+    // as_int() must throw type_error when uint_bit is set, because the stored
+    // bit-pattern is not representable as a signed int64_t.
+    auto input = GENERATE(
+        std::string("9223372036854775808"),   // INT64_MAX + 1
+        std::string("15745692345339290292"),  // xxHash value from the bug report
+        std::string("18446744073709551615")); // UINT64_MAX
+    REQUIRE_THROWS_AS(fkyaml::node::deserialize("v: " + input)["v"].as_int(), fkyaml::type_error);
+}
+
+TEST_CASE("Node_UintBit_ClearedOnReassignment") {
+    // After assigning a new signed integer value the uint_bit must be cleared.
+    fkyaml::node n = fkyaml::node::deserialize("v: 15745692345339290292")["v"];
+    REQUIRE(n.is_uint() == true);
+
+    n = fkyaml::node(static_cast<int64_t>(42));
+    REQUIRE(n.is_integer() == true);
+    REQUIRE(n.is_uint() == false);
+    REQUIRE(n.as_int() == 42);
+}
diff --git a/tests/unit_test/test_scalar_parser_class_uint.cpp b/tests/unit_test/test_scalar_parser_class_uint.cpp
new file mode 100644
index 00000000..bb8f73f9
--- /dev/null
+++ b/tests/unit_test/test_scalar_parser_class_uint.cpp
@@ -0,0 +1,66 @@
+// _______ __ __ __ _____ __ __ __
+// | __| |_/ | \_/ |/ _ \ / \/ \| | fkYAML: A C++ header-only YAML library (supporting code)
+// | __| _ < \_ _/| ___ | _ | |___ version 0.4.2
+// |__| |_| \__| |_| |_| |_|___||___|______| https://github.com/fktn-k/fkYAML
+//
+// SPDX-FileCopyrightText: 2023-2025 Kensuke Fukutani
+// SPDX-License-Identifier: MIT
+
+#include <string>
+
+#include <catch2/catch.hpp>
+
+#include <fkYAML/node.hpp>
+
+TEST_CASE("ScalarParser_UInt64_LargeDecimalParsedAsInteger") {
+    // Values above INT64_MAX must be stored as INTEGER nodes, not demoted to STRING.
+    auto input = GENERATE(
+        std::string("9223372036854775808"),   // INT64_MAX + 1
+        std::string("15745692345339290292"),  // xxHash value from the bug report
+        std::string("18446744073709551615")); // UINT64_MAX
+    auto node = fkyaml::node::deserialize("v: " + input)["v"];
+    REQUIRE(node.is_integer() == true);
+    REQUIRE(node.is_string() == false);
+}
+
+TEST_CASE("ScalarParser_UInt64_LargeDecimalSetsUintBit") {
+    // The uint_bit flag must be set for any value that overflows int64_t.
+    auto input = GENERATE(
+        std::string("9223372036854775808"), std::string("15745692345339290292"), std::string("18446744073709551615"));
+    auto node = fkyaml::node::deserialize("v: " + input)["v"];
+    REQUIRE(node.is_uint() == true);
+}
+
+TEST_CASE("ScalarParser_UInt64_AsUintRecoversPreciseValue") {
+    using test_data_t = std::pair<std::string, uint64_t>;
+    auto test_data = GENERATE(
+        test_data_t {"9223372036854775808", UINT64_C(9223372036854775808)},
+        test_data_t {"15745692345339290292", UINT64_C(15745692345339290292)},
+        test_data_t {"18446744073709551615", UINT64_C(18446744073709551615)});
+    auto node = fkyaml::node::deserialize("v: " + test_data.first)["v"];
+    REQUIRE(node.as_uint() == test_data.second);
+    REQUIRE(node.get_value<uint64_t>() == test_data.second);
+}
+
+TEST_CASE("ScalarParser_UInt64_SignedRangeNoUintBit") {
+    // Values within int64_t range must NOT have uint_bit set.
+    auto input = GENERATE(
+        std::string("-1"),
+        std::string("0"),
+        std::string("42"),
+        std::string("9223372036854775807")); // INT64_MAX
+    auto node = fkyaml::node::deserialize("v: " + input)["v"];
+    REQUIRE(node.is_integer() == true);
+    REQUIRE(node.is_uint() == false);
+}
+
+TEST_CASE("ScalarParser_UInt64_ExplicitIntTagOnOverflowThrows") {
+    // An explicit !!int tag must not silently demote to string on overflow.
+    REQUIRE_THROWS_AS(fkyaml::node::deserialize("v: !!int 99999999999999999999999"), fkyaml::parse_error);
+}
+
+TEST_CASE("ScalarParser_UInt64_BeyondUint64MaxBecomesString") {
+    // Without a tag, a decimal that exceeds UINT64_MAX falls back to string.
+    auto node = fkyaml::node::deserialize("v: 99999999999999999999999")["v"];
+    REQUIRE(node.is_string() == true);
+}