/// Encoding identifier on this preprocessor branch: a plain unsigned integer.
// NOTE(review): the opening #if is not in view; presumably this is the
// Windows code-page case — confirm.
46typedef unsigned T_Encoding;
#elif defined(__unix__)
/// On unix the identifier is a pointer to const pointer(s) to const char.
// NOTE(review): presumably a list of encoding names — confirm.
48typedef const char *
const *T_Encoding;
/// Construct from a string view plus an optional encoding (defaults to 0).
57 C_UnicodeIn(std::string_view sv, T_Encoding codepage =0);
/// Deleted overload: an rvalue std::string cannot be used as the source.
// NOTE(review): presumably this stops a temporary string from binding to the
// string_view overload and dangling — confirm.
58 C_UnicodeIn(std::string &&s, T_Encoding codepage =0) =
delete;
/// Construct from an input stream plus an optional encoding (defaults to 0).
60 C_UnicodeIn(std::istream &in, T_Encoding codepage =0);
/// @return The encoding currently stored in m_CodePage.
68 T_Encoding
encoding() const noexcept {
return m_CodePage; }
/// @return A read-only char pointer.
// NOTE(review): presumably points into the internal read buffer — confirm.
79 const char *buffer() const noexcept;
/// Fetch one T_Utf16 unit for position @p pos.
/// @param reverseWord Defaults to false.
// NOTE(review): presumably reverseWord selects byte-swapped reading, and the
// unit of pos (bytes vs. words) is not visible here — confirm both.
80 T_Utf16 getUtf16(
size_t pos,
bool reverseWord =false) const;
/// Takes a byte count.
// NOTE(review): presumably discards that many buffered bytes — confirm.
81 void pop(
size_t bytes);
/// Takes a byte count.
// NOTE(review): presumably pulls that many bytes from the source — confirm.
82 void read(
size_t bytes);
/// @return A size_t size; what is measured is defined by the implementation.
84 size_t size() const noexcept;
// String member used as the read buffer (per its name).
90 std::
string m_ReadBuf;
// Encoding value handed back by encoding().
98 T_Encoding m_CodePage;
// Conversion descriptor, initialised to (iconv_t)-1 — the value iconv_open()
// reports on failure, so presumably "no descriptor open yet" (confirm).
100 iconv_t m_iconv{(iconv_t)-1};
// Internal helpers; only their signatures are visible here.
// NOTE(review): presumably the byte-swapped counterpart of readUTF16 — confirm.
109 void readReverseUTF16();
// Takes the source object and a reverseWord flag; returns bool.
// NOTE(review): meaning of the return value is not visible — confirm.
110 bool readUTF16(C_Source &src,
bool reverseWord);
// Takes an encoding value.
// NOTE(review): presumably installs it as the active encoding — confirm.
113 void setCodePage(T_Encoding cp);
// Takes an encoding value and returns bool.
// NOTE(review): presumably probes whether the encoding fits the input — confirm.
114 bool testCodePage(T_Encoding cp);
/// Pointer to a function that receives a destination string and one char and
/// returns nothing.
125 typedef void (*F_PushCh)(std::string &dst,
char c);
/// Construct an object that only records @p codepage (default 0) in m_codepage.
// NOTE(review): callable with a single argument and not `explicit`, so a
// T_Encoding value converts implicitly to C_MBCStr — confirm this is intended.
128 C_MBCStr(T_Encoding codepage = 0) noexcept: m_codepage(codepage) {}
/// Move assignment from @p other; declared noexcept.
// NOTE(review): returns void rather than C_MBCStr&, so assignments cannot be
// chained; changing it needs the out-of-line definition updated as well.
133 void operator=(
C_MBCStr &&other)
noexcept;
/// Construct from a string view: copies @p s into m_str and records @p codepage.
// NOTE(review): declared noexcept, yet initialising m_str from the view may
// allocate; an allocation failure would therefore terminate — confirm intended.
135 C_MBCStr(std::string_view s, T_Encoding codepage = 0) noexcept: m_str(s), m_codepage(codepage) {}
/// Append overload taking a string view (defined out of line).
136 void operator +=(std::string_view s);
138 template<
typename T>
C_MBCStr(
const T *ps,
size_t size = 0, T_Encoding codepage = 0): m_codepage(codepage)
140 template<
typename T>
void operator +=(
const T *ps)
/// Construct from a view over any character type T: records @p codepage, then
/// passes the view's data pointer and element count to append().
143 template<
typename T>
C_MBCStr(std::basic_string_view<T> s, T_Encoding codepage = 0): m_codepage(codepage)
144 {
append(s.data(), s.size()); }
/// Append a std::basic_string of any character type T by passing its data
/// pointer and element count to append().
145 template<
typename T>
void operator +=(
const std::basic_string<T> &s)
146 {
append(s.data(), s.size()); }
/// Append from a char buffer; the second parameter is named as a byte count.
148 void append(
const char *src,
size_t srcBytes);
153 size = std::char_traits<T>::length(ps);
155 append(
reinterpret_cast<const char*
>(ps), size*
sizeof(T));
/// @return bool; declared noexcept.
// NOTE(review): presumably true when nothing has been appended — confirm.
158 bool empty() const noexcept;
/// Takes a per-character push callback and returns a reference to a std::string.
// NOTE(review): presumably the escaped text is built through pushCh — confirm.
159 const std::
string &escape(F_PushCh pushCh) const;
/// @return Reference to a std::string.
// NOTE(review): presumably the JSON-escaped form — confirm.
160 const std::
string &escJSON() const;
/// @return Reference to a std::string.
// NOTE(review): presumably the UTF-8 form of the content — confirm.
161 const std::
string &strU8() const;
// The first three members are `mutable`, so const member functions may modify them.
// Sequence of T_Utf32 values.
166 std::vector<
T_Utf32> mutable m_u32s;
// Byte string; the string_view constructor copies its argument here.
167 std::
string mutable m_str;
// Push callback, value-initialised (null function pointer).
168 F_PushCh mutable m_pushCh{};
// Encoding supplied at construction, value-initialised by default.
169 T_Encoding m_codepage{};
// Const helpers; only their signatures are visible here.
// Takes a char buffer and a parameter named as a byte count.
172 void appendNonRaw(
const char *src,
size_t srcBytes)
const;
// Takes a single T_Utf32 value.
173 void appendStr(
T_Utf32 u32)
const;
/// Returns a std::string built from a string view and an optional encoding (default 0).
// NOTE(review): presumably the UTF-8 transcoding of the input — confirm.
182std::string
to_utf8(std::string_view s, T_Encoding codepage = 0);
/// Same return type, taking an input stream instead of a string view.
183std::string
to_utf8(std::istream &s, T_Encoding codepage = 0);
/// Builds a temporary C_MBCStr from (@p ps, @p size, @p codepage) and returns
/// the result of its strU8(). The return type is plain `auto`, so the caller
/// gets a copy rather than a reference into the temporary.
// NOTE(review): the `template<typename T>` line is not visible in this listing.
185auto to_utf8(
const T *ps,
size_t size = 0, T_Encoding codepage = 0) {
return C_MBCStr{ps, size, codepage}.
strU8(); }
/// Takes a wide string view and returns a std::wstring.
// NOTE(review): presumably prepends a byte-order mark — confirm. The view is
// taken by const reference; views are normally passed by value.
189std::wstring
BOM(
const std::wstring_view &ws);
C_MBCStr(const T *ps, size_t size=0, T_Encoding codepage=0)
C_MBCStr(std::basic_string_view< T > s, T_Encoding codepage=0)
void append(const T *ps, size_t size)
C_MBCStr(const C_MBCStr &)=delete
C_MBCStr & operator=(const C_MBCStr &)=delete
const std::string & strU8() const
C_MBCStr(T_Encoding codepage=0) noexcept
C_MBCStr(std::string_view s, T_Encoding codepage=0) noexcept
C_UnicodeIn(std::string &&s, T_Encoding codepage=0)=delete
C_UnicodeIn(const char *s, T_Encoding codepage=0)
T_Encoding encoding() const noexcept
C_UnicodeIn(FH_ReadChar &&readc, T_Encoding codepage=0)
int lastError() const noexcept
The common namespace of the bux library.
std::string_view to_utf8(T_Utf32 uc)
std::wstring BOM(const std::wstring &ws)
std::function< std::optional< char >()> FH_ReadChar
std::uint16_t T_Utf16
UTF-16: you need T_Utf16[2] to hold the full range of Unicode.
std::uint8_t T_Utf8
UTF-8: you need T_Utf8[4] to hold the full range of Unicode.
@ UIE_NO_UNICODE_TRANSLATION
void append(const T &src, std::string &dst)
std::uint32_t T_Utf32
UTF-32 to cover the full range of codespace U+0000 ~ U+10FFFF.