bux API Reference 1.9.0
Static library of whatever is commonly needed for general-purpose use but not directly supported by Modern C++, plus whatever reusable code originated from my side projects.
Loading...
Searching...
No Matches
UnicodeCvt.cpp
Go to the documentation of this file.
1#include "UnicodeCvt.h"
2#include "XException.h" // RUNTIME_ERROR()
3#include <cstring> // memcmp()
4#include <format> // std::format()
5#include <istream> // std::istream
6#include <memory> // std::make_unique<>()
7
8#ifdef _WIN32
9#pragma comment(lib, "Advapi32.lib") // IsTextUnicode()
10#include <cstdlib> // swab()
11#include <windows.h> // Win32 API
12#elif defined(__unix__)
13#include <unistd.h> // swab()
14#include <errno.h> // errno
15#endif
16
17namespace {
18
19//
20// In-Module Types
21//
// Functor that hands out the bytes of a caller-owned memory range one at a
// time. Only pointers are stored, so the viewed memory must outlive the reader.
class FC_ReadMem
{
public:

    // Nonvirtuals

    // Begin reading at the first byte of sv.
    FC_ReadMem(std::string_view sv) noexcept:
        m_Next(sv.data()),
        m_Stop(sv.data() + sv.size())
    {
    }

    // Yield the next byte, or an empty optional once the range is exhausted.
    std::optional<char> operator()() noexcept
    {
        if (m_Next >= m_Stop)
            return std::nullopt;

        return *m_Next++;
    }

private:

    // Data
    const char          *m_Next;    // next byte to hand out
    const char *const   m_Stop;     // one past the last byte
};
41
42//
43// In-Module Constants
44//
#ifdef _WIN32
// UTF-8 expressed as a Win32 code page number.
enum { CHSETS_UTF8 = CP_UTF8 };
#elif defined(__unix__)
// UTF-8 expressed as a null-terminated list of charset names to try in order.
const char *const CHSETS_UTF8[] = {"UTF-8", "UTF8", nullptr};
#endif
53
54//
55// In-Module Functions
56//
57#ifdef __unix__
58bool testUtf16(const char *src_, size_t bytes)
59{
60 if (bytes &1)
61 // Odd number of bytes
62 return false;
63
64 size_t wordZeros =0, hiByteZeros =0;
65 for (size_t i =0; i < bytes; i +=2)
66 {
67 const char *t =src_ +i;
68 if (!*reinterpret_cast<const int16_t*>(t))
69 ++wordZeros;
70 else if (!t[1])
71 ++hiByteZeros;
72 }
73 if (!hiByteZeros || wordZeros)
74 // Felt unsecure if there is any null word or there is no ascii char.
75 return false;
76
77 static const char *const CHSETS_UTF16[] ={"UCS-2", "UTF-16", "USC2", "UTF16", 0};
78 iconv_t cd =(iconv_t)(-1);
79 for (const char *const *i =CHSETS_UTF16; *i && cd == (iconv_t)(-1); ++i)
80 cd =iconv_open("UCS-4", *i);
81
82 if (cd == (iconv_t)(-1))
83 // Fail to initialize the corresponding iconv_t descriptor
84 return false;
85
86 const auto ucs4 = std::make_unique<T_Utf32[]>(bytes);
87 size_t size_ucs4 =bytes*4;
88 char *src =const_cast<char*>(src_);
89 char *dst =reinterpret_cast<char*>(ucs4.get());
90 bool ret = size_t(-1) != iconv(cd, &src, &bytes, &dst, &size_ucs4) || // conversion ok
91 errno != EILSEQ; // invalid multibyte sequence
92 iconv_close(cd);
93 return ret;
94}
95#endif
96
97int u32toutf8(T_Utf32 c, T_Utf8 *dst) noexcept
98{
99 int ret;
100 if (c < 0x80)
101 // 1 byte
102 {
103 ret =1;
104 *dst = T_Utf8(c);
105 }
106 else if (c < 0x800)
107 // 2 bytes
108 {
109 ret =2;
110 goto Encode;
111 }
112 else if (c < 0x10000)
113 // 3 bytes
114 {
115 ret =3;
116 goto Encode;
117 }
118 else if (c < 0x200000)
119 // 4 bytes
120 {
121 ret =4;
122 goto Encode;
123 }
124 else if (c < 0x4000000)
125 // 5 bytes
126 {
127 ret =5;
128 goto Encode;
129 }
130 else if (c < 0x80000000)
131 // 6 bytes
132 {
133 ret =6;
134 goto Encode;
135 }
136 else
137 ret =-1;
138
139 return ret;
140Encode:
141 for (int i =ret; --i > 0;)
142 {
143 dst[i] = T_Utf8((c &0x3F) | 0x80);
144 c >>=6;
145 }
146 dst[0] = T_Utf8(c |((const unsigned char*)"\xC0\xE0\xF0\xF8\xFC")[ret-2]);
147 return ret;
148}
149
150} // namespace
151
152namespace bux {
153
154//
155// Function Definitions
156//
157std::string_view to_utf8(T_Utf32 uc)
158{
159 static thread_local T_Utf8 buf[MAX_UTF8];
160 const auto bytes = u32toutf8(uc, buf);
161 if (bytes <= 0)
162 RUNTIME_ERROR("u32toutf8(u+{:x}) returns {}", uc, bytes);
163
164 return {reinterpret_cast<char*>(buf), size_t(bytes)};
165}
166
167std::string to_utf8(std::string_view s, T_Encoding codepage)
168{
169 return C_MBCStr{s, codepage}.strU8();
170}
171
172std::string to_utf8(std::istream &in, T_Encoding codepage)
188{
189 C_UnicodeIn cvt{in, codepage};
190 T_Utf8 u8[MAX_UTF8];
191 int n;
192 std::string ret;
193 while ((n = cvt.get(u8)) > 0)
194 ret.append(reinterpret_cast<char*>(u8), size_t(n));
195
196 if (n < 0)
197 RUNTIME_ERROR("UTF-8 conversion error {}", n);
198
199 return ret;
200}
201
// Return a copy of ws with the byte-order mark U+FEFF placed in front.
std::wstring BOM(const std::wstring &ws)
{
    std::wstring marked;
    marked.reserve(ws.size() + 1);
    marked += L'\xFEFF';
    marked += ws;
    return marked;
}
206
207//
208// Class Implementations
209//
210C_UnicodeIn::C_UnicodeIn(FH_ReadChar &&readc, T_Encoding codepage):
211 m_Src(std::move(readc)),
212 m_CodePage(codepage)
213{
214 init();
215}
216
217C_UnicodeIn::C_UnicodeIn(std::string_view sv, T_Encoding codepage):
218 C_UnicodeIn(FC_ReadMem(sv), codepage)
219{
220}
221
222C_UnicodeIn::C_UnicodeIn(std::istream &in, T_Encoding codepage):
223 m_Src([&]()->std::optional<char> {
224 char ch;
225 if (static_cast<bool>(in.get(ch)))
226 return ch;
227 return {};
228 }),
229 m_CodePage(codepage)
246{
247 init();
248}
249
251{
252#ifdef __unix__
253 reset_iconv();
254#endif
255}
256
258{
259 if (m_GetQ.empty())
260 {
261 if (m_ErrCode < 0)
262 // Error code persists
263 return m_ErrCode;
264
265 if (m_ReadMethod)
266 (this->*m_ReadMethod)();
267
268 if (lastError() <= 0)
269 // Error happens
270 return m_ErrCode;
271 }
272 c = m_GetQ.front();
273 m_GetQ.pop();
274 return 1;
275}
276
278{
279 T_Utf32 c;
280 int ret = get(c);
281 if (ret > 0)
282 {
283 if (c < 0x10000)
284 // Encode into a single word
285 *dst =T_Utf16(c);
286 else
287 // Encode into two words
288 {
289 ret =2;
290 c -=0x10000;
291 dst[0] =T_Utf16((c >>10) |0xD800);
292 dst[1] =T_Utf16((c &0x3FF) |0xDC00);
293 }
294 }
295 return ret;
296}
297
299{
300 T_Utf32 c;
301 int ret = get(c);
302 if (ret > 0)
303 ret = u32toutf8(c, dst);
304
305 return ret;
306}
307
// Convert the bytes currently buffered in m_Src, interpreted in m_CodePage,
// into code points appended to m_GetQ. Does nothing when the buffer is empty.
// On failure m_ErrCode is set to UIE_NO_UNICODE_TRANSLATION or UIE_INTERNAL.
308void C_UnicodeIn::ingestMBCS()
311{
312 if (auto size = m_Src.size())
313 {
314#ifdef _WIN32
 // Windows: convert to UTF-16 first, then decode that through readUTF16().
315 const auto utf16 = std::make_unique<wchar_t[]>(size);
316 if (int wn = MultiByteToWideChar(m_CodePage, MB_ERR_INVALID_CHARS, m_Src.buffer(), int(size), utf16.get(), int(size)))
317 {
 // Wrap the wn UTF-16 units (wn*2 bytes) in a temporary source and drain it.
318 FC_ReadMem read({reinterpret_cast<char*>(utf16.get()), size_t(wn*2)});
319 C_Source src(std::move(read));
320 while (readUTF16(src, false));
 // The whole input chunk is considered consumed.
321 m_Src.pop(size);
322 }
323 else
324 m_ErrCode = UIE_NO_UNICODE_TRANSLATION;
325#elif defined(__unix__)
 // Open the descriptor only if none is open yet, trying each candidate name of m_CodePage in turn.
326 for (T_Encoding i =m_CodePage; *i && m_iconv == (iconv_t)(-1); ++i)
327 m_iconv =iconv_open("UCS-4LE", *i);
328
329 if (m_iconv == (iconv_t)(-1))
330 // Fail to initialize the corresponding iconv_t descriptor
331 {
332 m_ErrCode = UIE_NO_UNICODE_TRANSLATION;
333 return;
334 }
 // Output buffer of `size` UTF-32 units, i.e. size*4 bytes.
335 const auto ucs4 = std::make_unique<T_Utf32[]>(size);
336 size_t size_ucs4 =size*4;
337 auto src =const_cast<char*>(m_Src.buffer());
338 auto dst =reinterpret_cast<char*>(ucs4.get());
339 if (size_t(-1) != iconv(m_iconv, &src, &size, &dst, &size_ucs4))
340 // Fully converted
341 {
 // dst now points just past the last unit written; queue everything before it.
342 for (const T_Utf32 *i = ucs4.get(); i < reinterpret_cast<T_Utf32*>(dst); m_GetQ.push(le32toh(*i++)));
343 m_Src.pop(m_Src.size());
344 }
345 else switch (errno)
346 {
347 case EILSEQ: // invalid multibyte sequence
348 m_ErrCode = UIE_NO_UNICODE_TRANSLATION;
349 break;
350 case EINVAL: // incomplete multibyte sequence
 // Queue what was converted and keep the unconverted tail (`size` bytes remain) buffered.
351 for (const T_Utf32 *i =ucs4.get(); i < reinterpret_cast<T_Utf32*>(dst); m_GetQ.push(le32toh(*i++)));
352 m_Src.pop(m_Src.size()-size);
353 break;
354 case E2BIG: // output buffer overflow, which is impossible.
355 default:
356 m_ErrCode = UIE_INTERNAL;
357 }
358#endif
359 }
360}
361
// Inspect the first bytes of the input and select the read method
// (m_ReadMethod) used by later reads: UTF-16, byte-swapped UTF-16, a code
// page, or the ASCII-prefix reader. Inputs of 0 or 1 byte are handled here.
362void C_UnicodeIn::init()
363{
364 m_Src.read(2);
365 switch (m_Src.size())
366 {
367 case 2:
 // At least one 16-bit unit is available: look for a byte-order mark.
368 switch (m_Src.getUtf16(0))
369 {
370 case 0xFEFF: // UTF-16 with BOM
371 m_Src.pop(2);
372 m_ReadMethod = &C_UnicodeIn::readUTF16;
373 return;
374 case 0xFFFE: // Reverse UTF-16 with BOM
375 m_Src.pop(2);
376 m_ReadMethod = &C_UnicodeIn::readReverseUTF16;
377 return;
378 default:
 // No UTF-16 BOM: fetch a third byte to test for the UTF-8 BOM.
379 m_Src.read(3);
380 if (m_Src.size() >= 3 && 0 == memcmp(m_Src.buffer(), "\xef\xbb\xbf", 3))
381 // UTF-8 with BOM
382 {
383 m_Src.pop(3);
384 setCodePage(CHSETS_UTF8);
385 m_ReadMethod = &C_UnicodeIn::readCodePage;
386 return;
387 }
388
389 // Infer the encoding of the 1000-byte header chunk (UTF-8, ACP, or something else ?)
 // Sniffing for BOM-less UTF-16 is attempted only when no code page was given.
390 if (!m_CodePage)
391 {
392 m_Src.read(1000);
393 const auto size = m_Src.size();
394#ifdef _WIN32
395 int mask = IS_TEXT_UNICODE_UNICODE_MASK;
396 if (IsTextUnicode(m_Src.buffer(), int(size), &mask))
397#elif defined(__unix__)
398 if (testUtf16(m_Src.buffer(), size))
399#endif
400 {
401 m_ReadMethod = &C_UnicodeIn::readUTF16;
402 return;
403 }
 // Repeat the same test on a copy with every byte pair swapped.
404 const auto revBuf = std::make_unique<char[]>(size);
405 swab(const_cast<char*>(m_Src.buffer()), revBuf.get(), int(size));
406#ifdef _WIN32
407 mask = IS_TEXT_UNICODE_UNICODE_MASK;
408 if (IsTextUnicode(revBuf.get(), int(size), &mask))
409#elif defined(__unix__)
410 if (testUtf16(revBuf.get(), size))
411#endif
412 {
413 m_ReadMethod = &C_UnicodeIn::readReverseUTF16;
414 return;
415 }
416 }
417
418 // Read heading asciis off then we can distinguish CP_UTF8 from CP_ACP.
419 m_ReadMethod = &C_UnicodeIn::readASCII;
420 }
421 break;
422 case 1: // has to be ascii
 // A lone byte with the high bit set is reported as incomplete; otherwise it is queued directly.
423 if (*m_Src.buffer() &0x80)
424 m_ErrCode = UIE_INCOMPLETE_UNICODE;
425 else
426 m_GetQ.push(T_Utf8(*m_Src.buffer()));
427 break;
428 case 0: // empty string
429 break;
430 default:
 // Any other buffered size after read(2) is treated as an internal error.
431 m_ErrCode = UIE_INTERNAL;
432 }
433}
434
// Read method used while the input has been pure ASCII so far. Each call
// queues one ASCII byte; on the first byte with the high bit set (or when no
// byte is available) it tries to settle on a code page and, on success,
// switches m_ReadMethod to readCodePage.
435void C_UnicodeIn::readASCII()
436{
437 m_Src.read(1);
438 if (m_Src.size())
439 {
440 const auto c = T_Utf8(*m_Src.buffer());
441 if (!(c &0x80))
442 // Still an ASCII
443 {
444 m_GetQ.push(c);
445 return m_Src.pop(1);
446 }
447 }
448
449 // Is the rest CP_UTF8 or CP_ACP ?
450 m_Src.readTillCtrl();
 // A code page that is already set gets the first try.
451 if (m_CodePage)
452 {
453 ingestMBCS();
454 if (m_ErrCode != UIE_NO_UNICODE_TRANSLATION)
455 // Should be m_CodePage
456 {
457 m_ReadMethod = &C_UnicodeIn::readCodePage;
458 return;
459 }
460 }
461
 // Otherwise probe a fixed list of candidates; the first one whose conversion
 // is not rejected as untranslatable wins.
462#ifdef __unix__
 // NOTE(review): "USC2LE"/"USC2BE" below look like misspellings of "UCS2LE"/"UCS2BE" - confirm.
463 static const char *const CHSETS_SJIS[] = {"CP932", "EUC-JP", "SHIFT_JIS", "SHIFT-JIS", "SJIS", 0};
464 static const char *const CHSETS_GB[] = {"CP936", "EUC-CN", "GB18030", "GBK", 0};
465 static const char *const CHSETS_KSC[] = {"CP949", "EUC-KR", "JOHAB", 0};
466 static const char *const CHSETS_BIG5[] = {"CP950", "EUC-TW", "BIG5-HKSCS", "BIG5HKSCS", "BIG-5", "BIG5", 0};
467 static const char *const CHSETS_UTF7[] = {"UTF-7", "UTF7", 0};
468 static const char *const CHSETS_UTF16LE[] = {"UCS-2LE", "UTF-16LE", "USC2LE", "UTF16LE", 0};
469 static const char *const CHSETS_UTF16BE[] = {"UCS-2BE", "UTF-16BE", "USC2BE", "UTF16BE", 0};
470#endif
471 static const T_Encoding MBCS_CODEPAGES[] ={
472#ifdef _WIN32
473 CP_ACP, CP_UTF8,
474 932, 936, 949, 950, 951, // from https://en.wikipedia.org/wiki/Windows_code_page#East_Asian_multi-byte_code_pages
475 CP_UTF7
476#elif defined(__unix__)
477 CHSETS_UTF8, CHSETS_SJIS, CHSETS_GB, CHSETS_KSC, CHSETS_BIG5, CHSETS_UTF7, CHSETS_UTF16LE, CHSETS_UTF16BE
478#endif
479 };
480 for (size_t i = 0; i < std::size(MBCS_CODEPAGES); ++i)
481 if (testCodePage(MBCS_CODEPAGES[i]))
482 {
483 m_ReadMethod = &C_UnicodeIn::readCodePage;
484 return;
485 }
486 // Or else, an error occurred.
 // (m_ErrCode is left as set by the last testCodePage() attempt.)
487}
488
489bool C_UnicodeIn::readUTF16(C_Source &src, bool reverseWord)
490{
491 bool ret = false;
492 src.read(2);
493 const size_t read = src.size();
494 if (read >= 2)
495 {
496 const T_Utf16 uc = src.getUtf16(0,reverseWord);
497 if (0xD800 <= uc && uc < 0xDC00)
498 // Hi word of 2-word encoding
499 {
500 src.read(4);
501 if (src.size() >= 4)
502 {
503 const T_Utf16 uc2 = src.getUtf16(1,reverseWord);
504 if (0xDC00 <= uc2 && uc2 < 0xE000)
505 // Low word of 2-word encoding
506 {
507 src.pop(4);
508 m_GetQ.push(T_Utf32((((uc&0x3FF)<<10)|(uc2&0x3FF))+0x10000));
509 ret =true;
510 }
511 else
512 // Anything lese is ill-formed
513 m_ErrCode = UIE_ILLFORMED_UNICODE;
514 }
515 else
516 m_ErrCode = UIE_INCOMPLETE_UNICODE;
517 }
518 else if (0xDC00 <= uc && uc < 0xE000)
519 // Low word of 2-word encoding - ill-fomed
520 m_ErrCode = UIE_ILLFORMED_UNICODE;
521 else
522 {
523 src.pop(2);
524 m_GetQ.push(uc);
525 ret = true;
526 }
527 }
528 else if (read > 0)
529 m_ErrCode = UIE_INCOMPLETE_UNICODE;
530
531 return ret;
532}
533
534void C_UnicodeIn::readUTF16()
535{
536 readUTF16(m_Src, false);
537}
538
539void C_UnicodeIn::readReverseUTF16()
540{
541 readUTF16(m_Src, true);
542}
543
544void C_UnicodeIn::readCodePage()
545{
546 m_Src.readTillCtrl();
547 ingestMBCS();
548}
549
550void C_UnicodeIn::setCodePage(T_Encoding cp)
551{
552 m_CodePage = cp;
553#ifdef __unix__
554 reset_iconv();
555#endif
556}
557
558bool C_UnicodeIn::testCodePage(T_Encoding cp)
559{
560 m_ErrCode = UIE_EOF; // reset error code
561 setCodePage(cp);
562 ingestMBCS();
563 return m_ErrCode != UIE_NO_UNICODE_TRANSLATION;
564}
565
566#ifdef __unix__
567void C_UnicodeIn::reset_iconv()
568{
569 if (m_iconv != (iconv_t)(-1))
570 {
571 iconv_close(m_iconv);
572 m_iconv = (iconv_t)(-1);
573 }
574}
575#endif
576
577C_UnicodeIn::C_Source::C_Source(FH_ReadChar &&readc) noexcept:
578 m_ReadCh(std::move(readc)),
579 m_AvailBeg(0)
580{
581}
582
583const char *C_UnicodeIn::C_Source::buffer() const noexcept
584{
585 return m_ReadBuf.data() + m_AvailBeg;
586}
587
588T_Utf16 C_UnicodeIn::C_Source::getUtf16(size_t pos, bool reverseWord) const
589{
590 const size_t off = m_AvailBeg +pos *2;
591 if (off +2 > m_ReadBuf.size())
592 RUNTIME_ERROR("End of char {} passes end of buffer", off+2);
593
594 const auto p = m_ReadBuf.data() + off;
595 if (reverseWord)
596 {
597 union
598 {
599 char a[2];
600 T_Utf16 ret;
601 };
602 a[0] = p[1];
603 a[1] = p[0];
604 return ret;
605 }
606 return *(const T_Utf16*)p;
607}
608
609void C_UnicodeIn::C_Source::pop(size_t bytes)
610{
611 m_AvailBeg += bytes;
612 if (m_AvailBeg > m_ReadBuf.size())
613 {
614 m_AvailBeg -= bytes; // rollback
615 RUNTIME_ERROR("m_AvailBeg overflow");
616 }
617}
618
619void C_UnicodeIn::C_Source::read(size_t bytes)
620{
621 if (m_AvailBeg + bytes > m_ReadBuf.size())
622 {
623 if (m_AvailBeg)
624 {
625 m_ReadBuf.erase(0, m_AvailBeg);
626 m_AvailBeg = 0;
627 }
628 bytes -= m_ReadBuf.size();
629 for (size_t i = 0; i < bytes; ++i)
630 if (auto c = m_ReadCh())
631 m_ReadBuf += *c;
632 else
633 break;
634 }
635}
636
637void C_UnicodeIn::C_Source::readTillCtrl()
638{
639 if (m_AvailBeg == m_ReadBuf.size())
640 {
641 m_ReadBuf.clear();
642 m_AvailBeg = 0;
643 }
644
645 while (auto c = m_ReadCh())
646 {
647 m_ReadBuf += *c;
648 if (0 == (*c &0xE0))
649 // Control char
650 break;
651 }
652}
653
654size_t C_UnicodeIn::C_Source::size() const noexcept
655{
656 return m_ReadBuf.size() - m_AvailBeg;
657}
658
659C_MBCStr::C_MBCStr(C_MBCStr &&other) noexcept:
660 m_u32s (std::move(other.m_u32s)),
661 m_str (std::move(other.m_str)),
662 m_pushCh (other.m_pushCh),
663 m_codepage (other.m_codepage)
664{
665}
666
667void C_MBCStr::operator=(C_MBCStr &&other) noexcept
668{
669 m_u32s = std::move(other.m_u32s);
670 m_str = std::move(other.m_str);
671 m_pushCh = other.m_pushCh;
672 m_codepage = other.m_codepage;
673}
674
675void C_MBCStr::operator +=(std::string_view s)
676{
677 if (empty())
678 {
679 m_str = s;
680 m_pushCh = {};
681 }
682 else if (!m_pushCh)
683 m_str += s;
684 else
685 appendNonRaw(s.data(), s.size());
686}
687
688void C_MBCStr::append(const char *src, size_t srcBytes)
689{
690 if (!srcBytes)
691 srcBytes = strlen(src);
692
693 if (empty())
694 {
695 m_str.assign(src, srcBytes);
696 m_pushCh = {};
697 }
698 else if (!m_pushCh)
699 m_str.append(src, srcBytes);
700 else
701 appendNonRaw(src, srcBytes);
702}
703
704void C_MBCStr::appendNonRaw(const char *src, size_t srcBytes) const
705{
706 C_UnicodeIn uin(std::string_view{src, srcBytes}, m_codepage);
707 T_Utf32 t;
708 while (uin.get(t))
709 {
710 m_u32s.push_back(t);
711 appendStr(t);
712 }
713}
714
715void C_MBCStr::appendStr(T_Utf32 u32) const
716{
717 if (u32 >= 0x100)
718 // Multi-byte
719 {
720 T_Utf8 buf[MAX_UTF8];
721 const auto rc = u32toutf8(u32, buf);
722 if (rc < 0)
723 RUNTIME_ERROR("MBC string conversion error {} after {} bytes converted", rc, m_str.size());
724
725 m_str.append(reinterpret_cast<char*>(buf), size_t(rc));
726 }
727 else
728 m_pushCh(m_str, static_cast<char>(u32));
729}
730
731bool C_MBCStr::empty() const noexcept
732{
733 return m_str.empty() && m_u32s.empty();
734}
735
736const std::string &C_MBCStr::escape(F_PushCh pushCh) const
737{
738 if (!m_pushCh && !m_str.empty())
739 {
740 C_UnicodeIn uin(m_str, m_codepage);
741 T_Utf32 t;
742 while (uin.get(t))
743 m_u32s.push_back(t);
744 }
745 if (m_pushCh != pushCh)
746 {
747 m_pushCh = pushCh;
748 m_str.clear();
749 for (auto i: m_u32s)
750 appendStr(i);
751 }
752 return m_str;
753}
754
// Return the content escaped for use inside a JSON string literal.
//
// Characters with a short JSON escape become \b \f \n \r \t \" \\, printable
// characters are copied as they are, and every other character is written as
// \u followed by four hexadecimal digits.
const std::string &C_MBCStr::escJSON() const
{
    return escape([](std::string &dst, char c) {
        /* From http://stackoverflow.com/questions/19176024/how-to-escape-special-characters-in-building-a-json-string
            \b  Backspace (ascii code 08)
            \f  Form feed (ascii code 0C)
            \n  New line
            \r  Carriage return
            \t  Tab
            \"  Double quote
            \\  Backslash character
        */
        static constexpr char JSON_ESCS[] = "\b\f\n\r\t\"\\";
        // strchr() also matches the terminating NUL of its haystack, so a NUL
        // character must be excluded explicitly.
        const char *const p = c ? strchr(JSON_ESCS, c) : nullptr;
        if (p)
        {
            dst += '\\';
            dst += "bfnrt\"\\"[p - JSON_ESCS];
        }
        else if (isprint(static_cast<unsigned char>(c)))
            // isprint() requires a value representable as unsigned char
            dst += c;
        else
            // \u takes exactly four hexadecimal digits
            dst += std::format("\\u{:04x}", int{static_cast<unsigned char>(c)});
    });
}
779
780const std::string &C_MBCStr::strU8() const
781{
782 return escape([](std::string &dst, char c) { dst += c; });
783}
784
785} // namespace bux
#define RUNTIME_ERROR(fmtStr,...)
Wrap FILE(DATE)#__LINE__ FUNCTION: msg into std::runtime_error.
Definition XException.h:32
void append(const char *src, size_t srcBytes)
const std::string & escape(F_PushCh pushCh) const
const std::string & escJSON() const
C_MBCStr & operator=(const C_MBCStr &)=delete
void operator+=(std::string_view s)
bool empty() const noexcept
const std::string & strU8() const
C_MBCStr(T_Encoding codepage=0) noexcept
Definition UnicodeCvt.h:128
T & front() const
Definition XQue.h:79
void pop()
Definition XQue.h:238
T & push()
Definition XQue.h:81
bool empty() const
Definition XQue.h:28
int get(T_Utf32 &c)
~C_UnicodeIn() noexcept
C_UnicodeIn(FH_ReadChar &&readc, T_Encoding codepage=0)
int lastError() const noexcept
Definition UnicodeCvt.h:67
THE common namespace of bux library.
Definition AtomiX.cpp:3
std::string_view to_utf8(T_Utf32 uc)
std::wstring BOM(const std::wstring &ws)
std::function< std::optional< char >()> FH_ReadChar
Definition UnicodeCvt.h:43
void read(const std::string &src, size_t &off, T &data) noexcept
Definition Serialize.h:35
std::uint16_t T_Utf16
UTF-16: You need T_Utf16[2] to hold full range of unicode.
Definition UnicodeCvt.h:40
std::uint8_t T_Utf8
UTF-8: You need T_Utf8[4] to hold full range of unicode.
Definition UnicodeCvt.h:41
@ MAX_UTF8
Definition UnicodeCvt.h:24
@ UIE_ILLFORMED_UNICODE
Definition UnicodeCvt.h:30
@ UIE_EOF
Definition UnicodeCvt.h:29
@ UIE_INCOMPLETE_UNICODE
Definition UnicodeCvt.h:31
@ UIE_INTERNAL
Definition UnicodeCvt.h:33
@ UIE_NO_UNICODE_TRANSLATION
Definition UnicodeCvt.h:32
std::uint32_t T_Utf32
UTF-32 to cover the full range of codespace U+0000 ~ U+10FFFF.
Definition UnicodeCvt.h:39