From c8ea0a017d0ea851225f159f26816d86799163a8 Mon Sep 17 00:00:00 2001 From: Sam Hocevar Date: Sun, 29 Sep 2019 12:15:13 +0200 Subject: [PATCH] Unicode: UTF32 support improvements (#2541, #2538, #2815) - Make ImWchar32 unsigned. - Fix Win32 version of ImFileOpen by including windows.h sooner. - Make ImGuiIO::AddInputCharacterUTF16() more robust by disallowing illegal surrogate pairs. - Allow pushing higher plane codepoints through ImGuiIO::AddInputCharacter(). - Minor cleaning up in the high-plane Unicode support. - Fix Clang -Wunreachable-code warning --- imgui.cpp | 57 +++++++++++++++++++++++++++++++------------------- imgui.h | 20 +++++++----------- imgui_draw.cpp | 4 ++-- 3 files changed, 46 insertions(+), 35 deletions(-) diff --git a/imgui.cpp b/imgui.cpp index 9e7ac21c5..8b1bda5c9 100644 --- a/imgui.cpp +++ b/imgui.cpp @@ -1094,30 +1094,33 @@ ImGuiIO::ImGuiIO() // - on Windows you can get those using ToAscii+keyboard state, or via the WM_CHAR message void ImGuiIO::AddInputCharacter(unsigned int c) { - if (c > 0 && c <= IM_UNICODE_CODEPOINT_MAX) - InputQueueCharacters.push_back((ImWchar)c); + InputQueueCharacters.push_back(c > 0 && c <= IM_UNICODE_CODEPOINT_MAX ? (ImWchar)c : IM_UNICODE_CODEPOINT_INVALID); } -// UTF16 string use Surrogate to encode unicode > 0x10000, so we should save the Surrogate. +// UTF16 strings use surrogate pairs to encode codepoints >= 0x10000, so +// we should save the high surrogate. void ImGuiIO::AddInputCharacterUTF16(ImWchar16 c) { - if (c >= 0xD800 && c <= 0xDBFF) + if ((c & 0xFC00) == 0xD800) // High surrogate, must save { - Surrogate = c; + if (InputQueueSurrogate != 0) + InputQueueCharacters.push_back(0xFFFD); + InputQueueSurrogate = c; + return; } - else + + ImWchar cp = c; + if (InputQueueSurrogate != 0) { - ImWchar cp = c; - if (c >= 0xDC00 && c <= 0xDFFF) - { - if (sizeof(ImWchar) == 2) - cp = IM_UNICODE_CODEPOINT_INVALID; - else - cp = ((ImWchar)(Surrogate - 0xD800) << 10) + (c - 0xDC00) + 0x10000; - Surrogate = 0; - } - InputQueueCharacters.push_back(cp); + if ((c & 0xFC00) != 0xDC00) // Invalid low surrogate + InputQueueCharacters.push_back(IM_UNICODE_CODEPOINT_INVALID); + else if (IM_UNICODE_CODEPOINT_MAX == (0xFFFF)) // Codepoint will not fit in ImWchar (extra parenthesis around 0xFFFF somehow fixes -Wunreachable-code with Clang) + cp = IM_UNICODE_CODEPOINT_INVALID; + else + cp = (ImWchar)(((InputQueueSurrogate - 0xD800) << 10) + (c - 0xDC00) + 0x10000); + InputQueueSurrogate = 0; } + InputQueueCharacters.push_back(cp); } void ImGuiIO::AddInputCharactersUTF8(const char* utf8_chars) @@ -1506,6 +1509,18 @@ ImU32 ImHashStr(const char* data_p, size_t data_size, ImU32 seed) // Default file functions #ifndef IMGUI_DISABLE_DEFAULT_FILE_FUNCTIONS + +#if defined(_WIN32) && !defined(IMGUI_DISABLE_WIN32_FUNCTIONS) && !defined(__CYGWIN__) && !defined(__GNUC__) +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#ifndef __MINGW32__ +#include +#else +#include +#endif +#endif + ImFileHandle ImFileOpen(const char* filename, const char* mode) { #if defined(_WIN32) && !defined(IMGUI_DISABLE_WIN32_FUNCTIONS) && !defined(__CYGWIN__) && !defined(__GNUC__) @@ -1514,9 +1529,9 @@ ImFileHandle ImFileOpen(const char* filename, const char* mode) const int mode_wsize = ::MultiByteToWideChar(CP_UTF8, 0, mode, -1, NULL, 0); ImVector buf; buf.resize(filename_wsize + mode_wsize); - ::MultiByteToWideChar(CP_UTF8, 0, filename, -1, &buf[0], filename_wsize); - ::MultiByteToWideChar(CP_UTF8, 0, mode, -1,&buf[filename_wsize], mode_wsize); - return _wfopen(&buf[0], &buf[filename_wsize]); + ::MultiByteToWideChar(CP_UTF8, 0, filename, -1, (wchar_t*)&buf[0], filename_wsize); + ::MultiByteToWideChar(CP_UTF8, 0, mode, -1, (wchar_t*)&buf[filename_wsize], mode_wsize); + return _wfopen((const wchar_t*)&buf[0], (const wchar_t*)&buf[filename_wsize]); #else return fopen(filename, mode); #endif @@ -1628,8 +1643,8 @@ int ImTextCharFromUtf8(unsigned int* out_char, const char* in_text, const char* c += (*str++ & 0x3f); // utf-8 encodings of values used in surrogate pairs are invalid if ((c & 0xFFFFF800) == 0xD800) return 4; - // If ImWchar is 16bit, use replacement character U+FFFD instead - if (sizeof(ImWchar) == 2 && c >= 0x10000) c = IM_UNICODE_CODEPOINT_INVALID; + // If codepoint does not fit in ImWchar, use replacement character U+FFFD instead + if (c > IM_UNICODE_CODEPOINT_MAX) c = IM_UNICODE_CODEPOINT_INVALID; *out_char = c; return 4; } diff --git a/imgui.h b/imgui.h index 09160d117..cbed10101 100644 --- a/imgui.h +++ b/imgui.h @@ -92,7 +92,7 @@ Index of this file: #else #define IM_OFFSETOF(_TYPE,_MEMBER) ((size_t)&(((_TYPE*)0)->_MEMBER)) // Offset of _MEMBER within _TYPE. Old style macro. #endif -#define IM_UNICODE_CODEPOINT_MAX 0xFFFF // Last Unicode code point supported by this build. +#define IM_UNICODE_CODEPOINT_MAX (sizeof(ImWchar) == 2 ? 0xFFFF : 0x10FFFF) // Last Unicode code point supported by this build. #define IM_UNICODE_CODEPOINT_INVALID 0xFFFD // Standard invalid Unicode code point. // Warnings @@ -147,7 +147,7 @@ typedef unsigned int ImGuiID; // Unique ID used by widgets (typically hash #define ImWchar ImWchar16 #endif typedef unsigned short ImWchar16; // A single U16 character for keyboard input/display. We encode them as multi bytes UTF-8 when used in strings. -typedef int ImWchar32; // A single 32bit character for keyboard input/display, define ImWchar to ImWchar32 to use it. See imconfig.h . +typedef unsigned int ImWchar32; // A single U32 character for keyboard input/display. Define ImWchar to ImWchar32 to use it. See imconfig.h . typedef int ImGuiCol; // -> enum ImGuiCol_ // Enum: A color identifier for styling typedef int ImGuiCond; // -> enum ImGuiCond_ // Enum: A condition for many Set*() functions typedef int ImGuiDataType; // -> enum ImGuiDataType_ // Enum: A primary data type @@ -1512,7 +1512,7 @@ struct ImGuiIO float KeysDownDurationPrev[512]; // Previous duration the key has been down float NavInputsDownDuration[ImGuiNavInput_COUNT]; float NavInputsDownDurationPrev[ImGuiNavInput_COUNT]; - ImWchar16 Surrogate; // For AddInputCharacterUTF16 + ImWchar16 InputQueueSurrogate; // For AddInputCharacterUTF16 ImVector InputQueueCharacters; // Queue of _characters_ input (obtained by platform back-end). Fill using AddInputCharacter() helper. IMGUI_API ImGuiIO(); @@ -2097,15 +2097,11 @@ struct ImFontGlyphRangesBuilder { ImVector UsedChars; // Store 1-bit per Unicode code point (0=unused, 1=used) - ImFontGlyphRangesBuilder() { Clear(); } - inline void Clear() - { - int MaxUnicode = sizeof(ImWchar) == 2 ? 0x10000 : 0x110000; - UsedChars.resize(MaxUnicode / sizeof(int)); memset(UsedChars.Data, 0, MaxUnicode / sizeof(int)); - } - inline bool GetBit(int n) const { int off = (n >> 5); ImU32 mask = 1u << (n & 31); return (UsedChars[off] & mask) != 0; } // Get bit n in the array - inline void SetBit(int n) { int off = (n >> 5); ImU32 mask = 1u << (n & 31); UsedChars[off] |= mask; } // Set bit n in the array - inline void AddChar(ImWchar c) { SetBit(c); } // Add character + ImFontGlyphRangesBuilder() { Clear(); } + inline void Clear() { int size_in_bytes = (IM_UNICODE_CODEPOINT_MAX + 1) / 8; UsedChars.resize(size_in_bytes / (int)sizeof(ImU32)); memset(UsedChars.Data, 0, (size_t)size_in_bytes); } + inline bool GetBit(size_t n) const { int off = (int)(n >> 5); ImU32 mask = 1u << (n & 31); return (UsedChars[off] & mask) != 0; } // Get bit n in the array + inline void SetBit(size_t n) { int off = (int)(n >> 5); ImU32 mask = 1u << (n & 31); UsedChars[off] |= mask; } // Set bit n in the array + inline void AddChar(ImWchar c) { SetBit(c); } // Add character IMGUI_API void AddText(const char* text, const char* text_end = NULL); // Add string (each character of the UTF-8 string are added) IMGUI_API void AddRanges(const ImWchar* ranges); // Add ranges, e.g. builder.AddRanges(ImFontAtlas::GetGlyphRangesDefault()) to force add all of ASCII/Latin+Ext IMGUI_API void BuildRanges(ImVector* out_ranges); // Output new ranges diff --git a/imgui_draw.cpp b/imgui_draw.cpp index 28f77281b..ae6b9803f 100644 --- a/imgui_draw.cpp +++ b/imgui_draw.cpp @@ -2724,7 +2724,7 @@ void ImFont::AddRemapChar(ImWchar dst, ImWchar src, bool overwrite_dst) const ImFontGlyph* ImFont::FindGlyph(ImWchar c) const { - if (c >= IndexLookup.Size) + if (c >= (size_t)IndexLookup.Size) return FallbackGlyph; const ImWchar i = IndexLookup.Data[c]; if (i == (ImWchar)-1) @@ -2734,7 +2734,7 @@ const ImFontGlyph* ImFont::FindGlyph(ImWchar c) const const ImFontGlyph* ImFont::FindGlyphNoFallback(ImWchar c) const { - if (c >= IndexLookup.Size) + if (c >= (size_t)IndexLookup.Size) return NULL; const ImWchar i = IndexLookup.Data[c]; if (i == (ImWchar)-1)