使用SDK在PDF上生成文本,打开后显示乱码或不显示,造成该问题有以下2种可能的原因:
- 可能是字体没有嵌入,导致PDF文件生成后在部分软件上显示错误。可以在生成时将字体通过Font::Embed()接口嵌入进文档中。
- 显示乱码可能和字体的使用有关,需要检查一下使用字体时Charset是否和当前使用场景相符。
字符集简介:字符集在字体处理、文本编码和文本信息交换方面起着重要作用。例如CharsetChineseBig5与CharsetGB2312,它们分别适用于繁体中文和简体中文的编码和处理,为中文文本信息的计算机处理提供了标准化的支持。在选择使用哪个字符集时,需要根据具体的应用场景和需求来决定。
以下是C++判断字符属于哪个CharSet的示例:
WString wstr = L"∅";
const char* str = wstr.UTF8Encode();
uint32_t unicodeCode = static_cast<uint32_t>(wstr[0]);
Font::Charset charSet=(Font::Charset)GetCharsetFromUnicode(unicodeCode);
// Maps a Unicode code point to a font charset identifier.
//
// Resolution order (first match wins):
//   1. Values below 0x7F are always ANSI_CHARSET.
//   2. If FXSYS_GetACP() reports code page 932, 936, 950 or 949 and the code
//      point lies in one of the CJK ranges below, the charset paired with that
//      code page is returned.
//   3. Otherwise the code point is classified by fixed ranges.
//
// word:    Unicode code point to classify.
// returns: one of the *_CHARSET constants, or DEFAULT_CHARSET when nothing matches.
uint32 GetCharsetFromUnicode(uint32 word) {
  // Debug aid: print the code point in hex.
  // std::cout << std::hex << word << endl;

  // Keep ASCII from being rendered with a CJK font.
  if (word < 0x7F) {
    return ANSI_CHARSET;
  }

  // On a CJK code page, ideographs and CJK punctuation follow that code page,
  // since the same code points are shared between the CJK charsets.
  const int codePage = FXSYS_GetACP();
  const bool isCjkCodePage =
      codePage == 932 || codePage == 936 || codePage == 950 || codePage == 949;
  if (isCjkCodePage) {
    const bool isCjkCodePoint =
        (0x2E80 <= word && word <= 0x2EFF) ||    // CJK radicals supplement
        (0x3000 <= word && word <= 0x303F) ||    // CJK symbols and punctuation
        (0x3200 <= word && word <= 0x32FF) ||    // enclosed CJK letters and months
        (0x3300 <= word && word <= 0x33FF) ||    // CJK compatibility
        (0x3400 <= word && word <= 0x4DB5) ||    // CJK unified ideographs extension A
        (0x4E00 <= word && word <= 0x9FFF) ||    // CJK unified ideographs
        (0xF900 <= word && word <= 0xFAFF) ||    // CJK compatibility ideographs
        (0xFE30 <= word && word <= 0xFE4F) ||    // CJK compatibility forms
        (0x20000 <= word && word <= 0x2A6D6) ||  // CJK unified ideographs extension B
        (0x2F800 <= word && word <= 0x2FA1F) ||  // CJK compatibility ideographs supplement
        (0xFF00 <= word && word <= 0xFF5E);      // fullwidth ASCII variants
    if (isCjkCodePoint) {
      if (codePage == 932) {
        return SHIFTJIS_CHARSET;
      }
      if (codePage == 936) {
        return GB2312_CHARSET;
      }
      if (codePage == 950) {
        return CHINESEBIG5_CHARSET;
      }
      // Only 949 is left at this point.
      return HANGUL_CHARSET;
    }
  }

  // No code-page match: classify purely by code point range.

  // U+20A9 (won sign).
  if (word == 0x20A9) {
    return HANGUL_CHARSET;
  }

  const bool isSimplifiedChinese =
      (0x4E00 <= word && word <= 0x9FA5) || (0xE7C7 <= word && word <= 0xE7F3) ||
      (0x3000 <= word && word <= 0x303F) || (0x2000 <= word && word <= 0x206F);
  if (isSimplifiedChinese) {
    return GB2312_CHARSET;
  }

  // Hiragana, Katakana, Katakana phonetic extensions, halfwidth forms.
  const bool isJapanese =
      (0x3040 <= word && word <= 0x309F) || (0x30A0 <= word && word <= 0x30FF) ||
      (0x31F0 <= word && word <= 0x31FF) || (0xFF5F <= word && word <= 0xFFEF);
  if (isJapanese) {
    return SHIFTJIS_CHARSET;
  }

  // Hangul syllables, Hangul Jamo, Hangul compatibility Jamo.
  const bool isKorean =
      (0xAC00 <= word && word <= 0xD7AF) || (0x1100 <= word && word <= 0x11FF) ||
      (0x3130 <= word && word <= 0x318F);
  if (isKorean) {
    return HANGUL_CHARSET;
  }

  if (0x0E00 <= word && word <= 0x0E7F) {
    return THAI_CHARSET;
  }

  if ((0x0370 <= word && word <= 0x03FF) || (0x1F00 <= word && word <= 0x1FFF)) {
    return GREEK_CHARSET;
  }

  if ((0x0600 <= word && word <= 0x06FF) || (0xFB50 <= word && word <= 0xFEFC)) {
    return ARABIC_CHARSET;
  }

  if (0x0590 <= word && word <= 0x05FF) {
    return HEBREW_CHARSET;
  }

  if (0x0400 <= word && word <= 0x04FF) {
    return RUSSIAN_CHARSET;
  }

  // These six letters sit inside the Latin Extended range tested next, so they
  // must be picked out first.
  const bool isTurkishLetter = word == 0x11E || word == 0x11F || word == 0x130 ||
                               word == 0x131 || word == 0x15E || word == 0x15F;
  if (isTurkishLetter) {
    return TURKISH_CHARSET;
  }

  if (0x0100 <= word && word <= 0x024F) {
    return EASTEUROPE_CHARSET;
  }

  if (0x1E00 <= word && word <= 0x1EFF) {
    return VIETNAMESE_CHARSET;
  }

  return DEFAULT_CHARSET;
}