1 Star 1 Fork 0

Lee/ConvertUTF

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
test.cpp 2.25 KB
一键复制 编辑 原始数据 按行查看 历史
lipeilei01 提交于 2017-04-14 09:45 . 提交多字节转换到UTF8的代码
#include <iostream>
#include <string.h>
using namespace std;
/* Byte type used for UTF-8 code units throughout this file. */
typedef unsigned char UTF8; /* typically 8 bits */
/*
 * Lookup table indexed by a byte value (0..255). Each entry is the number of
 * bytes expected to FOLLOW that byte when it is the first byte of a sequence,
 * so "entry + 1" is the total sequence length.
 *
 *   0x00..0xBF -> 0   (this range also covers 0x80..0xBF, so the table alone
 *                      does not reject a stray continuation byte)
 *   0xC0..0xDF -> 1
 *   0xE0..0xEF -> 2
 *   0xF0..0xF7 -> 3
 *   0xF8..0xFB -> 4
 *   0xFC..0xFF -> 5
 *
 * The table performs no validation; entries of 4 and 5 yield lengths of 5 and
 * 6, which isLegalUTF8() rejects.
 */
static const char trailingBytesForUTF8[256] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
bool isLegalUTF8(const UTF8 *source, int length) {
UTF8 a;
const UTF8 *srcptr = source+length;
switch (length) {
default: return false;
/* Everything else falls through when "true"... */
case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
case 2: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
switch (*source) {
/* no fall-through in this inner switch */
case 0xE0: if (a < 0xA0) return false; break;
case 0xED: if (a > 0x9F) return false; break;
case 0xF0: if (a < 0x90) return false; break;
case 0xF4: if (a > 0x8F) return false; break;
default: if (a < 0x80) return false;
}
case 1: if (*source >= 0x80 && *source < 0xC2) return false;
}
if (*source > 0xF4) return false;
return true;
}
//以UTF8格式截取字符串
int cutUTF8StringLen(const UTF8* utf8)
{
const UTF8** source = &utf8;
int len = strlen((const char*)utf8);
if(len > 20) //超过多少字符截取
len = 10;
else
return 0;
char *strDest = new char[21];
const UTF8* sourceEnd = utf8 + len;
int lenSave = 0;
while (*source != sourceEnd) {
int length = trailingBytesForUTF8[**source] + 1;
if (length > (sourceEnd - *source) || !isLegalUTF8(*source, length))
break;
*source += length;
lenSave += length;
}
return lenSave;
}
/*
 * Demo driver: prints the byte length of a sample string, the cut length
 * reported by cutUTF8StringLen(), and the string truncated to that length.
 */
int main()
{
    std::string strName = "qaz@#$李中天789气味人水电费是滴水电费士大夫是";

    /* Byte count of the sample (up to the first NUL). */
    const size_t byteCount = strlen(strName.c_str());
    std::cout << byteCount << std::endl;

    /* Ask where the string may be cut. */
    const unsigned char* rawBytes =
        reinterpret_cast<const unsigned char*>(strName.c_str());
    const int lenSave = cutUTF8StringLen(rawBytes);
    std::cout << lenSave << std::endl;

    /* Keep only the first lenSave bytes and show the result. */
    strName = strName.substr(0, lenSave);
    std::cout << strName << std::endl;
    return 0;
}
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
C++
1
https://gitee.com/LiPeiLei/convertutf.git
git@gitee.com:LiPeiLei/convertutf.git
LiPeiLei
convertutf
ConvertUTF
master

搜索帮助

0d507c66 1850385 C8b1a773 1850385