#ifndef _CODING_H_
#define _CODING_H_
#include "base.h"
#include "exb.h"
//gb编码转unicode 返回的字符串需要释放内存
char *gbToUn(char *text)
{
int len;
return (char*)c2u(text, NULL, &len);
}
//unicode编码转gb 返回的字符串需要释放内存
char *unToGb(char *text)
{
char *input= text;
int input_len= wstrlen(input);
char *output= malloc(input_len+2);
printf("input_len %d\n",input_len);
int output_len;
u2c( input, input_len, &output, &output_len);
return output;
}
//判断utf编码,0为成功,-1失败
int IsUTF8( void* pBuffer, long size)
{
int IsUTF8 = 0;
unsigned char* start = (unsigned char*)pBuffer;
unsigned char* end = (unsigned char*)pBuffer + size;
while ((int)start < (int)end)
{
if (*start < 0x80) // (10000000): 值小于0x80的为ASCII字符
{
start++;
}
else if (*start < (0xC0)) // (11000000): 值介于0x80与0xC0之间的为无效UTF-8字符
{
IsUTF8 = -1;
break;
}
else if (*start < (0xE0)) // (11100000): 此范围内为2字节UTF-8字符
{
if ((int)start >= (int)end - 1)
break;
if ((start[1] & (0xC0)) != 0x80)
{
IsUTF8 = -1;
break;
}
start += 2;
}
else if (*start < (0xF0)) // (11110000): 此范围内为3字节UTF-8字符
{
if ((int)start >= (int)end - 2)
break;
if ((start[1] & (0xC0)) != 0x80 || (start[2] & (0xC0)) != 0x80)
{
IsUTF8 = -1;
break;
}
start += 3;
}
else
{
IsUTF8 = -1;
break;
}
}
return IsUTF8;
}
//复制unicode编码字符串,返回的字符串需要手动free
char *un_copy(char *text)
{
int len= wstrlen(text)+2;
char *temp= malloc(len);
memcpy(temp,text,len);
return temp;
}
//un编码数字转换为int
int un_atoi(char *text)
{
char *temp=unToGb(text);
int num=atoi(temp);
free(temp);
return num;
}
// unicode字符串比较
int wstrcmp(char *str1, char *str2)
{
while ((str1[0] || str1[1]) && str1[0] == str2[0] && str1[1] == str2[1])
{
str1 += 2;
str2 += 2;
}
return str1[0] == str2[0] ? str1[1] - str2[1] : str1[0] - str2[0];
}
// unicode字符串复制
char *wstrcpy(char *dst, char *src)
{
int i = 0;
while (src[i] || src[i + 1])
{
dst[i] = src[i];
dst[i+1] = src[i + 1];
i += 2;
}
dst[i] = dst[i + 1] = '\0';
return dst;
}
// unicode字符串复制指定长度
char *wstrncpy(char *dst, char *src, int32 size)
{
int i = 0;
size -= 2;
while (src[i] || src[i + 1])
{
if (i >= size) break;
dst[i] = src[i];
dst[i + 1] = src[i + 1];
i += 2;
}
dst[i] = dst[i + 1] = '\0';
return dst;
}
// unicode字符串附加
char *wstrcat(char *dst, char *src)
{
int len = wstrlen(dst);
return wstrcpy(dst + len, src);
}
/*
// unicode字符串复制
char *wstrdup( char *str)
{
int len = 0;
char *pDest = NULL;
if (NULL == str)
return NULL;
len = wstrlen(str) + 2;
pDest = (char*)malloc(len);
if(NULL != pDest)
memcpy(pDest, str, len);
return pDest;
}
*/
/*---------------------------------------------------------------------------*/
//UTF8转换为Unicode
//参数:UTF8字符串,Unicode缓冲区,缓冲区大小
//返回:缓冲区使用大小
int UTF8ToUni( char *utf8str, char *unistr, int size)
{
int32 i = 0, u = 0;
uint8 *utf8, *unicode;
utf8 = (unsigned char*)utf8str;
unicode = (unsigned char*)unistr;
while (utf8[i] && u < size - 2)
{
if((utf8[i] & 0x80) == 0)
{
// one byte 0...
unicode[u] = 0;
unicode[u + 1] = utf8[i++];
}else if((utf8[i] & 0x20) == 0){
// two bytes 110... 10...
unicode[u] = (unsigned char)((utf8[i] & 0x1f) >> 2);
unicode[u + 1] = (unsigned char)((utf8[i] << 6) & 0xff) | (utf8[i + 1] & 0x3f);
i += 2;
}else{
// three bytes 1110... 10... 10...
unicode[u] = (unsigned char)((utf8[i] << 4) & 0xff) | ((utf8[i + 1] & 0x3f) >> 2);
unicode[u + 1] = (unsigned char)((utf8[i + 1] << 6) & 0xff) | (utf8[i + 2] & 0x3f);
i += 3;
}
u += 2;
}
unicode[u] = '\0';
unicode[u + 1] = '\0';
return u;
}
//Unicode转换为UTF8
//参数:Unicode字符串,UTF8缓冲区,缓冲区大小
//返回:缓冲区使用大小
int UniToUTF8(char *unistr, char *utf8str, int size)
{
int u = 0, i = 0;
unsigned char *unicode, *utf8;
unicode = (unsigned char*)unistr;
utf8 = (unsigned char*)utf8str;
while ((*(unicode+u) || *(unicode+u+1)) && i < size - 1)
{
if (*(unicode+u) == 0 && *(unicode+u + 1) < 128) {
// 0 - 7 bits
utf8[i++] = *(unicode+u + 1);
} else if((*(unicode+u) & 0xf8) == 0) {
// 8 - 11 bits
utf8[i++] = 0xc0 | ((unicode[u] << 2) & 0xff) | (unicode[u + 1] >> 6);
utf8[i++] = 0x80 | (unicode[u + 1] & 0x3f);
} else {
// 12 - 16 bits
utf8[i++] = 0xe0 | (unicode[u] >> 4);
utf8[i++] = 0x80 | ((unicode[u] << 2) & 0x3f) | (unicode[u + 1] >> 6);
utf8[i++] = 0x80 | (unicode[u + 1] & 0x3f);
}
u += 2;
}
utf8[i] = '\0';
return i;
}
// 转换ascii字符串为unicode字符串
int Asc2Uni(char *input, char *output, int32 outlen)
{
int i;
if (NULL == input || NULL == output)
return -1;
for (i = 0; input[i] != '\0' && outlen >= 4; i++)
{
output[i * 2] = '\0';
output[i * 2 + 1] = input[i];
outlen -= 2;
}
output[i * 2] = '\0';
output[i * 2 + 1] = '\0';
return (i * 2);
}
// 转换unicode字符串为ascii字符串
int Uni2Asc(char *input, char *output,int outlen)
{
int32 i;
if (NULL == input || NULL == output)
return -1;
for (i = 0; input[i * 2 + 1] != '\0' && outlen >= 2; i++)
{
output[i] = input[i * 2 + 1];
outlen--;
}
output[i] = '\0';
return (i);
}
//将utf8编码转换为gb,需要free
char *utfToGb(char *text)
{
int len=strlen(text)*2+2;
char *un_text=malloc(len);
UTF8ToUni(text,un_text,len);
char *gb_text=unToGb(un_text);
free(un_text);
return gb_text;
}
//将gb转utf,需要free
char *gbToUtf(char *text)
{
int len;
char *un_text;
char *utf_text;
len=strlen(text)*2+2;
utf_text=malloc(len);
un_text=gbToUn(text);
UniToUTF8(un_text,utf_text,len);
free(un_text);
return utf_text;
}
#endif
登陆可见附件内容.
|
---|
Processed Time:0.21169996261597