/*
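AutoHotkey regex notes: the pattern [^\x00-\xff] matches double-byte characters (anything outside \x00-\xff), and a Han character-set range is written as [\x{start}-\x{end}]. For example, [\x{4E00}-\x{9FA5}]+ tests whether the text consists of basic CJK ideographs.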
*/
; Demo: classify every character of a sample string and show the groups.
str:="字集顚亾?礱龑龖瘇龘〇㠩略?戔㵸"
result:={},chars:=""

; Break the sample into one-character entries (whitespace is dropped).
GetStrLen(str,Array)

; Bucket each character under the label the classifier returns;
; characters with no label (empty return) are skipped.
for idx,ch In Array
{
    Type:=CheckChineseCharset(ch)
    if !Type
        Continue
    if result[Type]
        result[Type].=ch "、"
    Else
        result[Type]:=ch "、"
}

; Render one "label:{ members }" line per bucket, trimming the
; trailing separator from each member list.
for label,members In result
{
    if !label
        Continue
    chars.=label ":{ " Trim(members,"、") " }`n"
}
MsgBox % chars
; Classify a single character by Chinese character set / Unicode range.
;
; Parameters:
;   String - text to classify. Every pattern below is anchored with ^...$
;            around one character class, so only a one-character string
;            can match a category.
;
; Returns:
;   - "扩A" .. "扩H" for the CJK extension ranges (no code-page check).
;   - For the other Han-related ranges, the character's CP936 bytes are
;     inspected first:
;         "GB2312" when lead is 0xB0-0xF7 and trail is 0xA1-0xFE (B0A1-F7FE)
;         "GBK"    when lead is 0x81-0xFE and trail is 0x40-0xFE (8140-FEFE)
;     and only if neither applies is the range's own label returned.
;   - "未知分类" for a character in \x00-\xff.
;   - "" (empty) when no range matches.
CheckChineseCharset(String){
    ; Encode to code page 936 and read the first two bytes as plain integers.
    ; The buffer is zero-filled so the second byte reads 0 for 1-byte output.
    VarSetCapacity(buf,StrPut(String,"CP936")*4,0)
    StrPut(String,&buf,"CP936")
    lead:=NumGet(&buf,0,"UChar"),trail:=NumGet(&buf,1,"UChar")

    ; Basic ideograph range.
    if (String~="^[\x{4E00}-\x{9FFF}]$")
        Return CheckChineseCharset_Legacy(lead,trail,"非中文")

    ; Extension ranges A-H: labelled directly.
    if (String~="^[\x{3400}-\x{4DBF}]$")
        Return "扩A"
    if (String~="^[\x{20000}-\x{2A6DF}]$")
        Return "扩B"
    if (String~="^[\x{2A700}-\x{2B73F}]$")
        Return "扩C"
    if (String~="^[\x{2B740}-\x{2B81F}]$")
        Return "扩D"
    if (String~="^[\x{2B820}-\x{2CEAF}]$")
        Return "扩E"
    if (String~="^[\x{2CEB0}-\x{2EBEF}]$")
        Return "扩F"
    if (String~="^[\x{30000}-\x{3134F}]$")
        Return "扩G"
    if (String~="^[\x{31350}-\x{323BC}]$")
        Return "扩H"

    ; Remaining ranges: prefer the GB2312/GBK answer, else the range label.
    if (String~="^[\x{3007}]$")
        Return CheckChineseCharset_Legacy(lead,trail,"〇")
    if (String~="^[\x{2F00}-\x{2FD5}]$")
        Return CheckChineseCharset_Legacy(lead,trail,"康熙部首")
    if (String~="^[\x{2E80}-\x{2EF3}]$")
        Return CheckChineseCharset_Legacy(lead,trail,"部首扩展")
    if (String~="^[\x{F900}-\x{FAD9}]$")
        Return CheckChineseCharset_Legacy(lead,trail,"CJK兼容汉字")
    if (String~="^[\x{2F800}-\x{2FA1D}]$")
        Return CheckChineseCharset_Legacy(lead,trail,"CJK兼容扩展")
    if (String~="^[\x{E815}-\x{E86F}]$")
        Return CheckChineseCharset_Legacy(lead,trail,"PUA(GBK)部件")
    if (String~="^[\x{E400}-\x{E5E8}]$")
        Return CheckChineseCharset_Legacy(lead,trail,"部件扩展")
    if (String~="^[\x{E600}-\x{E6CF}]$")
        Return CheckChineseCharset_Legacy(lead,trail,"PUA增补")
    ; Must test String here: testing any other (unset) variable would make
    ; this stroke range unreachable.
    if (String~="^[\x{31C0}-\x{31E3}]$")
        Return CheckChineseCharset_Legacy(lead,trail,"汉字笔画")
    if (String~="^[\x{2FF0}-\x{2FFB}]$")
        Return CheckChineseCharset_Legacy(lead,trail,"汉字结构")
    if (String~="^[\x{3105}-\x{312F}]$")
        Return CheckChineseCharset_Legacy(lead,trail,"汉语注音")
    if (String~="^[\x{31A0}-\x{31BA}]$")
        Return CheckChineseCharset_Legacy(lead,trail,"注音扩展")

    if (String~="^[\x00-\xff]$")
        Return "未知分类"
    Return ""
}

; Private helper: map a CP936 lead/trail byte pair to "GB2312" or "GBK",
; returning `fallback` when the pair lies in neither byte range.
CheckChineseCharset_Legacy(lead,trail,fallback){
    if (lead>=0xB0&&lead<=0xF7&&trail>=0xA1&&trail<=0xFE)
        Return "GB2312"
    if (lead>=0x81&&lead<=0xFE&&trail>=0x40&&trail<=0xFE)
        Return "GBK"
    Return fallback
}
; Split a string into its individual characters, ignoring whitespace.
;
; Parameters:
;   str - input text; everything matched by \s is removed before splitting.
;   obj - (ByRef, optional) receives a 1-based array with one entry per
;         remaining character, in order.
;
; Returns:
;   The number of entries stored in obj.
GetStrLen(str,ByRef obj:="") {
    str:=RegExReplace(str,"\s")
    obj:=[],total:=0,pos:=1
    ; Walk the string one regex match at a time instead of inserting and
    ; splitting on a marker string, so no input character can collide with
    ; a delimiter. "s)" lets "." match any remaining character.
    while (pos:=RegExMatch(str,"s).",ch,pos)) {
        obj[++total]:=ch
        ; Advance by the length of what was matched, since one match may
        ; span more than one string unit.
        pos+=StrLen(ch)
    }
    Return total
}
; 声明:本站所有文章,如无特殊说明或标注,均为本站原创发布。任何个人或组织,在未征得本站同意时,禁止复制、盗用、采集、发布本站内容到任何网站、书籍等各类媒体平台。如若本站内容侵犯了原著者的合法权益,可联系我们进行处理。
; 影子开启ZB模式