正则筛选字数&GBK转码
2016-10-10 本文已影响29人
音吹
正则:
// 1. Strip HTML tags (<tag ...>, </tag>, <tag ... />) from newLengthStr.
// The character class only admits letters, digits, spaces and the symbols
// " = # : / . _ - ?, so a tag containing any other character is left untouched.
NSString *preStr1 = @"<([0-9a-zA-Z\"=#:\\/\\.\\_\\-\\? ])*>|<\\/([0-9a-zA-Z\"=#:\\/\\.\\_\\-\\? ])*>|<([0-9a-zA-Z\"=#:\\/\\.\\_\\-\\? ])*\\/>";
NSString *replacement = @"";
NSError *regExpError = nil;
NSRegularExpression *regExp = [[NSRegularExpression alloc] initWithPattern:preStr1
                                                                    options:NSRegularExpressionCaseInsensitive
                                                                      error:&regExpError];
if (regExp != nil) {
    // Replace every match with the empty string. No matching options are needed:
    // NSMatchingReportProgress only matters for block-based enumeration.
    newLengthStr = [regExp stringByReplacingMatchesInString:newLengthStr
                                                    options:0
                                                      range:NSMakeRange(0, newLengthStr.length)
                                               withTemplate:replacement];
} else {
    // Messaging a nil regex would return nil and wipe out newLengthStr,
    // so keep the text as-is and report why the pattern failed to compile.
    NSLog(@"Failed to compile HTML tag pattern: %@", regExpError);
}
- 用到的正则:1.特殊符号(匹配夹在两个非单字节字符之间的空格,方括号内列出的标点除外)
"/(?<=[^\x00-\xff!…“”《》—、’])( | )(?=[^\x00-\xff!…“”《》—、’])/"
- 2.\r\n 连续出现 3 次及以上的情况
"(\r\n){3,}"
根据GBK计算长度(下面的代码实际使用 GB 18030-2000 编码常量进行转码)
// Encode newLengthStr with the GB 18030-2000 encoding constant and derive a
// length from the byte count: every two bytes count as one unit, and a
// trailing odd byte counts as one more.
NSStringEncoding gbkEncoding =
    CFStringConvertEncodingToNSStringEncoding(kCFStringEncodingGB_18030_2000);
NSData *data = [newLengthStr dataUsingEncoding:gbkEncoding];
NSUInteger encodedByteCount = data.length;
// Half the byte count, rounded up (the remainder adds 1 only when the count is odd).
NSInteger length = encodedByteCount / 2 + encodedByteCount % 2;