对鼠须管词库进行简单排序
2018-02-12 本文已影响25人
十一岁的加重
在输入汉字时,鼠须管会从 wubi86.dict.yaml 这个文件
中读取可输入的汉字以及自动造词生成的词语,所以这个文件里的内容如果过多,就会影响输入效率。并且有些繁体字或者生僻字平时我们根本用不到,与其让它们拖慢输入效率,还不如写点代码排个序。因为之前我已经把其中一些繁体字或者生僻字去除了,所以这次的需求是:降序排序,过一过里面输入次数较少的词。
直接上代码吧
Objective-C 版本
#import <Foundation/Foundation.h>
/// One tab-separated row of the dictionary file, as parsed by `main`.
@interface LineObj : NSObject

/// First column of the row.
@property (nonatomic, copy) NSString *outputValue;

/// Second column of the row.
@property (nonatomic, copy) NSString *outputKey;

/// Third column of the row, parsed as an integer; `main` sorts on this value.
@property (nonatomic, assign) NSUInteger inputNum;

/// Optional fourth column of the row; nil or empty when the row has none.
@property (nonatomic, copy) NSString *otherOutputKey;

/// The row serialised back to text, terminated by a newline.
/// The getter is computed from the other properties on every access.
@property (nonatomic, copy) NSString *line;

@end

@implementation LineObj

/// Rebuilds the tab-separated row: three columns, plus the fourth one only
/// when `otherOutputKey` is non-empty.
- (NSString *)line {
    NSString *extraKey = self.otherOutputKey;

    // Messaging nil yields 0, so this covers both "no fourth column" and "empty fourth column".
    if (extraKey.length == 0) {
        return [NSString stringWithFormat:@"%@\t%@\t%tu\n", self.outputValue, self.outputKey, self.inputNum];
    }

    return [NSString stringWithFormat:@"%@\t%@\t%tu\t%@\n", self.outputValue, self.outputKey, self.inputNum, extraKey];
}

@end
/// Reads the dictionary file, keeps every row that has at least three
/// tab-separated columns, sorts those rows by their third column (ascending)
/// and logs the re-serialised rows.
///
/// @return 0 on success, 1 when the dictionary file cannot be read as UTF-8.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        NSString *thesaurusFilePath = @"/Users/mac/Desktop/wubi86.dict.yaml";

        // Judge the read by its return value: the error pointer is only
        // meaningful when the call actually failed.
        NSError *readError = nil;
        NSString *thesaurusString = [NSString stringWithContentsOfFile:thesaurusFilePath
                                                              encoding:NSUTF8StringEncoding
                                                                 error:&readError];
        if (thesaurusString == nil) {
            NSLog(@"%@", readError);
            return 1;
        }

        NSMutableArray<LineObj *> *lineObjs = [NSMutableArray array];
        for (NSString *lineString in [thesaurusString componentsSeparatedByString:@"\n"]) {
            NSArray<NSString *> *lineValues = [lineString componentsSeparatedByString:@"\t"];
            // Rows with fewer than three columns are not carried into the output.
            if (lineValues.count < 3) {
                continue;
            }

            LineObj *lineObj = [[LineObj alloc] init];
            lineObj.outputValue = lineValues[0];
            lineObj.outputKey = lineValues[1];
            lineObj.inputNum = [lineValues[2] integerValue];
            if (lineValues.count == 4) {
                lineObj.otherOutputKey = lineValues[3];
            }
            [lineObjs addObject:lineObj];
        }

        // Ascending: rows with the smallest inputNum come first, the largest last.
        NSSortDescriptor *byInputNum = [NSSortDescriptor sortDescriptorWithKey:@"inputNum" ascending:YES];
        NSArray<LineObj *> *sortedObjs = [lineObjs sortedArrayUsingDescriptors:@[byInputNum]];

        NSMutableString *outputMuString = [NSMutableString string];
        for (LineObj *obj in sortedObjs) {
            [outputMuString appendString:obj.line];
        }
        NSLog(@"----%@---", outputMuString);
    }
    return 0;
}
然后我看到,自己经常输入的不是词语,而是这些字:
image.png
那么问题就来了,还是得注意,多打词语才能提升五笔的速度。
Python 版本
import operator
class LineObj(object):
    """One tab-separated row of the dictionary file.

    outputValue    -- first column, stored as given.
    outputKey      -- second column, stored as given.
    outputNum      -- third column, converted with int(); raises ValueError
                      when it is not a valid integer literal.
    outputOtherKey -- optional fourth column; stored as '' when omitted.
    """

    def __init__(self, outputValue, outputKey, outputNum, outputOtherKey=None):
        self.outputValue = outputValue
        self.outputKey = outputKey
        # The count arrives as text from the file; int() tolerates surrounding whitespace.
        self.outputNum = int(outputNum)
        # Normalise "no fourth column" to an empty string so the attribute is always a string.
        self.outputOtherKey = '' if outputOtherKey is None else outputOtherKey
# Parse every row of the dictionary that has three or four tab-separated columns.
lineObjs = []
with open('wubi86.dict.yaml', 'rt') as f:
    for line in f:
        # Strip the line terminator before splitting so it cannot leak into the
        # last column (it used to end up inside the fourth column).
        words = line.rstrip('\r\n').split('\t')
        if len(words) in (3, 4):
            lineObjs.append(LineObj(*words))

# Ascending: rows with the smallest outputNum come first, the largest last.
cmpfun = operator.attrgetter('outputNum')
lineObjs.sort(key=cmpfun)

for lineObj in lineObjs:
    fields = [lineObj.outputValue, lineObj.outputKey, str(lineObj.outputNum)]
    # Emit the fourth column only when present, so three-column rows do not
    # get a trailing tab.
    if lineObj.outputOtherKey:
        fields.append(lineObj.outputOtherKey)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('\t'.join(fields))
可见已排好序
image.png