forked from JanX2/UniversalDetector
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathUniversalDetector.m
90 lines (73 loc) · 1.75 KB
/
UniversalDetector.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#import "UniversalDetector.h"
#import "WrappedUniversalDetector.h"
@implementation UniversalDetector
// Initializes a detector with a freshly allocated native detector handle
// and an empty result cache (no charset name, zero confidence).
-(id)init
{
	self = [super init];
	if(!self) return nil;

	// Result cache starts empty; it is filled lazily by -MIMECharset.
	confidence = 0;
	charsetName = nil;

	// Handle obtained from the WrappedUniversalDetector C interface;
	// it is handed back to FreeUniversalDetector() in -dealloc.
	detectorPtr = AllocUniversalDetector();

	return self;
}
// Releases the cached charset name and the native detector handle.
// This file uses manual retain/release, so [super dealloc] must come last.
-(void)dealloc
{
	[charsetName release];
	FreeUniversalDetector(detectorPtr);

	[super dealloc];
}
// Feeds the contents of the file at `path` to the detector.
//
// The file is memory-mapped when the system considers that safe and read
// into memory otherwise. If `path` is nil or the file cannot be read, the
// call is a silent no-op and the detector state is left untouched.
-(void)analyzeContentsOfFile:(NSString *)path
{
	if (!path) return;

	// -initWithContentsOfMappedFile: is deprecated; NSDataReadingMappedIfSafe
	// is the documented replacement and avoids mapping files on volumes that
	// may disappear underneath us.
	NSData *data = [[NSData alloc] initWithContentsOfFile:path
	                                              options:NSDataReadingMappedIfSafe
	                                                error:NULL];
	if (data) {
		// Route through -analyzeData: so both entry points share one code path.
		[self analyzeData:data];
	}
	[data release];
}
// Feeds the bytes of `data` to the detector.
//
// -analyzeBytes:length: takes an `int` length, while -[NSData length] is an
// NSUInteger. Passing the length straight through would silently truncate
// (or turn negative) for buffers of 2 GiB or more, so the data is handed
// over in bounded chunks instead. Empty or nil data still results in exactly
// one -analyzeBytes:length: call with length 0, as before.
-(void)analyzeData:(NSData *)data
{
	// 1 GiB per call: comfortably inside the range of a 32-bit signed int.
	const NSUInteger maxChunk = (NSUInteger)1 << 30;

	const char *cursor = (const char *)[data bytes];
	NSUInteger remaining = [data length];

	for (;;)
	{
		int chunk = (remaining > maxChunk) ? (int)maxChunk : (int)remaining;
		[self analyzeBytes:cursor length:chunk];

		remaining -= (NSUInteger)chunk;
		if (remaining == 0) break;

		cursor += chunk;
	}
}
// Passes `len` bytes starting at `data` to the native detector and drops the
// cached charset name, so the next -MIMECharset call queries the detector
// again instead of returning a result computed from earlier input.
-(void)analyzeBytes:(const char *)data length:(int)len
{
	UniversalDetectorHandleData(detectorPtr, data, len);

	// Invalidate the lazily computed result.
	NSString *stale = charsetName;
	charsetName = nil;
	[stale release];
}
// Resets the native detector and discards any cached detection result.
//
// Without clearing the cache, -MIMECharset, -encoding and -confidence would
// keep returning the result computed before the reset until new bytes were
// analyzed.
-(void)reset
{
	UniversalDetectorReset(detectorPtr);

	[charsetName release];
	charsetName = nil;
	confidence = 0;
}
// Returns the value reported by UniversalDetectorDone() for this detector.
-(BOOL)done
{
	BOOL finished = UniversalDetectorDone(detectorPtr);
	return finished;
}
// Returns the detected charset name, or nil if the detector reports none.
//
// The name is computed lazily and cached in `charsetName`; querying the
// detector also stores its confidence value in the `confidence` ivar.
-(NSString *)MIMECharset
{
	// Cache hit: nothing to recompute.
	if(charsetName) return charsetName;

	const char *detected = UniversalDetectorCharset(detectorPtr, &confidence);
	if(detected)
	{
		charsetName = [[NSString alloc] initWithUTF8String:detected];
	}

	// Still nil here when the detector had no answer.
	return charsetName;
}
// Returns the NSStringEncoding corresponding to the detected charset, or 0
// when no charset was detected or its name does not map to a known encoding.
-(NSStringEncoding)encoding
{
	NSString *name = [self MIMECharset];
	if(name == nil)
	{
		return 0;
	}

	CFStringEncoding detected = CFStringConvertIANACharSetNameToEncoding((CFStringRef)name);
	if(detected == kCFStringEncodingInvalidId)
	{
		return 0;
	}

	// UniversalDetector detects CP949 but reports it as "EUC-KR", because
	// CP949 has no IANA name. Substitute the DOS Korean encoding so that
	// such strings still decode properly.
	if(detected == kCFStringEncodingEUC_KR)
	{
		detected = kCFStringEncodingDOSKorean;
	}

	return CFStringConvertEncodingToNSStringEncoding(detected);
}
// Returns the confidence value stored by the most recent detector query.
// If no charset name is cached, -MIMECharset is invoked first so that the
// `confidence` ivar is refreshed from the detector.
-(float)confidence
{
	if(charsetName == nil)
	{
		// Called only for its side effect of updating `confidence`.
		[self MIMECharset];
	}

	return confidence;
}
@end