File Coverage

src/nsLatin1Prober.cpp
Criterion Covered Total %
statement 30 33 90.9
branch 14 20 70.0
condition n/a
subroutine n/a
pod n/a
total 44 53 83.0


line stmt bran cond sub pod time code
1             /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2             /* ***** BEGIN LICENSE BLOCK *****
3             * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4             *
5             * The contents of this file are subject to the Mozilla Public License Version
6             * 1.1 (the "License"); you may not use this file except in compliance with
7             * the License. You may obtain a copy of the License at
8             * http://www.mozilla.org/MPL/
9             *
10             * Software distributed under the License is distributed on an "AS IS" basis,
11             * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12             * for the specific language governing rights and limitations under the
13             * License.
14             *
15             * The Original Code is Mozilla Universal charset detector code.
16             *
17             * The Initial Developer of the Original Code is
18             * Netscape Communications Corporation.
19             * Portions created by the Initial Developer are Copyright (C) 2001
20             * the Initial Developer. All Rights Reserved.
21             *
22             * Contributor(s):
23             * Shy Shalom <shooshX@gmail.com>
24             *
25             * Alternatively, the contents of this file may be used under the terms of
26             * either the GNU General Public License Version 2 or later (the "GPL"), or
27             * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28             * in which case the provisions of the GPL or the LGPL are applicable instead
29             * of those above. If you wish to allow use of your version of this file only
30             * under the terms of either the GPL or the LGPL, and not to allow others to
31             * use your version of this file under the terms of the MPL, indicate your
32             * decision by deleting the provisions above and replace them with the notice
33             * and other provisions required by the GPL or the LGPL. If you do not delete
34             * the provisions above, a recipient may use your version of this file under
35             * the terms of any one of the MPL, the GPL or the LGPL.
36             *
37             * ***** END LICENSE BLOCK ***** */
38              
39             #include "nsLatin1Prober.h"
40             #include "prmem.h"
41             #include <stdio.h>
42              
43             #define UDF 0 // undefined: byte value with no assigned character
44             #define OTH 1 // other: every byte not covered by the letter classes below
45             #define ASC 2 // ascii capital letter
46             #define ASS 3 // ascii small letter
47             #define ACV 4 // accent capital vowel
48             #define ACO 5 // accent capital other
49             #define ASV 6 // accent small vowel
50             #define ASO 7 // accent small other
51             #define CLASS_NUM 8 // total classes; also the row width of Latin1ClassModel
52              
53             static unsigned char Latin1_CharToClass[] =
54             {
55             OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 00 - 07
56             OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 08 - 0F
57             OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 10 - 17
58             OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 18 - 1F
59             OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 20 - 27
60             OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 28 - 2F
61             OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 30 - 37
62             OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 38 - 3F
63             OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 40 - 47
64             ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 48 - 4F
65             ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 50 - 57
66             ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, // 58 - 5F
67             OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 60 - 67
68             ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 68 - 6F
69             ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 70 - 77
70             ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, // 78 - 7F
71             OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, // 80 - 87
72             OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, // 88 - 8F
73             UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 90 - 97
74             OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, // 98 - 9F
75             OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // A0 - A7
76             OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // A8 - AF
77             OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // B0 - B7
78             OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // B8 - BF
79             ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, // C0 - C7
80             ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, // C8 - CF
81             ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, // D0 - D7
82             ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, // D8 - DF
83             ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, // E0 - E7
84             ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, // E8 - EF
85             ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, // F0 - F7
86             ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, // F8 - FF
87             };
88              
89              
/* Likelihood of one character class following another.
   Rows are the class of the previous character, columns the class of the
   current one; the table is indexed as [previous*CLASS_NUM + current].
   Cell values:
     0 : illegal
     1 : very unlikely
     2 : normal
     3 : very likely
   The table is read-only, hence const.
*/
static const unsigned char Latin1ClassModel[] =
{
/*        UDF OTH ASC ASS ACV ACO ASV ASO  */
/*UDF*/    0,  0,  0,  0,  0,  0,  0,  0,
/*OTH*/    0,  3,  3,  3,  3,  3,  3,  3,
/*ASC*/    0,  3,  3,  3,  3,  3,  3,  3,
/*ASS*/    0,  3,  3,  3,  1,  1,  3,  3,
/*ACV*/    0,  3,  3,  3,  1,  2,  1,  2,
/*ACO*/    0,  3,  3,  3,  3,  3,  3,  3,
/*ASV*/    0,  3,  1,  3,  1,  1,  1,  3,
/*ASO*/    0,  3,  1,  3,  1,  1,  3,  3,
};
107              
108 4           void nsLatin1Prober::Reset(void)
109             {
110 4           mState = eDetecting;
111 4           mLastCharClass = OTH;
112 20 100         for (int i = 0; i < FREQ_CAT_NUM; i++)
113 16           mFreqCounter[i] = 0;
114 4           }
115              
116              
117 4           nsProbingState nsLatin1Prober::HandleData(const char* aBuf, PRUint32 aLen)
118             {
119 4           char *newBuf1 = 0;
120 4           PRUint32 newLen1 = 0;
121              
122 4 50         if (!FilterWithEnglishLetters(aBuf, aLen, &newBuf1, newLen1)) {
123 0           newBuf1 = (char*)aBuf;
124 4           newLen1 = aLen;
125             }
126            
127             unsigned char charClass;
128             unsigned char freq;
129 50 100         for (PRUint32 i = 0; i < newLen1; i++)
130             {
131 46           charClass = Latin1_CharToClass[(unsigned char)newBuf1[i]];
132 46           freq = Latin1ClassModel[mLastCharClass*CLASS_NUM + charClass];
133 46 50         if (freq == 0) {
134 0           mState = eNotMe;
135 0           break;
136             }
137 46           mFreqCounter[freq]++;
138 46           mLastCharClass = charClass;
139             }
140              
141 4 50         if (newBuf1 != aBuf)
142 4 50         PR_FREEIF(newBuf1);
143              
144 4           return mState;
145             }
146              
147 4           float nsLatin1Prober::GetConfidence(void)
148             {
149 4 50         if (mState == eNotMe)
150             return 0.01f;
151            
152             float confidence;
153             PRUint32 total = 0;
154 20 100         for (PRInt32 i = 0; i < FREQ_CAT_NUM; i++)
155 16           total += mFreqCounter[i];
156              
157 4 50         if(!total)
158             confidence = 0.0f;
159             else
160             {
161 4           confidence = mFreqCounter[3]*1.0f / total;
162 4           confidence -= mFreqCounter[1]*20.0f/total;
163             }
164              
165 4 100         if (confidence < 0.0f)
166             confidence = 0.0f;
167            
168             // lower the confidence of latin1 so that other more accurate detector
169             // can take priority.
170 4           confidence *= 0.50f;
171              
172 4           return confidence;
173             }
174              
#ifdef DEBUG_chardet
// Debug helper (compiled only when DEBUG_chardet is defined): prints the
// current confidence and the charset name on one line of stdout.
void nsLatin1Prober::DumpStatus()
{
  const float confidence = GetConfidence();
  const char *charsetName = GetCharSetName();
  printf(" Latin1Prober: %1.3f [%s]\r\n", confidence, charsetName);
}
#endif
181              
182