File Coverage

src/nsSBCSGroupProber.cpp
Criterion Covered Total %
statement 51 70 72.8
branch 37 76 48.6
condition n/a
subroutine n/a
pod n/a
total 88 146 60.2


line stmt bran cond sub pod time code
1             /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2             /* ***** BEGIN LICENSE BLOCK *****
3             * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4             *
5             * The contents of this file are subject to the Mozilla Public License Version
6             * 1.1 (the "License"); you may not use this file except in compliance with
7             * the License. You may obtain a copy of the License at
8             * http://www.mozilla.org/MPL/
9             *
10             * Software distributed under the License is distributed on an "AS IS" basis,
11             * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12             * for the specific language governing rights and limitations under the
13             * License.
14             *
15             * The Original Code is Mozilla Universal charset detector code.
16             *
17             * The Initial Developer of the Original Code is
18             * Netscape Communications Corporation.
19             * Portions created by the Initial Developer are Copyright (C) 2001
20             * the Initial Developer. All Rights Reserved.
21             *
22             * Contributor(s):
23             * Shy Shalom <shooshX@gmail.com>
24             *
25             * Alternatively, the contents of this file may be used under the terms of
26             * either the GNU General Public License Version 2 or later (the "GPL"), or
27             * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28             * in which case the provisions of the GPL or the LGPL are applicable instead
29             * of those above. If you wish to allow use of your version of this file only
30             * under the terms of either the GPL or the LGPL, and not to allow others to
31             * use your version of this file under the terms of the MPL, indicate your
32             * decision by deleting the provisions above and replace them with the notice
33             * and other provisions required by the GPL or the LGPL. If you do not delete
34             * the provisions above, a recipient may use your version of this file under
35             * the terms of any one of the MPL, the GPL or the LGPL.
36             *
37             * ***** END LICENSE BLOCK ***** */
38              
39             #include <stdio.h>
40             #include "prmem.h"
41              
42             #include "nsSBCharSetProber.h"
43             #include "nsSBCSGroupProber.h"
44              
45             #include "nsHebrewProber.h"
46              
47 4           nsSBCSGroupProber::nsSBCSGroupProber()
48             {
49 4 50         mProbers[0] = new nsSingleByteCharSetProber(&Win1251Model);
50 4 50         mProbers[1] = new nsSingleByteCharSetProber(&Koi8rModel);
51 4 50         mProbers[2] = new nsSingleByteCharSetProber(&Latin5Model);
52 4 50         mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel);
53 4 50         mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model);
54 4 50         mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model);
55 4 50         mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model);
56 4 50         mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model);
57 4 50         mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
58 4 50         mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
59              
60 4 50         nsHebrewProber *hebprober = new nsHebrewProber();
61             // Notice: Any change in these indexes - 10,11,12 must be reflected
62             // in the code below as well.
63 4           mProbers[10] = hebprober;
64 4 50         mProbers[11] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew
65 4 50         mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew
66             // Tell the Hebrew prober about the logical and visual probers
67 4 50         if (mProbers[10] && mProbers[11] && mProbers[12]) // all are not null
    50          
    50          
68             {
69             hebprober->SetModelProbers(mProbers[11], mProbers[12]);
70             }
71             else // One or more is null. avoid any Hebrew probing, null them all
72             {
73 0 0         for (PRUint32 i = 10; i <= 12; ++i)
74             {
75 0 0         delete mProbers[i];
    0          
76 0           mProbers[i] = 0;
77             }
78             }
79              
80             // disable latin2 before latin1 is available, otherwise all latin1
81             // will be detected as latin2 because of their similarity.
82             //mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
83             //mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
84              
85 4 50         Reset();
86 4           }
87              
88 12           nsSBCSGroupProber::~nsSBCSGroupProber()
89             {
90 56 100         for (PRUint32 i = 0; i < NUM_OF_SBCS_PROBERS; i++)
91             {
92 52 50         delete mProbers[i];
    50          
93             }
94 8           }
95              
96              
97 0           const char* nsSBCSGroupProber::GetCharSetName()
98             {
99             //if we have no answer yet
100 0 0         if (mBestGuess == -1)
101             {
102 0           GetConfidence();
103             //no charset seems positive
104 0 0         if (mBestGuess == -1)
105             //we will use default.
106 0           mBestGuess = 0;
107             }
108 0           return mProbers[mBestGuess]->GetCharSetName();
109             }
110              
111 4           void nsSBCSGroupProber::Reset(void)
112             {
113 4           mActiveNum = 0;
114 56 100         for (PRUint32 i = 0; i < NUM_OF_SBCS_PROBERS; i++)
115             {
116 52 50         if (mProbers[i]) // not null
117             {
118 52           mProbers[i]->Reset();
119 52           mIsActive[i] = PR_TRUE;
120 52           ++mActiveNum;
121             }
122             else
123 0           mIsActive[i] = PR_FALSE;
124             }
125 4           mBestGuess = -1;
126 4           mState = eDetecting;
127 4           }
128              
129              
130 4           nsProbingState nsSBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen)
131             {
132             nsProbingState st;
133             PRUint32 i;
134 4           char *newBuf1 = 0;
135 4           PRUint32 newLen1 = 0;
136              
137             //apply filter to original buffer, and we got new buffer back
138             //depend on what script it is, we will feed them the new buffer
139             //we got after applying proper filter
140             //this is done without any consideration to KeepEnglishLetters
141             //of each prober since as of now, there are no probers here which
142             //recognize languages with English characters.
143 4 50         if (!FilterWithoutEnglishLetters(aBuf, aLen, &newBuf1, newLen1))
144             goto done;
145            
146 4 50         if (newLen1 == 0)
147             goto done; // Nothing to see here, move on.
148              
149 56 100         for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
150             {
151 52 50         if (!mIsActive[i])
152             continue;
153 52           st = mProbers[i]->HandleData(newBuf1, newLen1);
154 52 50         if (st == eFoundIt)
155             {
156 0           mBestGuess = i;
157 0           mState = eFoundIt;
158 0           break;
159             }
160 52 50         else if (st == eNotMe)
161             {
162 0           mIsActive[i] = PR_FALSE;
163 0           mActiveNum--;
164 0 0         if (mActiveNum <= 0)
165             {
166 0           mState = eNotMe;
167 0           break;
168             }
169             }
170             }
171              
172             done:
173 4 50         PR_FREEIF(newBuf1);
174              
175 4           return mState;
176             }
177              
178 4           float nsSBCSGroupProber::GetConfidence(void)
179             {
180             PRUint32 i;
181             float bestConf = 0.0, cf;
182              
183 4           switch (mState)
184             {
185             case eFoundIt:
186             return (float)0.99; //sure yes
187             case eNotMe:
188 0           return (float)0.01; //sure no
189             default:
190 56 100         for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
191             {
192 52 50         if (!mIsActive[i])
193             continue;
194 52           cf = mProbers[i]->GetConfidence();
195 52 100         if (bestConf < cf)
196             {
197             bestConf = cf;
198 4           mBestGuess = i;
199             }
200             }
201             }
202             return bestConf;
203             }
204              
205             #ifdef DEBUG_chardet
206             void nsSBCSGroupProber::DumpStatus()
207             {
208             PRUint32 i;
209             float cf;
210            
211             cf = GetConfidence();
212             printf(" SBCS Group Prober --------begin status \r\n");
213             for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
214             {
215             if (!mIsActive[i])
216             printf(" inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName());
217             else
218             mProbers[i]->DumpStatus();
219             }
220             printf(" SBCS Group found best match [%s] confidence %f.\r\n",
221             mProbers[mBestGuess]->GetCharSetName(), cf);
222             }
223             #endif