| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ | 
| 2 |  |  |  |  |  |  | /* ***** BEGIN LICENSE BLOCK ***** | 
| 3 |  |  |  |  |  |  | * Version: MPL 1.1/GPL 2.0/LGPL 2.1 | 
| 4 |  |  |  |  |  |  | * | 
| 5 |  |  |  |  |  |  | * The contents of this file are subject to the Mozilla Public License Version | 
| 6 |  |  |  |  |  |  | * 1.1 (the "License"); you may not use this file except in compliance with | 
| 7 |  |  |  |  |  |  | * the License. You may obtain a copy of the License at | 
| 8 |  |  |  |  |  |  | * http://www.mozilla.org/MPL/ | 
| 9 |  |  |  |  |  |  | * | 
| 10 |  |  |  |  |  |  | * Software distributed under the License is distributed on an "AS IS" basis, | 
| 11 |  |  |  |  |  |  | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License | 
| 12 |  |  |  |  |  |  | * for the specific language governing rights and limitations under the | 
| 13 |  |  |  |  |  |  | * License. | 
| 14 |  |  |  |  |  |  | * | 
| 15 |  |  |  |  |  |  | * The Original Code is Mozilla Communicator client code. | 
| 16 |  |  |  |  |  |  | * | 
| 17 |  |  |  |  |  |  | * The Initial Developer of the Original Code is | 
| 18 |  |  |  |  |  |  | * Netscape Communications Corporation. | 
| 19 |  |  |  |  |  |  | * Portions created by the Initial Developer are Copyright (C) 1998 | 
| 20 |  |  |  |  |  |  | * the Initial Developer. All Rights Reserved. | 
| 21 |  |  |  |  |  |  | * | 
| 22 |  |  |  |  |  |  | * Contributor(s): | 
| 23 |  |  |  |  |  |  | * | 
| 24 |  |  |  |  |  |  | * Alternatively, the contents of this file may be used under the terms of | 
| 25 |  |  |  |  |  |  | * either the GNU General Public License Version 2 or later (the "GPL"), or | 
| 26 |  |  |  |  |  |  | * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), | 
| 27 |  |  |  |  |  |  | * in which case the provisions of the GPL or the LGPL are applicable instead | 
| 28 |  |  |  |  |  |  | * of those above. If you wish to allow use of your version of this file only | 
| 29 |  |  |  |  |  |  | * under the terms of either the GPL or the LGPL, and not to allow others to | 
| 30 |  |  |  |  |  |  | * use your version of this file under the terms of the MPL, indicate your | 
| 31 |  |  |  |  |  |  | * decision by deleting the provisions above and replace them with the notice | 
| 32 |  |  |  |  |  |  | * and other provisions required by the GPL or the LGPL. If you do not delete | 
| 33 |  |  |  |  |  |  | * the provisions above, a recipient may use your version of this file under | 
| 34 |  |  |  |  |  |  | * the terms of any one of the MPL, the GPL or the LGPL. | 
| 35 |  |  |  |  |  |  | * | 
| 36 |  |  |  |  |  |  | * ***** END LICENSE BLOCK ***** */ | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | #ifndef __JPCNTX_H__ | 
| 39 |  |  |  |  |  |  | #define __JPCNTX_H__ | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | #define NUM_OF_CATEGORY 6 | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | #include "nscore.h" | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | #define ENOUGH_REL_THRESHOLD  100 | 
| 46 |  |  |  |  |  |  | #define MAX_REL_THRESHOLD     1000 | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  | //hiragana frequency category table | 
| 49 |  |  |  |  |  |  | extern char jp2CharContext[83][83]; | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | class JapaneseContextAnalysis | 
| 52 |  |  |  |  |  |  | { | 
| 53 |  |  |  |  |  |  | public: | 
| 54 |  |  |  |  |  |  | JapaneseContextAnalysis() {Reset();}; | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | void HandleData(const char* aBuf, PRUint32 aLen); | 
| 57 |  |  |  |  |  |  |  | 
| 58 | 20 |  |  |  |  |  | void HandleOneChar(const char* aStr, PRUint32 aCharLen) | 
| 59 |  |  |  |  |  |  | { | 
| 60 |  |  |  |  |  |  | PRInt32 order; | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  | //if we received enough data, stop here | 
| 63 | 20 | 50 |  |  |  |  | if (mTotalRel > MAX_REL_THRESHOLD)   mDone = PR_TRUE; | 
| 64 | 20 | 50 |  |  |  |  | if (mDone)       return; | 
| 65 |  |  |  |  |  |  |  | 
| 66 |  |  |  |  |  |  | //Only 2-byte characters are of interest to us | 
| 67 | 20 | 100 |  |  |  |  | order = (aCharLen == 2) ? GetOrder(aStr) : -1; | 
| 68 | 20 | 100 |  |  |  |  | if (order != -1 && mLastCharOrder != -1) | 
|  |  | 100 |  |  |  |  |  | 
| 69 |  |  |  |  |  |  | { | 
| 70 | 10 |  |  |  |  |  | mTotalRel++; | 
| 71 |  |  |  |  |  |  | //count this sequence to its category counter | 
| 72 | 10 |  |  |  |  |  | mRelSample[jp2CharContext[mLastCharOrder][order]]++; | 
| 73 |  |  |  |  |  |  | } | 
| 74 | 20 |  |  |  |  |  | mLastCharOrder = order; | 
| 75 |  |  |  |  |  |  | }; | 
| 76 |  |  |  |  |  |  |  | 
| 77 |  |  |  |  |  |  | float GetConfidence(); | 
| 78 |  |  |  |  |  |  | void      Reset(void); | 
| 79 |  |  |  |  |  |  | void      SetOpion(){}; | 
| 80 |  |  |  |  |  |  | PRBool GotEnoughData() {return mTotalRel > ENOUGH_REL_THRESHOLD;}; | 
| 81 |  |  |  |  |  |  |  | 
| 82 |  |  |  |  |  |  | protected: | 
| 83 |  |  |  |  |  |  | virtual PRInt32 GetOrder(const char* str, PRUint32 *charLen) = 0; | 
| 84 |  |  |  |  |  |  | virtual PRInt32 GetOrder(const char* str) = 0; | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  | //category counters, each integer counts the sequences in its category | 
| 87 |  |  |  |  |  |  | PRUint32 mRelSample[NUM_OF_CATEGORY]; | 
| 88 |  |  |  |  |  |  |  | 
| 89 |  |  |  |  |  |  | //total number of sequences received | 
| 90 |  |  |  |  |  |  | PRUint32 mTotalRel; | 
| 91 |  |  |  |  |  |  |  | 
| 92 |  |  |  |  |  |  | //The order of previous char | 
| 93 |  |  |  |  |  |  | PRInt32  mLastCharOrder; | 
| 94 |  |  |  |  |  |  |  | 
| 95 |  |  |  |  |  |  | //if the last byte in the current buffer is not the last byte of a character, we | 
| 96 |  |  |  |  |  |  | //need to know how many bytes to skip in the next buffer. | 
| 97 |  |  |  |  |  |  | PRUint32 mNeedToSkipCharNum; | 
| 98 |  |  |  |  |  |  |  | 
| 99 |  |  |  |  |  |  | //If this flag is set to PR_TRUE, detection is done and conclusion has been made | 
| 100 |  |  |  |  |  |  | PRBool   mDone; | 
| 101 |  |  |  |  |  |  | }; | 
| 102 |  |  |  |  |  |  |  | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | class SJISContextAnalysis : public JapaneseContextAnalysis | 
| 105 |  |  |  |  |  |  | { | 
| 106 |  |  |  |  |  |  | //SJISContextAnalysis(){}; | 
| 107 |  |  |  |  |  |  | protected: | 
| 108 |  |  |  |  |  |  | PRInt32 GetOrder(const char* str, PRUint32 *charLen); | 
| 109 |  |  |  |  |  |  |  | 
| 110 | 14 |  |  |  |  |  | PRInt32 GetOrder(const char* str) | 
| 111 |  |  |  |  |  |  | { | 
| 112 |  |  |  |  |  |  | //We are only interested in Hiragana, so the first byte is '\202' | 
| 113 | 14 | 100 |  |  |  |  | if (*str == '\202' && | 
|  |  | 50 |  |  |  |  |  | 
| 114 | 12 | 50 |  |  |  |  | (unsigned char)*(str+1) >= (unsigned char)0x9f && | 
| 115 |  |  |  |  |  |  | (unsigned char)*(str+1) <= (unsigned char)0xf1) | 
| 116 | 12 |  |  |  |  |  | return (unsigned char)*(str+1) - (unsigned char)0x9f; | 
| 117 |  |  |  |  |  |  | return -1; | 
| 118 |  |  |  |  |  |  | }; | 
| 119 |  |  |  |  |  |  | }; | 
| 120 |  |  |  |  |  |  |  | 
| 121 |  |  |  |  |  |  | class EUCJPContextAnalysis : public JapaneseContextAnalysis | 
| 122 |  |  |  |  |  |  | { | 
| 123 |  |  |  |  |  |  | protected: | 
| 124 |  |  |  |  |  |  | PRInt32 GetOrder(const char* str, PRUint32 *charLen); | 
| 125 | 0 |  |  |  |  |  | PRInt32 GetOrder(const char* str) | 
| 126 |  |  |  |  |  |  | //We are only interested in Hiragana, so the first byte is '\244' | 
| 127 |  |  |  |  |  |  | { | 
| 128 | 0 | 0 |  |  |  |  | if (*str == '\244' && | 
|  |  | 0 |  |  |  |  |  | 
| 129 | 0 | 0 |  |  |  |  | (unsigned char)*(str+1) >= (unsigned char)0xa1 && | 
| 130 |  |  |  |  |  |  | (unsigned char)*(str+1) <= (unsigned char)0xf3) | 
| 131 | 0 |  |  |  |  |  | return (unsigned char)*(str+1) - (unsigned char)0xa1; | 
| 132 |  |  |  |  |  |  | return -1; | 
| 133 |  |  |  |  |  |  | }; | 
| 134 |  |  |  |  |  |  | }; | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  | #endif /* __JPCNTX_H__ */ | 
| 137 |  |  |  |  |  |  |  |