00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include <stdio.h>
00033 #include <stdlib.h>
00034 #include <string.h>
00035
00036 #include <string>
00037 #include <vector>
00038
00039 #include "sp_spell.h"
00040 #include "ispell_checker.h"
00041
00042 #include <qmap.h>
00043 #include <qdir.h>
00044 #include <qfileinfo.h>
00045
00046
00047
/*
 * One row of the language lookup table: associates a language tag with
 * the ispell hash file that serves it and the character encoding that
 * hash file is stored in.
 */
struct str_ispell_map
{
    const char * lang;   /* language tag, e.g. "en_US" */
    const char * dict;   /* hash file name, e.g. "american.hash" */
    const char * enc;    /* encoding name, e.g. "iso-8859-1" */
};

typedef struct str_ispell_map IspellMap;
00054
/*
 * Directories searched for ispell hash files, in order of preference.
 * The list ends with a null pointer so it can be walked without a
 * separate length.
 */
static const char *ispell_dirs[] =
{
    "/usr/lib/ispell",
    "/usr/local/lib/ispell",
    "/usr/local/share/ispell",
    "/usr/share/ispell",
    "/usr/pkg/lib",
    NULL    /* sentinel */
};
00063 static const IspellMap ispell_map [] = {
00064 {"ca" ,"catala.hash" ,"iso-8859-1" },
00065 {"ca_ES" ,"catala.hash" ,"iso-8859-1" },
00066 {"cs" ,"czech.hash" ,"iso-8859-2" },
00067 {"cs_CZ" ,"czech.hash" ,"iso-8859-2" },
00068 {"da" ,"dansk.hash" ,"iso-8859-1" },
00069 {"da_DK" ,"dansk.hash" ,"iso-8859-1" },
00070 {"de" ,"deutsch.hash" ,"iso-8859-1" },
00071 {"de_CH" ,"swiss.hash" ,"iso-8859-1" },
00072 {"de_AT" ,"deutsch.hash" ,"iso-8859-1" },
00073 {"de_DE" ,"deutsch.hash" ,"iso-8859-1" },
00074 {"el" ,"ellhnika.hash" ,"iso-8859-7" },
00075 {"el_GR" ,"ellhnika.hash" ,"iso-8859-7" },
00076 {"en" ,"british.hash" ,"iso-8859-1" },
00077 {"en_AU" ,"british.hash" ,"iso-8859-1" },
00078 {"en_BZ" ,"british.hash" ,"iso-8859-1" },
00079 {"en_CA" ,"british.hash" ,"iso-8859-1" },
00080 {"en_GB" ,"british.hash" ,"iso-8859-1" },
00081 {"en_IE" ,"british.hash" ,"iso-8859-1" },
00082 {"en_JM" ,"british.hash" ,"iso-8859-1" },
00083 {"en_NZ" ,"british.hash" ,"iso-8859-1" },
00084 {"en_TT" ,"british.hash" ,"iso-8859-1" },
00085 {"en_ZA" ,"british.hash" ,"iso-8859-1" },
00086 {"en_ZW" ,"british.hash" ,"iso-8859-1" },
00087 {"en_PH" ,"american.hash" ,"iso-8859-1" },
00088 {"en_US" ,"american.hash" ,"iso-8859-1" },
00089 {"eo" ,"esperanto.hash" ,"iso-8859-3" },
00090 {"es" ,"espanol.hash" ,"iso-8859-1" },
00091 {"es_AR" ,"espanol.hash" ,"iso-8859-1" },
00092 {"es_BO" ,"espanol.hash" ,"iso-8859-1" },
00093 {"es_CL" ,"espanol.hash" ,"iso-8859-1" },
00094 {"es_CO" ,"espanol.hash" ,"iso-8859-1" },
00095 {"es_CR" ,"espanol.hash" ,"iso-8859-1" },
00096 {"es_DO" ,"espanol.hash" ,"iso-8859-1" },
00097 {"es_EC" ,"espanol.hash" ,"iso-8859-1" },
00098 {"es_ES" ,"espanol.hash" ,"iso-8859-1" },
00099 {"es_GT" ,"espanol.hash" ,"iso-8859-1" },
00100 {"es_HN" ,"espanol.hash" ,"iso-8859-1" },
00101 {"es_MX" ,"espanol.hash" ,"iso-8859-1" },
00102 {"es_NI" ,"espanol.hash" ,"iso-8859-1" },
00103 {"es_PA" ,"espanol.hash" ,"iso-8859-1" },
00104 {"es_PE" ,"espanol.hash" ,"iso-8859-1" },
00105 {"es_PR" ,"espanol.hash" ,"iso-8859-1" },
00106 {"es_PY" ,"espanol.hash" ,"iso-8859-1" },
00107 {"es_SV" ,"espanol.hash" ,"iso-8859-1" },
00108 {"es_UY" ,"espanol.hash" ,"iso-8859-1" },
00109 {"es_VE" ,"espanol.hash" ,"iso-8859-1" },
00110 {"fi" ,"finnish.hash" ,"iso-8859-1" },
00111 {"fi_FI" ,"finnish.hash" ,"iso-8859-1" },
00112 {"fr" ,"francais.hash" ,"iso-8859-1" },
00113 {"fr_BE" ,"francais.hash" ,"iso-8859-1" },
00114 {"fr_CA" ,"francais.hash" ,"iso-8859-1" },
00115 {"fr_CH" ,"francais.hash" ,"iso-8859-1" },
00116 {"fr_FR" ,"francais.hash" ,"iso-8859-1" },
00117 {"fr_LU" ,"francais.hash" ,"iso-8859-1" },
00118 {"fr_MC" ,"francais.hash" ,"iso-8859-1" },
00119 {"hu" ,"hungarian.hash" ,"iso-8859-2" },
00120 {"hu_HU" ,"hungarian.hash" ,"iso-8859-2" },
00121 {"ga" ,"irish.hash" ,"iso-8859-1" },
00122 {"ga_IE" ,"irish.hash" ,"iso-8859-1" },
00123 {"gl" ,"galician.hash" ,"iso-8859-1" },
00124 {"gl_ES" ,"galician.hash" ,"iso-8859-1" },
00125 {"ia" ,"interlingua.hash" ,"iso-8859-1" },
00126 {"it" ,"italian.hash" ,"iso-8859-1" },
00127 {"it_IT" ,"italian.hash" ,"iso-8859-1" },
00128 {"it_CH" ,"italian.hash" ,"iso-8859-1" },
00129 {"la" ,"mlatin.hash" ,"iso-8859-1" },
00130 {"la_IT" ,"mlatin.hash" ,"iso-8859-1" },
00131 {"lt" ,"lietuviu.hash" ,"iso-8859-13" },
00132 {"lt_LT" ,"lietuviu.hash" ,"iso-8859-13" },
00133 {"nl" ,"nederlands.hash" ,"iso-8859-1" },
00134 {"nl_NL" ,"nederlands.hash" ,"iso-8859-1" },
00135 {"nl_BE" ,"nederlands.hash" ,"iso-8859-1" },
00136 {"nb" ,"norsk.hash" ,"iso-8859-1" },
00137 {"nb_NO" ,"norsk.hash" ,"iso-8859-1" },
00138 {"nn" ,"nynorsk.hash" ,"iso-8859-1" },
00139 {"nn_NO" ,"nynorsk.hash" ,"iso-8859-1" },
00140 {"no" ,"norsk.hash" ,"iso-8859-1" },
00141 {"no_NO" ,"norsk.hash" ,"iso-8859-1" },
00142 {"pl" ,"polish.hash" ,"iso-8859-2" },
00143 {"pl_PL" ,"polish.hash" ,"iso-8859-2" },
00144 {"pt" ,"brazilian.hash" ,"iso-8859-1" },
00145 {"pt_BR" ,"brazilian.hash" ,"iso-8859-1" },
00146 {"pt_PT" ,"portugues.hash" ,"iso-8859-1" },
00147 {"ru" ,"russian.hash" ,"koi8-r" },
00148 {"ru_MD" ,"russian.hash" ,"koi8-r" },
00149 {"ru_RU" ,"russian.hash" ,"koi8-r" },
00150 {"sc" ,"sardinian.hash" ,"iso-8859-1" },
00151 {"sc_IT" ,"sardinian.hash" ,"iso-8859-1" },
00152 {"sk" ,"slovak.hash" ,"iso-8859-2" },
00153 {"sk_SK" ,"slovak.hash" ,"iso-8859-2" },
00154 {"sl" ,"slovensko.hash" ,"iso-8859-2" },
00155 {"sl_SI" ,"slovensko.hash" ,"iso-8859-2" },
00156 {"sv" ,"svenska.hash" ,"iso-8859-1" },
00157 {"sv_SE" ,"svenska.hash" ,"iso-8859-1" },
00158 {"uk" ,"ukrainian.hash" ,"koi8-u" },
00159 {"uk_UA" ,"ukrainian.hash" ,"koi8-u" },
00160 {"yi" ,"yiddish-yivo.hash" ,"utf-8" }
00161 };
00162
00163 static const size_t size_ispell_map = ( sizeof(ispell_map) / sizeof((ispell_map)[0]) );
00164 static QMap<QString, QString> ispell_dict_map;
00165
00166
00167 void
00168 ISpellChecker::try_autodetect_charset(const char * const inEncoding)
00169 {
00170 if (inEncoding && strlen(inEncoding))
00171 {
00172 m_translate_in = QTextCodec::codecForName(inEncoding);
00173 }
00174 }
00175
00176
00177
00178
00179 ISpellChecker::ISpellChecker()
00180 : deftflag(-1),
00181 prefstringchar(-1),
00182 m_bSuccessfulInit(false),
00183 m_BC(NULL),
00184 m_cd(NULL),
00185 m_cl(NULL),
00186 m_cm(NULL),
00187 m_ho(NULL),
00188 m_nd(NULL),
00189 m_so(NULL),
00190 m_se(NULL),
00191 m_ti(NULL),
00192 m_te(NULL),
00193 m_hashstrings(NULL),
00194 m_hashtbl(NULL),
00195 m_pflaglist(NULL),
00196 m_sflaglist(NULL),
00197 m_chartypes(NULL),
00198 m_infile(NULL),
00199 m_outfile(NULL),
00200 m_askfilename(NULL),
00201 m_Trynum(0),
00202 m_translate_in(0)
00203 {
00204 memset(m_sflagindex,0,sizeof(m_sflagindex));
00205 memset(m_pflagindex,0,sizeof(m_pflagindex));
00206 }
00207
00208 #ifndef FREEP
00209 #define FREEP(p) do { if (p) free(p); } while (0)
00210 #endif
00211
00212 ISpellChecker::~ISpellChecker()
00213 {
00214 if (m_bSuccessfulInit) {
00215
00216
00217 clearindex (m_pflagindex);
00218 clearindex (m_sflagindex);
00219 }
00220
00221 FREEP(m_hashtbl);
00222 FREEP(m_hashstrings);
00223 FREEP(m_sflaglist);
00224 FREEP(m_chartypes);
00225
00226 delete m_translate_in;
00227 m_translate_in = 0;
00228 }
00229
00230 bool
00231 ISpellChecker::checkWord( const QString& utf8Word )
00232 {
00233 ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
00234 if (!m_bSuccessfulInit)
00235 return false;
00236
00237 if (!utf8Word || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) || utf8Word.isEmpty())
00238 return false;
00239
00240 bool retVal = false;
00241 QCString out;
00242 if (!m_translate_in)
00243 return false;
00244 else {
00245
00246 int len_out = utf8Word.length();
00247
00248 out = m_translate_in->fromUnicode( utf8Word, len_out );
00249 }
00250
00251 if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
00252 {
00253 if (good(iWord, 0, 0, 1, 0) == 1 ||
00254 compoundgood(iWord, 1) == 1)
00255 {
00256 retVal = true;
00257 }
00258 }
00259
00260 return retVal;
00261 }
00262
00263 QStringList
00264 ISpellChecker::suggestWord(const QString& utf8Word)
00265 {
00266 ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
00267 int c;
00268
00269 if (!m_bSuccessfulInit)
00270 return QStringList();
00271
00272 if (utf8Word.isEmpty() || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) ||
00273 utf8Word.length() == 0)
00274 return QStringList();
00275
00276 QCString out;
00277 if (!m_translate_in)
00278 return QStringList();
00279 else
00280 {
00281
00282
00283 int len_out = utf8Word.length();
00284 out = m_translate_in->fromUnicode( utf8Word, len_out );
00285 }
00286
00287 if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
00288 makepossibilities(iWord);
00289 else
00290 return QStringList();
00291
00292 QStringList sugg_arr;
00293 for (c = 0; c < m_pcount; c++)
00294 {
00295 QString utf8Word;
00296
00297 if (!m_translate_in)
00298 {
00299
00300 utf8Word = QString::fromUtf8( m_possibilities[c] );
00301 }
00302 else
00303 {
00304
00305 utf8Word = m_translate_in->toUnicode( m_possibilities[c] );
00306 }
00307
00308 sugg_arr.append( utf8Word );
00309 }
00310
00311 return sugg_arr;
00312 }
00313
00314 static void
00315 s_buildHashNames (std::vector<std::string> & names, const char * dict)
00316 {
00317 const char * tmp = 0;
00318 int i = 0;
00319
00320 names.clear ();
00321
00322 while ( (tmp = ispell_dirs[i++]) ) {
00323 QCString maybeFile = QCString( tmp ) + '/';
00324 maybeFile += dict;
00325 names.push_back( maybeFile.data() );
00326 }
00327 }
00328
00329 static void
00330 s_allDics()
00331 {
00332 const char * tmp = 0;
00333 int i = 0;
00334
00335 while ( (tmp = ispell_dirs[i++]) ) {
00336 QDir dir( tmp );
00337 QStringList lst = dir.entryList( "*.hash" );
00338 for ( QStringList::Iterator it = lst.begin(); it != lst.end(); ++it ) {
00339 QFileInfo info( *it );
00340 for (size_t i = 0; i < size_ispell_map; i++)
00341 {
00342 const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
00343 if (!strcmp (info.fileName().latin1(), mapping->dict))
00344 {
00345 ispell_dict_map.insert( mapping->lang, *it );
00346 }
00347 }
00348 }
00349 }
00350 }
00351
00352 QValueList<QString>
00353 ISpellChecker::allDics()
00354 {
00355 if ( ispell_dict_map.empty() )
00356 s_allDics();
00357
00358 return ispell_dict_map.keys();
00359 }
00360
00361 QString
00362 ISpellChecker::loadDictionary (const char * szdict)
00363 {
00364 std::vector<std::string> dict_names;
00365
00366 s_buildHashNames (dict_names, szdict);
00367
00368 for (size_t i = 0; i < dict_names.size(); i++)
00369 {
00370 if (linit(const_cast<char*>(dict_names[i].c_str())) >= 0)
00371 return dict_names[i].c_str();
00372 }
00373
00374 return QString::null;
00375 }
00376
00383 bool
00384 ISpellChecker::loadDictionaryForLanguage ( const char * szLang )
00385 {
00386 QString hashname;
00387
00388 const char * encoding = NULL;
00389 const char * szFile = NULL;
00390
00391 for (size_t i = 0; i < size_ispell_map; i++)
00392 {
00393 const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
00394 if (!strcmp (szLang, mapping->lang))
00395 {
00396 szFile = mapping->dict;
00397 encoding = mapping->enc;
00398 break;
00399 }
00400 }
00401
00402 if (!szFile || !strlen(szFile))
00403 return false;
00404
00405 alloc_ispell_struct();
00406
00407 hashname = loadDictionary(szFile);
00408 if (hashname.isEmpty())
00409 return false;
00410
00411
00412 setDictionaryEncoding (hashname, encoding);
00413
00414 return true;
00415 }
00416
00417 void
00418 ISpellChecker::setDictionaryEncoding( const QString& hashname, const char * encoding )
00419 {
00420
00421 try_autodetect_charset(encoding);
00422
00423 if (m_translate_in)
00424 {
00425
00426 prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag
00427 : static_cast<int *>(NULL));
00428
00429 if (prefstringchar < 0)
00430 {
00431 std::string teststring;
00432 for(int n1 = 1; n1 <= 15; n1++)
00433 {
00434 teststring = "latin" + n1;
00435 prefstringchar = findfiletype(teststring.c_str(), 1,
00436 deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
00437 if (prefstringchar >= 0)
00438 break;
00439 }
00440 }
00441
00442 return;
00443 }
00444
00445
00446 prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
00447 if (prefstringchar >= 0)
00448 {
00449 m_translate_in = QTextCodec::codecForName("utf8");
00450 }
00451
00452 if (m_translate_in)
00453 return;
00454
00455
00456 if (!m_translate_in)
00457 {
00458
00459 for(int n1 = 1; n1 <= 15; n1++)
00460 {
00461 QString teststring = QString("latin%1").arg(n1);
00462 prefstringchar = findfiletype(teststring.latin1(), 1,
00463 deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
00464 if (prefstringchar >= 0)
00465 {
00466
00467 m_translate_in = QTextCodec::codecForName( teststring.latin1() );
00468 break;
00469 }
00470 }
00471 }
00472
00473
00474 if (!m_translate_in)
00475 {
00476 m_translate_in = QTextCodec::codecForName("latin1");
00477 }
00478 }
00479
00480 bool
00481 ISpellChecker::requestDictionary(const char *szLang)
00482 {
00483 if (!loadDictionaryForLanguage (szLang))
00484 {
00485
00486 std::string shortened_dict (szLang);
00487 size_t uscore_pos;
00488
00489 if ((uscore_pos = shortened_dict.rfind ('_')) != ((size_t)-1)) {
00490 shortened_dict = shortened_dict.substr(0, uscore_pos);
00491 if (!loadDictionaryForLanguage (shortened_dict.c_str()))
00492 return false;
00493 } else
00494 return false;
00495 }
00496
00497 m_bSuccessfulInit = true;
00498
00499 if (prefstringchar < 0)
00500 m_defdupchar = 0;
00501 else
00502 m_defdupchar = prefstringchar;
00503
00504 return true;
00505 }