correct.cpp

00001 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
00002 /* enchant
00003  * Copyright (C) 2003 Dom Lachowicz
00004  *
00005  * This library is free software; you can redistribute it and/or
00006  * modify it under the terms of the GNU Lesser General Public
00007  * License as published by the Free Software Foundation; either
00008  * version 2.1 of the License, or (at your option) any later version.
00009  *
00010  * This library is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  * Lesser General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU Lesser General Public
00016  * License along with this library; if not, write to the
00017  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018  * Boston, MA 02110-1301, USA.
00019  *
00020  * In addition, as a special exception, Dom Lachowicz
00021  * gives permission to link the code of this program with
00022  * non-LGPL Spelling Provider libraries (eg: a MSFT Office
00023  * spell checker backend) and distribute linked combinations including
00024  * the two.  You must obey the GNU Lesser General Public License in all
00025  * respects for all of the code used other than said providers.  If you modify
00026  * this file, you may extend this exception to your version of the
00027  * file, but you are not obligated to do so.  If you do not wish to
00028  * do so, delete this exception statement from your version.
00029  */
00030 
00031 /*
00032  * correct.c - Routines to manage the higher-level aspects of spell-checking
00033  *
00034  * This code originally resided in ispell.c, but was moved here to keep
00035  * file sizes smaller.
00036  *
00037  * Copyright (c), 1983, by Pace Willisson
00038  *
00039  * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
00040  * All rights reserved.
00041  *
00042  * Redistribution and use in source and binary forms, with or without
00043  * modification, are permitted provided that the following conditions
00044  * are met:
00045  *
00046  * 1. Redistributions of source code must retain the above copyright
00047  *    notice, this list of conditions and the following disclaimer.
00048  * 2. Redistributions in binary form must reproduce the above copyright
00049  *    notice, this list of conditions and the following disclaimer in the
00050  *    documentation and/or other materials provided with the distribution.
00051  * 3. All modifications to the source code must be clearly marked as
00052  *    such.  Binary redistributions based on modified source code
00053  *    must be clearly marked as modified versions in the documentation
00054  *    and/or other materials provided with the distribution.
00055  * 4. All advertising materials mentioning features or use of this software
00056  *    must display the following acknowledgment:
00057  *      This product includes software developed by Geoff Kuenning and
00058  *      other unpaid contributors.
00059  * 5. The name of Geoff Kuenning may not be used to endorse or promote
00060  *    products derived from this software without specific prior
00061  *    written permission.
00062  *
00063  * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
00064  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00065  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00066  * ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
00067  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00068  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00069  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00070  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00071  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00072  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00073  * SUCH DAMAGE.
00074  */
00075 
00076 /*
00077  * $Log$
00078  * Revision 1.1  2004/01/31 16:44:12  zrusin
00079  * ISpell plugin.
00080  *
00081  * Revision 1.4  2003/08/14 17:51:26  dom
00082  * update license - exception clause should be Lesser GPL
00083  *
00084  * Revision 1.3  2003/07/28 20:40:25  dom
00085  * fix up the license clause, further win32-registry proof some directory getting functions
00086  *
00087  * Revision 1.2  2003/07/16 22:52:35  dom
00088  * LGPL + exception license
00089  *
00090  * Revision 1.1  2003/07/15 01:15:04  dom
00091  * ispell enchant backend
00092  *
00093  * Revision 1.2  2003/01/29 05:50:11  hippietrail
00094  *
00095  * Fixed my mess in EncodingManager.
00096  * Changed many C casts to C++ casts.
00097  *
00098  * Revision 1.1  2003/01/24 05:52:31  hippietrail
00099  *
00100  * Refactored ispell code. Old ispell global variables had been put into
00101  * an allocated structure, a pointer to which was passed to many functions.
00102  * I have now made all such functions and variables private members of the
00103  * ISpellChecker class. It was C OO, now it's C++ OO.
00104  *
00105  * I've fixed the makefiles and tested compilation but am unable to test
00106  * operation. Please back out my changes if they cause problems which
00107  * are not obvious or easy to fix.
00108  *
00109  * Revision 1.7  2002/09/19 05:31:15  hippietrail
00110  *
00111  * More Ispell cleanup.  Conditional globals and DEREF macros are removed.
00112  * K&R function declarations removed, converted to Doxygen style comments
00113  * where possible.  No code has been changed (I hope).  Compiles for me but
00114  * unable to test.
00115  *
00116  * Revision 1.6  2002/09/17 03:03:28  hippietrail
00117  *
00118  * After seeking permission on the developer list I've reformatted all the
00119  * spelling source which seemed to have parts which used 2, 3, 4, and 8
00120  * spaces for tabs.  It should all look good with our standard 4-space
00121  * tabs now.
00122  * I've concentrated just on indentation in the actual code.  More prettying
00123  * could be done.
00124  * * NO code changes were made *
00125  *
00126  * Revision 1.5  2002/09/13 17:20:12  mpritchett
00127  * Fix more warnings for Linux build
00128  *
00129  * Revision 1.4  2002/03/06 08:27:16  fjfranklin
00130  * o Only activate compound handling when the hash file says so (Per Larsson)
00131  *
00132  * Revision 1.3  2001/05/14 09:52:50  hub
00133  * Removed newMain.c from GNUmakefile.am
00134  *
00135  * C++ comments are not C comment. Changed to C comments
00136  *
00137  * Revision 1.2  2001/05/12 16:05:42  thomasf
00138  * Big pseudo changes to ispell to make it pass around a structure rather
00139  * than rely on all sorts of globals willy nilly here and there.  Also
00140  * fixed our spelling class to work with accepting suggestions once more.
00141  * This code is dirty, gross and ugly (not to mention still not supporting
00142  * multiple hash sized just yet) but it works on my machine and will no
00143  * doubt break other machines.
00144  *
00145  * Revision 1.1  2001/04/15 16:01:24  tomas_f
00146  * moving to spell/xp
00147  *
00148  * Revision 1.2  1999/10/05 16:17:28  paul
00149  * Fixed build, and other tidiness.
00150  * Spell dialog enabled by default, with keyboard binding of F7.
00151  *
00152  * Revision 1.1  1999/09/29 23:33:32  justin
00153  * Updates to the underlying ispell-based code to support suggested corrections.
00154  *
00155  * Revision 1.59  1995/08/05  23:19:43  geoff
00156  * Fix a bug that caused offsets for long lines to be confused if the
00157  * line started with a quoting uparrow.
00158  *
00159  * Revision 1.58  1994/11/02  06:56:00  geoff
00160  * Remove the anyword feature, which I've decided is a bad idea.
00161  *
00162  * Revision 1.57  1994/10/26  05:12:39  geoff
00163  * Try boundary characters when inserting or substituting letters, except
00164  * (naturally) at word boundaries.
00165  *
00166  * Revision 1.56  1994/10/25  05:46:30  geoff
00167  * Fix an assignment inside a conditional that could generate spurious
00168  * warnings (as well as being bad style).  Add support for the FF_ANYWORD
00169  * option.
00170  *
00171  * Revision 1.55  1994/09/16  04:48:24  geoff
00172  * Don't pass newlines from the input to various other routines, and
00173  * don't assume that those routines leave the input unchanged.
00174  *
00175  * Revision 1.54  1994/09/01  06:06:41  geoff
00176  * Change erasechar/killchar to uerasechar/ukillchar to avoid
00177  * shared-library problems on HP systems.
00178  *
00179  * Revision 1.53  1994/08/31  05:58:38  geoff
00180  * Add code to handle extremely long lines in -a mode without splitting
00181  * words or reporting incorrect offsets.
00182  *
00183  * Revision 1.52  1994/05/25  04:29:24  geoff
00184  * Fix a bug that caused line widths to be calculated incorrectly when
00185  * displaying lines containing tabs.  Fix a couple of places where
00186  * characters were sign-extended incorrectly, which could cause 8-bit
00187  * characters to be displayed wrong.
00188  *
00189  * Revision 1.51  1994/05/17  06:44:05  geoff
00190  * Add support for controlled compound formation and the COMPOUNDONLY
00191  * option to affix flags.
00192  *
00193  * Revision 1.50  1994/04/27  05:20:14  geoff
00194  * Allow compound words to be formed from more than two components
00195  *
00196  * Revision 1.49  1994/04/27  01:50:31  geoff
00197  * Add support to correctly capitalize words generated as a result of a
00198  * missing-space suggestion.
00199  *
00200  * Revision 1.48  1994/04/03  23:23:02  geoff
00201  * Clean up the code in missingspace() to be a bit simpler and more
00202  * efficient.
00203  *
00204  * Revision 1.47  1994/03/15  06:24:23  geoff
00205  * Fix the +/-/~ commands to be independent.  Allow the + command to
00206  * receive a suffix which is a deformatter type (currently hardwired to
00207  * be either tex or nroff/troff).
00208  *
00209  * Revision 1.46  1994/02/21  00:20:03  geoff
00210  * Fix some bugs that could cause bad displays in the interaction between
00211  * TeX parsing and string characters.  Show_char now will not overrun
00212  * the inverse-video display area by accident.
00213  *
00214  * Revision 1.45  1994/02/14  00:34:51  geoff
00215  * Fix correct to accept length parameters for ctok and itok, so that it
00216  * can pass them to the to/from ichar routines.
00217  *
00218  * Revision 1.44  1994/01/25  07:11:22  geoff
00219  * Get rid of all old RCS log lines in preparation for the 3.1 release.
00220  *
00221  */
00222 
00223 #include <stdlib.h>
00224 #include <string.h>
00225 #include <ctype.h>
00226 #include "ispell_checker.h"
00227 #include "msgs.h"
00228 
00229 /*
00230 extern void upcase P ((ichar_t * string));
00231 extern void lowcase P ((ichar_t * string));
00232 extern ichar_t * strtosichar P ((char * in, int canonical));
00233 
00234 int compoundflag = COMPOUND_CONTROLLED;
00235 */
00236 
00237 /* Compare two strings by m_hashheader.sortorder; case only breaks ties.
00238  * \param a First string; converted with strtoichar before comparing
00239  * \param b Second string; converted the same way
00240  * \param canonical NZ for canonical string chars
00241  * \return 0 if the converted strings are identical; otherwise a value built from the sortorder
00242  *         entries of the deciding characters (a's minus b's; only one entry if a string ended)
00243  */
00244 int
00245 ISpellChecker::casecmp (char *a, char *b, int canonical)
00246 {
00247     register ichar_t *  ap;
00248     register ichar_t *  bp;
00249     ichar_t     inta[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
00250     ichar_t     intb[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
00251 
00252     strtoichar (inta, a, sizeof inta, canonical);
00253     strtoichar (intb, b, sizeof intb, canonical);
00254     for (ap = inta, bp = intb;  *ap != 0;  ap++, bp++)  /* Pass 1: skip case-only differences */
00255     {
00256         if (*ap != *bp)
00257         {
00258             if (*bp == '\0')    /* b ended first */
00259                 return m_hashheader.sortorder[*ap];
00260             else if (mylower (*ap))
00261             {
00262                 if (mylower (*bp)  ||  mytoupper (*ap) != *bp)  /* Not merely a's lowercase vs. b's uppercase */
00263                     return static_cast<int>(m_hashheader.sortorder[*ap])
00264                       - static_cast<int>(m_hashheader.sortorder[*bp]);
00265             }
00266             else
00267             {
00268                 if (myupper (*bp)  ||  mytolower (*ap) != *bp)  /* Not merely a's uppercase vs. b's lowercase */
00269                     return static_cast<int>(m_hashheader.sortorder[*ap])
00270                       - static_cast<int>(m_hashheader.sortorder[*bp]);
00271             }
00272         }
00273     }
00274     if (*bp != '\0')    /* a ended first */
00275         return -static_cast<int>(m_hashheader.sortorder[*bp]);
00276     for (ap = inta, bp = intb;  *ap;  ap++, bp++)   /* Pass 2: same apart from case, so let case decide */
00277     {
00278         if (*ap != *bp)
00279         {
00280             return static_cast<int>(m_hashheader.sortorder[*ap])
00281               - static_cast<int>(m_hashheader.sortorder[*bp]);
00282         }
00283     }
00284     return 0;
00285 }
00286 
00287 /* Rebuild the possibilities list (m_possibilities, m_pcount) for word, one correction strategy at a time.
00288  * \param word Word to generate possibilities for; handed unchanged to each strategy routine
00289  */
00290 void
00291 ISpellChecker::makepossibilities (ichar_t *word)
00292 {
00293     register int    i;
00294 
00295     for (i = 0; i < MAXPOSSIBLE; i++)
00296     m_possibilities[i][0] = 0;     /* Empty every slot of the list */
00297     m_pcount = 0;
00298     m_maxposslen = 0;
00299     m_easypossibilities = 0;
00300 
00301 #ifndef NO_CAPITALIZATION_SUPPORT
00302     wrongcapital (word);
00303 #endif
00304 
00305 /* 
00306  * according to Pollock and Zamora, CACM April 1984 (V. 27, No. 4),
00307  * page 363, the correct order for this is:
00308  * OMISSION = TRANSPOSITION > INSERTION > SUBSTITUTION
00309  * thus, it was exactly backwards in the old version. -- PWP
00310  */
00311 
00312     if (m_pcount < MAXPOSSIBLE)
00313         missingletter (word);       /* omission */
00314     if (m_pcount < MAXPOSSIBLE)
00315         transposedletter (word);    /* transposition */
00316     if (m_pcount < MAXPOSSIBLE)
00317         extraletter (word);     /* insertion */
00318     if (m_pcount < MAXPOSSIBLE)
00319         wrongletter (word);     /* substitution */
00320 
00321     if ((m_hashheader.compoundflag != COMPOUND_ANYTIME)  &&
00322           m_pcount < MAXPOSSIBLE)
00323         missingspace (word);    /* two words */
00324 
00325 }
00326 
00327 /* Add word to m_possibilities unless an identical string is already stored there.
00328  * \param word Word to add; converted with ichartosstr before it is compared and stored
00329  *
00330  * \return -1 if the list holds MAXPOSSIBLE (or more) entries after adding, 0 otherwise (also for duplicates)
00331  */
00332 int
00333 ISpellChecker::insert (ichar_t *word)
00334 {
00335     register int    i;
00336     register char * realword;
00337 
00338     realword = ichartosstr (word, 0);
00339     for (i = 0; i < m_pcount; i++)
00340     {
00341         if (strcmp (m_possibilities[i], realword) == 0)
00342             return (0);     /* Already listed; nothing to add */
00343     }
00344 
00345     strcpy (m_possibilities[m_pcount++], realword);  /* NOTE(review): no room check here; callers appear to stop after a -1 return -- confirm */
00346     i = strlen (realword);
00347     if (i > m_maxposslen)   /* Track the longest stored possibility */
00348         m_maxposslen = i;
00349     if (m_pcount >= MAXPOSSIBLE)
00350         return (-1);
00351     else
00352         return (0);
00353 }
00354 
00355 #ifndef NO_CAPITALIZATION_SUPPORT
00356 /* Offer word with corrected capitalization when good() accepts it with case ignored.
00357  * \param word Word to test; also used as the capitalization pattern for ins_cap
00358  */
00359 void
00360 ISpellChecker::wrongcapital (ichar_t *word)
00361 {
00362     ichar_t     newword[INPUTWORDLEN + MAXAFFIXLEN];
00363 
00364     /*
00365     ** When the third parameter to "good" is nonzero, it ignores
00366     ** case.  If the word matches this way, "ins_cap" will recapitalize
00367     ** it correctly.
00368     */
00369     if (good (word, 0, 1, 0, 0))
00370     {
00371         icharcpy (newword, word);
00372         upcase (newword);   /* ins_cap is given the uppercased copy plus the original as pattern */
00373         ins_cap (newword, word);
00374     }
00375 }
00376 #endif
00377 
00378 /* Offer words that differ from word in exactly one position, substituting each character of m_Try.
00379  * \param word Word to vary; also the capitalization pattern passed to ins_cap
00380  */
00381 void
00382 ISpellChecker::wrongletter (ichar_t *word)
00383 {
00384     register int    i;
00385     register int    j;
00386     register int    n;
00387     ichar_t     savechar;
00388     ichar_t     newword[INPUTWORDLEN + MAXAFFIXLEN];
00389 
00390     n = icharlen (word);
00391     icharcpy (newword, word);
00392 #ifndef NO_CAPITALIZATION_SUPPORT
00393     upcase (newword);
00394 #endif
00395 
00396     for (i = 0; i < n; i++)
00397     {
00398         savechar = newword[i];
00399         for (j=0; j < m_Trynum; ++j)
00400         {
00401             if (m_Try[j] == savechar)
00402                 continue;   /* Same character: not a substitution */
00403             else if (isboundarych (m_Try[j])  &&  (i == 0  ||  i == n - 1))
00404                 continue;   /* No boundary characters at either end of the word */
00405             newword[i] = m_Try[j];
00406             if (good (newword, 0, 1, 0, 0))
00407             {
00408                 if (ins_cap (newword, word) < 0)
00409                     return; /* ins_cap reported failure (insert returned < 0) */
00410             }
00411         }
00412         newword[i] = savechar;  /* Restore before moving to the next position */
00413     }
00414 }
00415 
00416 /* Offer words formed by deleting one character from word; words shorter than 2 are skipped.
00417  * \param word Word to vary; also the capitalization pattern passed to ins_cap
00418  */
00419 void
00420 ISpellChecker::extraletter (ichar_t *word)
00421 {
00422     ichar_t     newword[INPUTWORDLEN + MAXAFFIXLEN];
00423     register ichar_t *  p;
00424     register ichar_t *  r;
00425 
00426     if (icharlen (word) < 2)
00427         return;
00428 
00429     icharcpy (newword, word + 1);   /* Start with the first character deleted */
00430     for (p = word, r = newword;  *p != 0;  )
00431     {
00432         if (good (newword, 0, 1, 0, 0))
00433         {
00434             if (ins_cap (newword, word) < 0)
00435                 return;
00436         }
00437         *r++ = *p++;    /* Put this character back, which deletes the next one instead */
00438     }
00439 }
00440 
00441 /* Offer words formed by inserting one character of m_Try at each position of word, including the end.
00442  * \param word Word to vary; also the capitalization pattern passed to ins_cap
00443  */
00444 void
00445 ISpellChecker::missingletter (ichar_t *word)
00446 {
00447     ichar_t     newword[INPUTWORDLEN + MAXAFFIXLEN + 1];
00448     register ichar_t *  p;
00449     register ichar_t *  r;
00450     register int    i;
00451 
00452     icharcpy (newword + 1, word);   /* newword[0] is the first insertion slot */
00453     for (p = word, r = newword;  *p != 0;  )
00454     {
00455         for (i = 0;  i < m_Trynum;  i++)
00456         {
00457             if (isboundarych (m_Try[i])  &&  r == newword)
00458                 continue;   /* No boundary character at the front of the word */
00459             *r = m_Try[i];
00460             if (good (newword, 0, 1, 0, 0))
00461             {
00462                 if (ins_cap (newword, word) < 0)
00463                     return;
00464             }
00465         }
00466         *r++ = *p++;    /* Fill the slot with the real character; the slot moves one place right */
00467     }
00468     for (i = 0;  i < m_Trynum;  i++)    /* Finally try the slot after the last character */
00469     {
00470         if (isboundarych (m_Try[i]))
00471             continue;   /* No boundary character at the end of the word */
00472         *r = m_Try[i];
00473         if (good (newword, 0, 1, 0, 0))
00474         {
00475             if (ins_cap (newword, word) < 0)
00476                 return;
00477         }
00478     }
00479 }
00480 
00481 /* Offer every split of word into two halves that good() accepts, joined by ' ' and by '-'.
00482  * \param word Word to split; also the capitalization pattern for the first half
00483  */
00484 void ISpellChecker::missingspace (ichar_t *word)
00485 {
00486     ichar_t     firsthalf[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
00487     int         firstno;    /* Index into first */
00488     ichar_t *       firstp;     /* Ptr into current firsthalf word */
00489     ichar_t     newword[INPUTWORDLEN + MAXAFFIXLEN + 1];
00490     int         nfirsthalf; /* No. words saved in 1st half */
00491     int         nsecondhalf;    /* No. words saved in 2nd half */
00492     register ichar_t *  p;
00493     ichar_t     secondhalf[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
00494     int         secondno;   /* Index into second */
00495 
00496     /*
00497     ** We don't do words of length less than 3;  this keeps us from
00498     ** splitting all two-letter words into two single letters.  We
00499     ** also don't do maximum-length words, since adding the space
00500     ** would exceed the size of the "possibilities" array.
00501     */
00502     nfirsthalf = icharlen (word);   /* Used here only as the word length */
00503     if (nfirsthalf < 3  ||  nfirsthalf >= INPUTWORDLEN + MAXAFFIXLEN - 1)
00504         return;
00505     icharcpy (newword + 1, word);   /* Leave newword[0] free so the first half can slide left */
00506     for (p = newword + 1;  p[1] != '\0';  p++)
00507     {
00508         p[-1] = *p;     /* Move one more character into the first half ... */
00509         *p = '\0';      /* ... and terminate it; the second half starts at p + 1 */
00510         if (good (newword, 0, 1, 0, 0))
00511         {
00512             /*
00513              * Save_cap must be called before good() is called on the
00514              * second half, because it uses state left around by
00515              * good().  This is unfortunate because it wastes a bit of
00516              * time, but I don't think it's a significant performance
00517              * problem.
00518              */
00519             nfirsthalf = save_cap (newword, word, firsthalf);
00520             if (good (p + 1, 0, 1, 0, 0))
00521             {
00522                 nsecondhalf = save_cap (p + 1, p + 1, secondhalf);  /* Second half serves as its own pattern */
00523                 for (firstno = 0;  firstno < nfirsthalf;  firstno++)
00524                 {
00525                     firstp = &firsthalf[firstno][p - newword];  /* Offset p - newword is the first half's length: the join point */
00526                     for (secondno = 0;  secondno < nsecondhalf;  secondno++)
00527                     {
00528                         *firstp = ' ';      /* Offer "first second" ... */
00529                         icharcpy (firstp + 1, secondhalf[secondno]);
00530                         if (insert (firsthalf[firstno]) < 0)
00531                             return;
00532                         *firstp = '-';      /* ... and "first-second" */
00533                         if (insert (firsthalf[firstno]) < 0)
00534                             return;
00535                     }
00536                 }
00537             }
00538         }
00539     }
00540 }
00541 
00542 /* Test whether word splits into parts that good() accepts as a compound with compatible capitalization.
00543  * \param word Word to test; it is copied, and the copy is cut at each candidate split point
00544  * \param pfxopts Options to apply to prefixes
00545  * \return Nonzero if an acceptable split was found, 0 otherwise */
00546 int
00547 ISpellChecker::compoundgood (ichar_t *word, int pfxopts)
00548 {
00549     ichar_t     newword[INPUTWORDLEN + MAXAFFIXLEN];
00550     register ichar_t *  p;
00551     register ichar_t    savech;
00552     long        secondcap;  /* Capitalization of 2nd half */
00553 
00554     /*
00555     ** If compoundflag is COMPOUND_NEVER, compound words are never ok.
00556     */
00557     if (m_hashheader.compoundflag == COMPOUND_NEVER)
00558         return 0;
00559     /*
00560     ** Test for a possible compound word (for languages like German that
00561     ** form lots of compounds).
00562     **
00563     ** This is similar to missingspace, except we quit on the first hit,
00564     ** and we won't allow either member of the compound to be a single
00565     ** letter.
00566     **
00567     ** We don't do words of length less than 2 * compoundmin, since
00568     ** both halves must at least compoundmin letters.
00569     */
00570     if (icharlen (word) < 2 * m_hashheader.compoundmin)
00571         return 0;
00572     icharcpy (newword, word);
00573     p = newword + m_hashheader.compoundmin;    /* First half gets at least compoundmin characters */
00574     for (  ;  p[m_hashheader.compoundmin - 1] != 0;  p++)  /* Stop once fewer than compoundmin characters would remain */
00575     {
00576         savech = *p;
00577         *p = 0;     /* Temporarily cut newword at the split point */
00578         if (good (newword, 0, 0, pfxopts, FF_COMPOUNDONLY))
00579         {
00580             *p = savech;    /* Restore, so p is the whole second half */
00581             if (good (p, 0, 1, FF_COMPOUNDONLY, 0)
00582               ||  compoundgood (p, FF_COMPOUNDONLY))    /* The second half may itself be a compound */
00583             {
00584                 secondcap = whatcap (p);
00585                 switch (whatcap (newword))  /* Any other whatcap value falls out of the switch and the scan continues */
00586                 {
00587                 case ANYCASE:
00588                 case CAPITALIZED:
00589                 case FOLLOWCASE:    /* Followcase can have l.c. suffix */
00590                     return secondcap == ANYCASE;
00591                 case ALLCAPS:
00592                     return secondcap == ALLCAPS;
00593                 }
00594             }
00595         }
00596         else
00597             *p = savech;
00598     }
00599     return 0;
00600 }
00601 
00602 /* Offer words formed by swapping each pair of adjacent characters of word.
00603  * \param word Word to vary; also the capitalization pattern passed to ins_cap
00604  */
00605 void
00606 ISpellChecker::transposedletter (ichar_t *word)
00607 {
00608     ichar_t     newword[INPUTWORDLEN + MAXAFFIXLEN];
00609     register ichar_t *  p;
00610     register ichar_t    temp;
00611 
00612     icharcpy (newword, word);
00613     for (p = newword;  p[1] != 0;  p++)
00614     {
00615         temp = *p;      /* Swap p[0] and p[1] */
00616         *p = p[1];
00617         p[1] = temp;
00618         if (good (newword, 0, 1, 0, 0))
00619         {
00620             if (ins_cap (newword, word) < 0)
00621                 return;
00622         }
00623         temp = *p;      /* Swap back before trying the next pair */
00624         *p = p[1];
00625         p[1] = temp;
00626     }
00627 }
00628 /*
00629  * Insert into the possibilities list every capitalization of word that
00630  * save_cap produces for the given pattern.
00631  *
00632  * \param word     Word to insert
00633  * \param pattern  Sample word passed to save_cap to choose the capitalization
00634  *
00635  * \return -1 if insert returned a negative value for one of the words, 0 otherwise
00636  */
00637 int
00638 ISpellChecker::ins_cap (ichar_t *word, ichar_t *pattern)
00639 {
00640     int         i;      /* Index into savearea */
00641     int         nsaved;     /* No. of words saved */
00642     ichar_t     savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
00643 
00644     nsaved = save_cap (word, pattern, savearea);
00645     for (i = 0;  i < nsaved;  i++)
00646     {
00647         if (insert (savearea[i]) < 0)
00648             return -1;  /* Stop at the first failed insert; later words are dropped */
00649     }
00650     return 0;
00651 }
00652 /*
00653  * Collect capitalized versions of word, making one save_root_cap call per
00654  * entry of m_hits (walked from last to first) until MAX_CAPS words are saved.
00655  *
00656  * \param word      Word to save; an empty word saves nothing
00657  * \param pattern   Sample word passed on to save_root_cap
00658  * \param savearea  Room to save words; filled starting at index 0
00659  *
00660  * \return Number of words saved in savearea
00661  */
00662 int
00663 ISpellChecker::save_cap (ichar_t *word, ichar_t *pattern, 
00664                     ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN])
00665 {
00666     int         hitno;      /* Index into hits array */
00667     int         nsaved;     /* Number of words saved */
00668     int         preadd;     /* No. chars added to front of root */
00669     int         prestrip;   /* No. chars stripped from front */
00670     int         sufadd;     /* No. chars added to back of root */
00671     int         sufstrip;   /* No. chars stripped from back */
00672 
00673     if (*word == 0)
00674         return 0;
00675 
00676     for (hitno = m_numhits, nsaved = 0;  --hitno >= 0  &&  nsaved < MAX_CAPS;  )   /* NOTE(review): m_hits is presumably the state left by the last good() call -- confirm */
00677     {
00678         if (m_hits[hitno].prefix)
00679         {
00680             prestrip = m_hits[hitno].prefix->stripl;
00681             preadd = m_hits[hitno].prefix->affl;
00682         }
00683         else
00684             prestrip = preadd = 0;  /* No prefix on this hit */
00685         if (m_hits[hitno].suffix)
00686         {
00687             sufstrip = m_hits[hitno].suffix->stripl;
00688             sufadd = m_hits[hitno].suffix->affl;
00689         }
00690         else
00691             sufadd = sufstrip = 0;  /* No suffix on this hit */
00692         save_root_cap (word, pattern, prestrip, preadd,
00693             sufstrip, sufadd,
00694             m_hits[hitno].dictent, m_hits[hitno].prefix, m_hits[hitno].suffix,
00695             savearea, &nsaved);
00696     }
00697     return nsaved;
00698 }
00699 
00700 /* Insert into the possibilities list the capitalizations save_root_cap produces for one root entry.
00701  * \param word      Word to insert
00702  * \param pattern   Sample word passed to save_root_cap to choose the capitalization
00703  * \param prestrip  Passed through to save_root_cap (see there)
00704  * \param preadd    Passed through to save_root_cap
00705  * \param sufstrip  Passed through to save_root_cap
00706  * \param sufadd    Passed through to save_root_cap
00707  * \param firstdent Dictionary entry passed through to save_root_cap
00708  * \param pfxent    Prefix affix entry passed through to save_root_cap
00709  * \param sufent    Suffix affix entry passed through to save_root_cap
00710  *
00711  * \return -1 if insert returned a negative value for one of the words, 0 otherwise
00712  */
00713 int
00714 ISpellChecker::ins_root_cap (ichar_t *word, ichar_t *pattern, 
00715                  int prestrip, int preadd, int sufstrip, int sufadd,
00716                  struct dent *firstdent, struct flagent *pfxent, struct flagent *sufent)
00717 {
00718     int         i;      /* Index into savearea */
00719     ichar_t     savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN];
00720     int         nsaved;     /* Number of words saved */
00721 
00722     nsaved = 0;
00723     save_root_cap (word, pattern, prestrip, preadd, sufstrip, sufadd,
00724       firstdent, pfxent, sufent, savearea, &nsaved);
00725     for (i = 0;  i < nsaved;  i++)
00726     {
00727         if (insert (savearea[i]) < 0)
00728             return -1;  /* Stop at the first failed insert */
00729     }
00730     return 0;
00731 }
00732 
00733 /* ARGSUSED */ /*
00734  * Append to savearea the capitalization(s) of word allowed by pattern and by firstdent's variants.
00735  * \param word      Word to save; copied into a local buffer before its case is changed
00736  * \param pattern   Sample word whose capitalization is examined
00737  * \param prestrip  No. chars stripped from front of root (used only for followcase entries)
00738  * \param preadd    No. chars added to front of root (used only for followcase entries)
00739  * \param sufstrip  No. chars stripped from back of root (used only for followcase entries)
00740  * \param sufadd    No. chars added to back of root (used only for followcase entries)
00741  * \param firstdent First dictionary entry for the root; its MOREVARIANTS chain is followed
00742  * \param pfxent    Prefix entry whose flagbit a variant must have set, or NULL
00743  * \param sufent    Suffix entry whose flagbit a variant must have set, or NULL
00744  * \param savearea  Room to save words
00745  * \param nsaved    In/out: number of words in savearea; nothing is added once it reaches MAX_CAPS
00746  */
00747 void
00748 ISpellChecker::save_root_cap (ichar_t *word, ichar_t *pattern, 
00749                           int prestrip, int preadd, int sufstrip, int sufadd,
00750                           struct dent *firstdent, struct flagent *pfxent, struct flagent *sufent, 
00751                           ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN], 
00752                           int * nsaved)
00753 {
00754 #ifndef NO_CAPITALIZATION_SUPPORT
00755     register struct dent * dent;
00756 #endif /* NO_CAPITALIZATION_SUPPORT */
00757     int         firstisupper;
00758     ichar_t     newword[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];
00759 #ifndef NO_CAPITALIZATION_SUPPORT
00760     register ichar_t *  p;
00761     int         len;
00762     int         i;
00763     int         limit;
00764 #endif /* NO_CAPITALIZATION_SUPPORT */
00765 
00766     if (*nsaved >= MAX_CAPS)    /* Save area is already full */
00767         return;
00768     icharcpy (newword, word);
00769     firstisupper = myupper (pattern[0]);
00770 #ifdef NO_CAPITALIZATION_SUPPORT
00771     /*
00772     ** Apply the old, simple-minded capitalization rules.
00773     */
00774     if (firstisupper)
00775     {
00776         if (myupper (pattern[1]))
00777             upcase (newword);
00778         else
00779         {
00780             lowcase (newword);
00781             newword[0] = mytoupper (newword[0]);
00782         }
00783     }
00784     else
00785         lowcase (newword);
00786     icharcpy (savearea[*nsaved], newword);
00787     (*nsaved)++;
00788     return;
00789 #else /* NO_CAPITALIZATION_SUPPORT */
00790 #define flagsareok(dent)    \
00791     ((pfxent == NULL \
00792     ||  TSTMASKBIT (dent->mask, pfxent->flagbit)) \
00793       &&  (sufent == NULL \
00794     ||  TSTMASKBIT (dent->mask, sufent->flagbit)))
00795 
00796     dent = firstdent;
00797     if ((dent->flagfield & (CAPTYPEMASK | MOREVARIANTS)) == ALLCAPS)
00798     {
00799         upcase (newword);   /* Uppercase required */
00800         icharcpy (savearea[*nsaved], newword);
00801         (*nsaved)++;
00802         return;
00803     }
00804     for (p = pattern;  *p;  p++)    /* Look for any lowercase character in pattern */
00805     {
00806         if (mylower (*p))
00807             break;
00808     }
00809     if (*p == 0)
00810     {
00811         upcase (newword);   /* Pattern was all caps */
00812         icharcpy (savearea[*nsaved], newword);
00813         (*nsaved)++;
00814         return;
00815     }
00816     for (p = pattern + 1;  *p;  p++)    /* Look for an uppercase character after the first one */
00817     {
00818         if (myupper (*p))
00819             break;
00820     }
00821     if (*p == 0)
00822     {
00823         /*
00824         ** The pattern was all-lower or capitalized.  If that's
00825         ** legal, insert only that version.
00826         */
00827         if (firstisupper)
00828         {
00829             if (captype (dent->flagfield) == CAPITALIZED
00830               ||  captype (dent->flagfield) == ANYCASE)
00831             {
00832                 lowcase (newword);
00833                 newword[0] = mytoupper (newword[0]);
00834                 icharcpy (savearea[*nsaved], newword);
00835                 (*nsaved)++;
00836                 return;
00837             }
00838         }
00839         else
00840         {
00841             if (captype (dent->flagfield) == ANYCASE)
00842             {
00843                 lowcase (newword);
00844                 icharcpy (savearea[*nsaved], newword);
00845                 (*nsaved)++;
00846                 return;
00847             }
00848         }
00849         while (dent->flagfield & MOREVARIANTS)  /* First entry did not allow it; check the variant entries */
00850         {
00851             dent = dent->next;
00852             if (captype (dent->flagfield) == FOLLOWCASE
00853               ||  !flagsareok (dent))
00854                 continue;
00855             if (firstisupper)
00856             {
00857                 if (captype (dent->flagfield) == CAPITALIZED)
00858                 {
00859                     lowcase (newword);
00860                     newword[0] = mytoupper (newword[0]);
00861                     icharcpy (savearea[*nsaved], newword);
00862                     (*nsaved)++;
00863                     return;
00864                 }
00865             }
00866             else
00867             {
00868                 if (captype (dent->flagfield) == ANYCASE)
00869                 {
00870                     lowcase (newword);
00871                     icharcpy (savearea[*nsaved], newword);
00872                     (*nsaved)++;
00873                     return;
00874                 }
00875             }
00876         }
00877     }
00878     /*
00879     ** Either the sample had complex capitalization, or the simple
00880     ** capitalizations (all-lower or capitalized) are illegal.
00881     ** Insert all legal capitalizations, including those that are
00882     ** all-lower or capitalized.  If the prototype is capitalized,
00883     ** capitalize all-lower samples.  Watch out for affixes.
00884     */
00885     dent = firstdent;
00886     p = strtosichar (dent->word, 1);
00887     len = icharlen (p);     /* Length of the root word as stored in the dictionary entry */
00888     if (dent->flagfield & MOREVARIANTS)
00889         dent = dent->next;  /* Skip place-holder entry */
00890     for (  ;  ;  )
00891     {
00892         if (flagsareok (dent))
00893         {
00894             if (captype (dent->flagfield) != FOLLOWCASE)
00895             {
00896                 lowcase (newword);
00897                 if (firstisupper  ||  captype (dent->flagfield) == CAPITALIZED)
00898                     newword[0] = mytoupper (newword[0]);
00899                 icharcpy (savearea[*nsaved], newword);
00900                 (*nsaved)++;
00901                 if (*nsaved >= MAX_CAPS)
00902                     return;
00903             }
00904             else
00905             {
00906                 /* Followcase is the tough one. */
00907                 p = strtosichar (dent->word, 1);
00908                 memmove (   /* Copy the root's own capitalization over the root part of newword */
00909                   reinterpret_cast<char *>(newword + preadd),
00910                   reinterpret_cast<char *>(p + prestrip),
00911                   (len - prestrip - sufstrip) * sizeof (ichar_t));
00912                 if (myupper (p[prestrip]))  /* Added prefix takes the case of the first kept root character */
00913                 {
00914                     for (i = 0;  i < preadd;  i++)
00915                         newword[i] = mytoupper (newword[i]);
00916                 }
00917                 else
00918                 {
00919                     for (i = 0;  i < preadd;  i++)
00920                         newword[i] = mytolower (newword[i]);
00921                 }
00922                 limit = len + preadd + sufadd - prestrip - sufstrip;   /* Index just past the added suffix */
00923                 i = len + preadd - prestrip - sufstrip;     /* Index where the added suffix starts */
00924                 p += len - sufstrip - 1;    /* p -> last root character kept before the suffix */
00925                 if (myupper (*p))   /* Added suffix takes the case of that character */
00926                 {
00927                     for (p = newword + i;  i < limit;  i++, p++)
00928                         *p = mytoupper (*p);
00929                 }
00930                 else
00931                 {
00932                     for (p = newword + i;  i < limit;  i++, p++)
00933                         *p = mytolower (*p);
00934                 }
00935                 icharcpy (savearea[*nsaved], newword);
00936                 (*nsaved)++;
00937                 if (*nsaved >= MAX_CAPS)
00938                     return;
00939             }
00940         }
00941         if ((dent->flagfield & MOREVARIANTS) == 0)
00942             break;      /* End of the line */
00943         dent = dent->next;
00944     }
00945     return;
00946 #endif /* NO_CAPITALIZATION_SUPPORT */
00947 }
00948 
00949 
KDE Home | KDE Accessibility Home | Description of Access Keys