ispell.h

00001 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
00002 /* enchant
00003  * Copyright (C) 2003 Dom Lachowicz
00004  *
00005  * This library is free software; you can redistribute it and/or
00006  * modify it under the terms of the GNU Lesser General Public
00007  * License as published by the Free Software Foundation; either
00008  * version 2.1 of the License, or (at your option) any later version.
00009  *
00010  * This library is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  * Lesser General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU Lesser General Public
00016  * License along with this library; if not, write to the
00017  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018  * Boston, MA 02110-1301, USA.
00019  *
00020  * In addition, as a special exception, Dom Lachowicz
00021  * gives permission to link the code of this program with
00022  * non-LGPL Spelling Provider libraries (eg: a MSFT Office
00023  * spell checker backend) and distribute linked combinations including
00024  * the two.  You must obey the GNU Lesser General Public License in all
00025  * respects for all of the code used other than said providers.  If you modify
00026  * this file, you may extend this exception to your version of the
00027  * file, but you are not obligated to do so.  If you do not wish to
00028  * do so, delete this exception statement from your version.
00029  */
00030 
00031 #ifndef ISPELL_H
00032 #define ISPELL_H
00033 
00034 #include <sys/types.h>
00035 
00036 /*
00037  * $Id: ispell.h 465272 2005-09-29 09:47:40Z mueller $
00038  */
00039 
00040 /*
00041  * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
00042  * All rights reserved.
00043  *
00044  * Redistribution and use in source and binary forms, with or without
00045  * modification, are permitted provided that the following conditions
00046  * are met:
00047  *
00048  * 1. Redistributions of source code must retain the above copyright
00049  *    notice, this list of conditions and the following disclaimer.
00050  * 2. Redistributions in binary form must reproduce the above copyright
00051  *    notice, this list of conditions and the following disclaimer in the
00052  *    documentation and/or other materials provided with the distribution.
00053  * 3. All modifications to the source code must be clearly marked as
00054  *    such.  Binary redistributions based on modified source code
00055  *    must be clearly marked as modified versions in the documentation
00056  *    and/or other materials provided with the distribution.
00057  * 4. All advertising materials mentioning features or use of this software
00058  *    must display the following acknowledgment:
00059  *      This product includes software developed by Geoff Kuenning and
00060  *      other unpaid contributors.
00061  * 5. The name of Geoff Kuenning may not be used to endorse or promote
00062  *    products derived from this software without specific prior
00063  *    written permission.
00064  *
00065  * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
00066  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00067  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00068  * ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
00069  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00070  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00071  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00072  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00073  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00074  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00075  * SUCH DAMAGE.
00076  */
00077 
00078 /*
00079  * $Log$
00080  * Revision 1.1  2004/01/31 16:44:12  zrusin
00081  * ISpell plugin.
00082  *
00083  * Revision 1.4  2003/08/14 17:51:27  dom
00084  * update license - exception clause should be Lesser GPL
00085  *
00086  * Revision 1.3  2003/07/28 20:40:26  dom
00087  * fix up the license clause, further win32-registry proof some directory getting functions
00088  *
00089  * Revision 1.2  2003/07/16 22:52:40  dom
00090  * LGPL + exception license
00091  *
00092  * Revision 1.1  2003/07/15 01:15:06  dom
00093  * ispell enchant backend
00094  *
00095  * Revision 1.10  2003/01/24 05:52:33  hippietrail
00096  *
00097  * Refactored ispell code. Old ispell global variables had been put into
00098  * an allocated structure, a pointer to which was passed to many functions.
00099  * I have now made all such functions and variables private members of the
00100  * ISpellChecker class. It was C OO, now it's C++ OO.
00101  *
00102  * I've fixed the makefiles and tested compilation but am unable to test
00103  * operation. Please back out my changes if they cause problems which
00104  * are not obvious or easy to fix.
00105  *
00106  * Revision 1.9  2002/09/19 05:31:15  hippietrail
00107  *
00108  * More Ispell cleanup.  Conditional globals and DEREF macros are removed.
00109  * K&R function declarations removed, converted to Doxygen style comments
00110  * where possible.  No code has been changed (I hope).  Compiles for me but
00111  * unable to test.
00112  *
00113  * Revision 1.8  2002/09/17 03:03:29  hippietrail
00114  *
00115  * After seeking permission on the developer list I've reformatted all the
00116  * spelling source which seemed to have parts which used 2, 3, 4, and 8
00117  * spaces for tabs.  It should all look good with our standard 4-space
00118  * tabs now.
00119  * I've concentrated just on indentation in the actual code.  More prettying
00120  * could be done.
00121  * * NO code changes were made *
00122  *
00123  * Revision 1.7  2002/03/22 14:31:57  dom
00124  * fix mg's compile problem
00125  *
00126  * Revision 1.6  2002/03/05 16:55:52  dom
00127  * compound word support, tested against swedish
00128  *
00129  * Revision 1.5  2001/08/10 18:32:40  dom
00130  * Spelling and iconv updates. god, i hate iconv
00131  *
00132  * Revision 1.4  2001/06/26 16:33:27  dom
00133  * 128 StringChars and some other stuff
00134  *
00135  * Revision 1.3  2001/05/12 16:05:42  thomasf
00136  * Big pseudo changes to ispell to make it pass around a structure rather
00137  * than rely on all sorts of gloabals willy nilly here and there.  Also
00138  * fixed our spelling class to work with accepting suggestions once more.
00139  * This code is dirty, gross and ugly (not to mention still not supporting
00140  * multiple hash sized just yet) but it works on my machine and will no
00141  * doubt break other machines.
00142  *
00143  * Revision 1.2  2001/04/18 00:59:36  thomasf
00144  * Removed the duplicate declarations of variables that was causing build
00145  * to bail.  This new ispell stuff is a total mess.
00146  *
00147  * Revision 1.1  2001/04/15 16:01:24  tomas_f
00148  * moving to spell/xp
00149  *
00150  * Revision 1.13  2001/04/13 12:33:12  tamlin
00151  * ispell can now be used from C++
00152  *
00153  * Revision 1.12  2001/03/25 01:30:02  tomb
00154  * 1. Fixed ispell #define problems on Win32
00155  * 2. Changed the way that togglable toolbars are tracked so that Full
00156  * Screen mode works right on Windows
00157  * 3. Fixed SET_GATHER macro in ap_Win32Dialog_Options.h
00158  * 4. Fixed Toggle Case dialog to default to Sentence Case when loaded
00159  * 5. Added #define for Auto Save checkbox (though I haven't updated the
00160  * Prefs dialog yet)
00161  *
00162  * Revision 1.11  2001/03/24 23:28:41  dom
00163  * Make C++ aware and watch out for VOID on Win32
00164  *
00165  * Revision 1.10  1999/12/21 18:46:29  sterwill
00166  * ispell patch for non-English dictionaries by Henrik Berg <henrik@lansen.se>
00167  *
00168  * Revision 1.9  1999/10/20 03:19:35  paul
00169  * Hacked ispell code to ignore any characters that don't fit in the lookup tables loaded from the dictionary.  It ain't pretty, but at least we don't crash there any more.
00170  *
00171  * Revision 1.8  1999/09/29 23:33:32  justin
00172  * Updates to the underlying ispell-based code to support suggested corrections.
00173  *
00174  * Revision 1.7  1999/04/13 17:12:51  jeff
00175  * Applied "Darren O. Benham" <gecko@benham.net> spell check changes.
00176  * Fixed crash on Win32 with the new code.
00177  *
00178  * Revision 1.6  1999/01/07 05:14:22  sterwill
00179  * So it builds on Unix... it might break win32 in ispell, since ut_types
00180  * is no longer included.  This is a temporary solution to a larger problem
00181  * of including C++ headers in C source files.
00182  *
00183  * Revision 1.6  1999/01/07 05:14:22  sterwill
00184  * So it builds on Unix... it might break win32 in ispell, since ut_types
00185  * is no longer included.  This is a temporary solution to a larger problem
00186  * of including C++ headers in C source files.
00187  *
00188  * Revision 1.5  1999/01/07 05:02:25  sterwill
00189  * Checking in half-broken to avoid tree lossage
00190  *
00191  * Revision 1.4  1999/01/07 01:07:48  paul
00192  * Fixed spell leaks.
00193  *
00194  * Revision 1.3  1998/12/29 15:03:54  eric
00195  *
00196  * minor fix to ispell.h to get things to compile on Linux again.
00197  *
00198  * Revision 1.2  1998/12/29 14:55:33  eric
00199  *
00200  * I've doctored the ispell code pretty extensively here.  It is now
00201  * warning-free on Win32.  It also *works* on Win32 now, since I
00202  * replaced all the I/O calls with ANSI standard ones.
00203  *
00204  * Revision 1.1  1998/12/28 18:04:43  davet
00205  * Spell checker code stripped from ispell.  At this point, there are
00206  * two external routines...  the Init routine, and a check-a-word routine
00207  * which returns a boolean value, and takes a 16 bit char string.
00208  * The code resembles the ispell code as much as possible still.
00209  *
00210  * Revision 1.68  1995/03/06  02:42:41  geoff
00211  * Be vastly more paranoid about parenthesizing macro arguments.  This
00212  * fixes a bug in defmt.c where a complex argument was passed to
00213  * isstringch.
00214  *
00215  * Revision 1.67  1995/01/03  19:24:12  geoff
00216  * Get rid of a non-global declaration.
00217  *
00218  * Revision 1.66  1994/12/27  23:08:49  geoff
00219  * Fix a lot of subtly bad assumptions about the widths of ints and longs
00220  * which only show up on 64-bit machines like the Cray and the DEC Alpha.
00221  *
00222  * Revision 1.65  1994/11/02  06:56:10  geoff
00223  * Remove the anyword feature, which I've decided is a bad idea.
00224  *
00225  * Revision 1.64  1994/10/25  05:46:18  geoff
00226  * Add the FF_ANYWORD flag for defining an affix that will apply to any
00227  * word, even if not explicitly specified.  (Good for French.)
00228  *
00229  * Revision 1.63  1994/09/16  04:48:28  geoff
00230  * Make stringdups and laststringch unsigned ints, and dupnos a plain
00231  * int, so that we can handle more than 128 stringchars and stringchar
00232  * types.
00233  *
00234  * Revision 1.62  1994/09/01  06:06:39  geoff
00235  * Change erasechar/killchar to uerasechar/ukillchar to avoid
00236  * shared-library problems on HP systems.
00237  *
00238  * Revision 1.61  1994/08/31  05:58:35  geoff
00239  * Add contextoffset, used in -a mode to handle extremely long lines.
00240  *
00241  * Revision 1.60  1994/05/17  06:44:15  geoff
00242  * Add support for controlled compound formation and the COMPOUNDONLY
00243  * option to affix flags.
00244  *
00245  * Revision 1.59  1994/03/15  06:25:16  geoff
00246  * Change deftflag's initialization so we can tell if -t/-n appeared.
00247  *
00248  * Revision 1.58  1994/02/07  05:53:28  geoff
00249  * Add typecasts to the the 7-bit versions of ichar* routines
00250  *
00251  * Revision 1.57  1994/01/25  07:11:48  geoff
00252  * Get rid of all old RCS log lines in preparation for the 3.1 release.
00253  *
00254  */
00255 
00256 #include <stdio.h>
00257 /*  #include "ut_types.h" */
00258 
00259 #include "ispell_def.h"
00260 
00261 #ifdef __cplusplus
00262 extern "C" {
00263 #endif /* c++ */
00264 
00265 /* largest amount that a word might be extended by adding affixes */
00266 #ifndef MAXAFFIXLEN
00267 #define MAXAFFIXLEN 20
00268 #endif
00269 
00270 /*
00271 ** Number of mask bits (affix flags) supported.  Must be 32, 64, 128, or
00272 ** 256.  If MASKBITS is 32 or 64, there are really only 26 or 58 flags
00273 ** available, respectively.  If it is 32, the flags are named with the
00274 ** 26 English uppercase letters;  lowercase will be converted to uppercase.
00275 ** If MASKBITS is 64, the 58 flags are named 'A' through 'z' in ASCII
00276 ** order, including the 6 special characters from 'Z' to 'a': "[\]^_`".
00277 ** If MASKBITS is 128 or 256, all the 7-bit or 8-bit characters,
00278 ** respectively, are theoretically available, though a few (newline, slash,
00279 ** null byte) are pretty hard to actually use successfully.
00280 **
00281 ** Note that a number of non-English affix files depend on having a
00282 ** larger value for MASKBITS.  See the affix files for more
00283 ** information.
00284 */
00285 
00286 #ifndef MASKBITS
00287 #define MASKBITS    64
00288 #endif
00289 
00290 extern int      gnMaskBits;
00291 
00292 /*
00293 ** C type to use for masks.  This should be a type that the processor
00294 ** accesses efficiently.
00295 **
00296 ** MASKTYPE_WIDTH must correctly reflect the number of bits in a
00297 ** MASKTYPE.  Unfortunately, it is also required to be a constant at
00298 ** preprocessor time, which means you can't use the sizeof operator to
00299 ** define it.
00300 **
00301 ** Note that MASKTYPE *must* match MASKTYPE_WIDTH or you may get
00302 ** division-by-zero errors! 
00303 */
00304 #ifndef MASKTYPE  /* Default mask word type. NOTE(review): long is wider than 32 bits on LP64 targets - confirm it agrees with MASKTYPE_WIDTH there */
00305 #define MASKTYPE    long
00306 #endif /* MASKTYPE */
00307 #ifndef MASKTYPE_WIDTH  /* Default number of bits assumed in one MASKTYPE word */
00308 #define MASKTYPE_WIDTH  32
00309 #endif /* MASKTYPE_WIDTH */
00310 
00311   /* program: this should be coded now in init */
00312 
00313 #if MASKBITS < MASKTYPE_WIDTH
00314 #undef MASKBITS
00315 #define MASKBITS    MASKTYPE_WIDTH
00316 #endif /* MASKBITS < MASKTYPE_WIDTH */
00317 
00318 /*
00319 ** Maximum hash table fullness percentage.  Larger numbers trade space
00320 ** for time.
00321 **/
00322 #ifndef MAXPCT
00323 #define MAXPCT  70      /* Expand table when 70% full */
00324 #endif
00325 
00326 /*
00327 ** Maximum number of "string" characters that can be defined in a
00328 ** language (affix) file.  Don't forget that an upper/lower string
00329 ** character counts as two!
00330 */
00331 #ifndef MAXSTRINGCHARS
00332 #define MAXSTRINGCHARS 128
00333 #endif /* MAXSTRINGCHARS */
00334 
00335 /*
00336 ** Maximum length of a "string" character.  The default is appropriate for
00337 ** nroff-style characters starting with a backslash.
00338 */
00339 #ifndef MAXSTRINGCHARLEN
00340 #define MAXSTRINGCHARLEN 10
00341 #endif /* MAXSTRINGCHARLEN */
00342 
00343 /*
00344 ** Maximum number of "hits" expected on a word.  This is basically the
00345 ** number of different ways different affixes can produce the same word.
00346 ** For example, with "english.aff", "brothers" can be produced 3 ways:
00347 ** "brothers," "brother+s", or "broth+ers".  If this is too low, no major
00348 ** harm will be done, but ispell may occasionally forget a capitalization.
00349 */
00350 #ifndef MAX_HITS
00351 #define MAX_HITS    10
00352 #endif
00353 
00354 /*
00355 ** Maximum number of capitalization variations expected in any word.
00356 ** Besides the obvious all-lower, all-upper, and capitalized versions,
00357 ** this includes followcase variants.  If this is too low, no real
00358 ** harm will be done, but ispell may occasionally fail to suggest a
00359 ** correct capitalization.
00360 */
00361 #ifndef MAX_CAPS
00362 #define MAX_CAPS    10
00363 #endif /* MAX_CAPS */
00364 
00365 /* buffer size to use for file names if not in sys/param.h */
00366 #ifndef MAXPATHLEN
00367 #define MAXPATHLEN 512
00368 #endif
00369 
00370 /*
00371 ** Maximum language-table search size.  Smaller numbers make ispell
00372 ** run faster, at the expense of more memory (the lowest reasonable value
00373 ** is 2).  If a given character appears in a significant position in
00374 ** more than MAXSEARCH suffixes, it will be given its own index table.
00375 ** If you change this, define INDEXDUMP in lookup.c to be sure your
00376 ** index table looks reasonable.
00377 */
00378 #ifndef MAXSEARCH
00379 #define MAXSEARCH 4
00380 #endif
00381 
00382 #if defined(__STDC__) || defined(__cplusplus)  /* Standard C or C++ compiler */
00383 #define P(x)    x   /* Prototypes keep their parameter lists */
00384  #ifndef VOID
00385    #define VOID void
00386  #endif
00387 #else /* __STDC__ */
00388 #define P(x)    ()  /* Parameter lists are dropped from prototypes */
00389  #ifndef VOID
00390    #define VOID char    /* Stand-in used when void is not relied upon */
00391  #endif
00392 #define const           /* The const qualifier is erased in this branch */
00393 #endif /* __STDC__ */
00394 
00395 #ifdef NO8BIT
00396 #define SET_SIZE    128
00397 #else
00398 #define SET_SIZE    256
00399 #endif
00400 
00401 #define MASKSIZE    (gnMaskBits / MASKTYPE_WIDTH)
00402 
00403 #ifdef lint
00404 extern int  TSTMASKBIT P ((MASKTYPE * mask, int bit));
00405 #else /* lint */
00406 /* Nonzero when flag number "bit" is set in the mask array: the word index is bit / MASKTYPE_WIDTH and the bit within it is bit & (MASKTYPE_WIDTH - 1), so MASKTYPE_WIDTH must be a power of two. Both arguments are evaluated exactly once. */
00407 #define TSTMASKBIT(mask, bit) \
00408             ((mask)[(bit) / MASKTYPE_WIDTH] & \
00409               ((MASKTYPE) 1 << ((bit) & (MASKTYPE_WIDTH - 1))))
00410 #endif /* lint */
00411 
00412 #if MASKBITS > 64
00413 #define FULLMASKSET
00414 #endif
00415 
00416 /* Bit position of the first dictionary-entry flag (USED) inside the flag field */
00417 #if MASKBITS > 64
00418 #define FLAGBASE    0   /* flags live in their own field, starting at bit 0 */
00419 #else
00420 #define FLAGBASE    ((MASKTYPE_WIDTH) - 6)  /* flags share the top bits of a mask word */
00421 #endif
00425 
00426 /*
00427 ** Data type for internal word storage.  If necessary, we use shorts rather
00428 ** than chars so that string characters can be encoded as a single unit.
00429 */
00430 #if (SET_SIZE + MAXSTRINGCHARS) <= 256
00431 #ifndef lint
00432 #define ICHAR_IS_CHAR
00433 #endif /* lint */
00434 #endif
00435 
00436 /*
00437  * Structure used to record data about successful lookups; these values
00438  * are used in the ins_root_cap routine to produce correct capitalizations.
00439  * It holds only pointers, so it is declared for every width of ichar_t.
00440  */
00441 struct success
00442 {
00443     struct dent *       dictent;    /* Header of dict entry chain for wd */
00444     struct flagent *    prefix;     /* Prefix flag used, or NULL */
00445     struct flagent *    suffix;     /* Suffix flag used, or NULL */
00446 };
00447
00448 #ifdef ICHAR_IS_CHAR    /* Internal characters fit in one byte: map onto the C string routines */
00449 typedef unsigned char   ichar_t;    /* Internal character */
00450 #define icharlen(s) strlen ((char *) (s))
00451 #define icharcpy(a, b)  strcpy ((char *) (a), (char *) (b))
00452 #define icharcmp(a, b)  strcmp ((char *) (a), (char *) (b))
00453 #define icharncmp(a, b, n) strncmp ((char *) (a), (char *) (b), (n))
00454 #define chartoichar(x)  ((ichar_t) (x))
00455 #else   /* Wider internal characters: the ichar routines are real functions */
00456 typedef unsigned short  ichar_t;    /* Internal character */
00457 #define chartoichar(x)  ((ichar_t) (unsigned char) (x))  /* via unsigned char so a negative char is not sign-extended */
00458
00459 ichar_t* icharcpy (ichar_t* out, ichar_t* in);
00460 int icharlen (ichar_t* in);
00461 int icharcmp (ichar_t* s1, ichar_t* s2);
00462 int icharncmp (ichar_t* s1, ichar_t* s2, int n);
00463 #endif /* ICHAR_IS_CHAR */
00464 
00465 struct dent                     /* One dictionary entry; see the flag discussion that follows */
00466 {
00467     struct dent *   next;       /* Next entry; also links the capitalization variants of a word */
00468     char *          word;       /* Text of the word (see the capitalization notes that follow) */
00469     MASKTYPE        mask[2];    /* Affix flag bits; mask[1] also carries the entry flags unless FULLMASKSET */
00470 #ifdef FULLMASKSET
00471     char            flags;      /* Entry flags (USED, KEEP, ...) kept separately when FULLMASKSET */
00472 #endif
00473 };
00474 
00475 /*
00476 ** Flags in the directory entry.  If FULLMASKSET is undefined, these are
00477 ** stored in the highest bits of the last longword of the mask field.  If
00478 ** FULLMASKSET is defined, they are stored in the extra "flags" field.
00479 #ifndef NO_CAPITALIZATION_SUPPORT
00480 **
00481 ** If a word has only one capitalization form, and that form is not
00482 ** FOLLOWCASE, it will have exactly one entry in the dictionary.  The
00483 ** legal capitalizations will be indicated by the 2-bit capitalization
00484 ** field, as follows:
00485 **
00486 **  ALLCAPS     The word must appear in all capitals.
00487 **  CAPITALIZED The word must be capitalized (e.g., London).
00488 **          It will also be accepted in all capitals.
00489 **  ANYCASE     The word may appear in lowercase, capitalized,
00490 **          or all-capitals.
00491 **
00492 ** Regardless of the capitalization flags, the "word" field of the entry
00493 ** will point to an all-uppercase copy of the word.  This is to simplify
00494 ** the large portion of the code that doesn't care about capitalization.
00495 ** Ispell will generate the correct version when needed.
00496 **
00497 ** If a word has more than one capitalization, there will be multiple
00498 ** entries for it, linked together by the "next" field.  The initial
00499 ** entry for such words will be a dummy entry, primarily for use by code
00500 ** that ignores capitalization.  The "word" field of this entry will
00501 ** again point to an all-uppercase copy of the word.  The "mask" field
00502 ** will contain the logical OR of the mask fields of all variants.
00503 ** A header entry is indicated by a capitalization type of ALLCAPS,
00504 ** with the MOREVARIANTS bit set.
00505 **
00506 ** The following entries will define the individual variants.  Each
00507 ** entry except the last has the MOREVARIANTS flag set, and each
00508 ** contains one of the following capitalization options:
00509 **
00510 **  ALLCAPS     The word must appear in all capitals.
00511 **  CAPITALIZED The word must be capitalized (e.g., London).
00512 **          It will also be accepted in all capitals.
00513 **  FOLLOWCASE  The word must be capitalized exactly like the
00514 **          sample in the entry.  Prefix (suffix) characters
00515 **          must be rendered in the case of the first (last)
00516 **          "alphabetic" character.  It will also be accepted
00517 **          in all capitals.  ("Alphabetic" means "mentioned
00518 **          in a 'casechars' statement".)
00519 **  ANYCASE     The word may appear in lowercase, capitalized,
00520 **          or all-capitals.
00521 **
00522 ** The "mask" field for the entry contains only the affix flag bits that
00523 ** are legal for that capitalization.  The "word" field will be null
00524 ** except for FOLLOWCASE entries, where it will point to the
00525 ** correctly-capitalized spelling of the root word.
00526 **
00527 ** It is worth discussing why the ALLCAPS option is used in
00528 ** the header entry.  The header entry accepts an all-capitals
00529 ** version of the root plus every affix (this is always legal, since
00530 ** words get capitalized in headers and so forth).  Further, all of
00531 ** the following variant entries will reject any all-capitals form
00532 ** that is illegal due to an affix.
00533 **
00534 ** Finally, note that variations in the KEEP flag can cause a multiple-variant
00535 ** entry as well.  For example, if the personal dictionary contains "ALPHA",
00536 ** (KEEP flag set) and the user adds "alpha" with the KEEP flag clear, a
00537 ** multiple-variant entry will be created so that "alpha" will be accepted
00538 ** but only "ALPHA" will actually be kept.
00539 #endif
00540 */
00541 #ifdef FULLMASKSET  /* flagfield names the struct dent member that holds the entry flags */
00542 #define flagfield   flags
00543 #else
00544 #define flagfield   mask[1]
00545 #endif
00546 #define USED        ((MASKTYPE) 1 << (FLAGBASE + 0))    /* First entry-flag bit */
00547 #define KEEP        ((MASKTYPE) 1 << (FLAGBASE + 1))    /* Second entry-flag bit; see the KEEP discussion above */
00548 #ifdef NO_CAPITALIZATION_SUPPORT
00549 #define ALLFLAGS    (USED | KEEP)   /* Every entry-flag bit in use */
00550 #else /* NO_CAPITALIZATION_SUPPORT */
00551 #define ANYCASE     ((MASKTYPE) 0 << (FLAGBASE + 2))    /* Values of the 2-bit capitalization field */
00552 #define ALLCAPS     ((MASKTYPE) 1 << (FLAGBASE + 2))
00553 #define CAPITALIZED ((MASKTYPE) 2 << (FLAGBASE + 2))
00554 #define FOLLOWCASE  ((MASKTYPE) 3 << (FLAGBASE + 2))
00555 #define CAPTYPEMASK ((MASKTYPE) 3 << (FLAGBASE + 2))    /* Mask covering the capitalization field */
00556 #define MOREVARIANTS    ((MASKTYPE) 1 << (FLAGBASE + 4))    /* Set on every variant entry except the last */
00557 #define ALLFLAGS    (USED | KEEP | CAPTYPEMASK | MOREVARIANTS)  /* Every entry-flag bit in use */
00558 #define captype(x)  ((x) & CAPTYPEMASK)     /* Extract the capitalization field from a flag word */
00559 #endif /* NO_CAPITALIZATION_SUPPORT */
00560 
00561 /*
00562  * Language tables used to encode prefix and suffix information.
00563  */
00564 struct flagent                  /* One entry of the prefix/suffix language tables */
00565 {
00566     ichar_t *       strip;      /* String to strip off */
00567     ichar_t *       affix;      /* Affix to append */
00568     short       flagbit;        /* Flag bit this entry matches */
00569     short       stripl;         /* Length of strip */
00570     short       affl;           /* Length of affix */
00571     short       numconds;       /* Number of character conditions */
00572     short       flagflags;      /* Modifiers on this flag (FF_* bits) */
00573     char        conds[SET_SIZE + MAXSTRINGCHARS]; /* Adj. char conds, one slot per internal character code */
00574 };
00575 
00576 /*
00577  * Bits in flagflags
00578  */
00579 #define FF_CROSSPRODUCT (1 << 0)        /* Affix does cross-products */
00580 #define FF_COMPOUNDONLY (1 << 1)        /* Afx works in compounds */
00581 
00582 union ptr_union                 /* Aid for building flag pointers; embedded in struct flagptr */
00583 {
00584     struct flagptr *    fp;         /* Pointer to more indexing */
00585     struct flagent *    ent;        /* First of a list of entries */
00586 };
00587 
00588 struct flagptr                  /* One slot of the affix index */
00589 {
00590     union ptr_union pu;         /* Entry list or more indexes */
00591     int         numents;        /* If zero, pu.fp is valid; presumably the length of the pu.ent list otherwise - confirm */
00592 };
00593 
00594 /*
00595  * Description of a single string character type.
00596  */
00597 struct strchartype              /* One string character type */
00598 {
00599     char *      name;           /* Name of the type */
00600     char *      deformatter;    /* Deformatter to use */
00601     char *      suffixes;       /* File suffixes, null-separated */
00602 };
00603 
00604 /*
00605  * Header placed at the beginning of the hash file.
00606  */
00607 struct hashheader               /* Hash-file header: member order and types are part of the file layout */
00608 {
00609     unsigned short magic;                       /* Magic number for ID (see MAGIC) */
00610     unsigned short compileoptions;              /* How we were compiled (see COMPILEOPTIONS) */
00611     short maxstringchars;                       /* Max number of string chars we support */
00612     short maxstringcharlen;                     /* Max string char length supported */
00613     short compoundmin;                          /* Min length of compound parts */
00614     short compoundbit;                          /* Flag for compounding roots */
00615     int stringsize;                             /* Size of string table */
00616     int lstringsize;                            /* Size of language string table */
00617     int tblsize;                                /* No. entries in hash table */
00618     int stblsize;                               /* No. entries in suffix table */
00619     int ptblsize;                               /* No. entries in prefix table */
00620     int sortval;                                /* Largest sort ID assigned */
00621     int nstrchars;                              /* No. string chars defined */
00622     int nstrchartype;                           /* No. string char types */
00623     int strtypestart;                           /* Start of string type table */
00624     char nrchars[5];                            /* Nroff special characters (see the NR* macros) */
00625     char texchars[13];                          /* TeX special characters (see the TEX* macros) */
00626     char compoundflag;                          /* Compound-word handling (a COMPOUND_* value) */
00627     char defhardflag;                           /* Default tryveryhard flag */
00628     char flagmarker;                            /* "Start-of-flags" char */
00629     unsigned short sortorder[SET_SIZE + MAXSTRINGCHARS]; /* Sort ordering */
00630     ichar_t lowerconv[SET_SIZE + MAXSTRINGCHARS]; /* Lower-conversion table */
00631     ichar_t upperconv[SET_SIZE + MAXSTRINGCHARS]; /* Upper-conversion table */
00632     char wordchars[SET_SIZE + MAXSTRINGCHARS]; /* Nonzero for chars found in words */
00633     char upperchars[SET_SIZE + MAXSTRINGCHARS]; /* Nonzero for uppercase chars */
00634     char lowerchars[SET_SIZE + MAXSTRINGCHARS]; /* Nonzero for lowercase chars */
00635     char boundarychars[SET_SIZE + MAXSTRINGCHARS]; /* Nonzero for boundary chars */
00636     char stringstarts[SET_SIZE];        /* Nonzero if char can start a string char */
00637     char stringchars[MAXSTRINGCHARS][MAXSTRINGCHARLEN + 1]; /* String chars */
00638     unsigned int stringdups[MAXSTRINGCHARS];    /* No. of "base" char */
00639     int dupnos[MAXSTRINGCHARS];         /* Dup char ID # */
00640     unsigned short magic2;          /* Second magic number, for a double check */
00641 };
00642 
00643 /* hash table magic number */
00644 #define MAGIC           0x9602
00645 
00646 /* compile options, put in the hash header for consistency checking */
00647 #ifdef NO8BIT
00648 # define MAGIC8BIT      0x01
00649 #else
00650 # define MAGIC8BIT      0x00
00651 #endif
00652 #ifdef NO_CAPITALIZATION_SUPPORT
00653 # define MAGICCAPITALIZATION    0x00
00654 #else
00655 # define MAGICCAPITALIZATION    0x02
00656 #endif
00657 /* Mask-size code stored in the hash header; MASKBITS selects exactly one definition, so no branch redefines an existing macro */
00658 #if MASKBITS <= 32
00659 # define MAGICMASKSET       0x00
00660 #else
00661 # if MASKBITS <= 64
00662 #  define MAGICMASKSET      0x04
00663 # else
00664 #  if MASKBITS <= 128
00665 #   define MAGICMASKSET     0x08
00666 #  else
00667 #   define MAGICMASKSET     0x0C
00668 #  endif
00669 # endif
00670 #endif
00671 
00672 #define COMPILEOPTIONS  (MAGIC8BIT | MAGICCAPITALIZATION | MAGICMASKSET)
00673 
00674 /*
00675 ** Offsets into the nroff special-character array
00676 */
00677 #define NRLEFTPAREN     hashheader.nrchars[0]
00678 #define NRRIGHTPAREN    hashheader.nrchars[1]
00679 #define NRDOT           hashheader.nrchars[2]
00680 #define NRBACKSLASH     hashheader.nrchars[3]
00681 #define NRSTAR          hashheader.nrchars[4]
00682 
00683 /*
00684 ** Offsets into the TeX special-character array
00685 */
00686 #define TEXLEFTPAREN    hashheader.texchars[0]
00687 #define TEXRIGHTPAREN   hashheader.texchars[1]
00688 #define TEXLEFTSQUARE   hashheader.texchars[2]
00689 #define TEXRIGHTSQUARE  hashheader.texchars[3]
00690 #define TEXLEFTCURLY    hashheader.texchars[4]
00691 #define TEXRIGHTCURLY   hashheader.texchars[5]
00692 #define TEXLEFTANGLE    hashheader.texchars[6]
00693 #define TEXRIGHTANGLE   hashheader.texchars[7]
00694 #define TEXBACKSLASH    hashheader.texchars[8]
00695 #define TEXDOLLAR       hashheader.texchars[9]
00696 #define TEXSTAR         hashheader.texchars[10]
00697 #define TEXDOT          hashheader.texchars[11]
00698 #define TEXPERCENT      hashheader.texchars[12]
00699 
00700 /*
00701 ** Values for compoundflag
00702 */
00703 #define COMPOUND_NEVER      0   /* Compound words are never good */
00704 #define COMPOUND_ANYTIME    1   /* Accept run-together words */
00705 #define COMPOUND_CONTROLLED 2   /* Compounds controlled by afx flags */
00706 /*
00707 ** These macros are similar to the ones above, but they take into account
00708 ** the possibility of string characters.  Note well that they take a POINTER,
00709 ** not a character.
00710 **
00711 ** The "l_" versions set "len" to the length of the string character as a
00712 ** handy side effect.  (Note that the global "laststringch" is also set,
00713 ** and sometimes used, by these macros.)
00714 **
00715 ** The "l1_" versions go one step further and guarantee that the "len"
00716 ** field is valid for *all* characters, being set to 1 even if the macro
00717 ** returns false.  This macro is a great example of how NOT to write
00718 ** readable C.
00719 */
00720 /*TF NOTE: This is actually defined in code (makedent) now */
00721 #if 0 
00722 #define isstringch(ptr, canon)  (isstringstart (*(ptr)) \
00723                   &&  stringcharlen ((ptr), (canon)) > 0)
00724 #define l_isstringch(ptr, len, canon)   \
00725                 (isstringstart (*(ptr)) \
00726                   &&  (len = stringcharlen ((ptr), (canon))) \
00727                     > 0)
00728 #define l1_isstringch(ptr, len, canon)  \
00729                 (len = 1, \
00730                   isstringstart ((unsigned char)(*(ptr))) \
00731                     &&  ((len = \
00732                       stringcharlen ((ptr), (canon))) \
00733                     > 0 \
00734                       ? 1 : (len = 1, 0)))
00735 #endif
00736 
00737 /*
00738  * Sizes of buffers returned by ichartosstr/strtosichar.
00739  */
00740 #define ICHARTOSSTR_SIZE (INPUTWORDLEN + 4 * MAXAFFIXLEN + 4)
00741 #define STRTOSICHAR_SIZE ((INPUTWORDLEN + 4 * MAXAFFIXLEN + 4) \
00742               * sizeof (ichar_t))
00743 /* TF CHANGE: We should fill this as a structure
00744               and then use it throughout.
00745 */
00746 
00747 /*
00748  * Initialized variables.  These are generated using macros so that they
00749  * may be consistently declared in all programs.  Numerous examples of
00750  * usage are given below.
00751  */
00752 #ifdef MAIN /* With MAIN defined, INIT() expands to a definition with its initializer */
00753 #define INIT(decl, init)    decl = init
00754 #else /* Otherwise INIT() expands to an extern declaration and the initializer is dropped */
00755 #define INIT(decl, init)    extern decl
00756 #endif /* MAIN */
00757 
00758 #ifdef MINIMENU
00759 INIT (int minimenusize, 2);     /* MUST be either 2 or zero */
00760 #else /* MINIMENU */
00761 INIT (int minimenusize, 0);     /* MUST be either 2 or zero */
00762 #endif /* MINIMENU */
00763 
00764 INIT (int eflag, 0);            /* NZ for expand mode */
00765 INIT (int dumpflag, 0);         /* NZ to do dump mode */
00766 INIT (int fflag, 0);            /* NZ if -f specified */
00767 #ifndef USG
00768 INIT (int sflag, 0);            /* NZ to stop self after EOF */
00769 #endif
00770 INIT (int vflag, 0);            /* NZ to display characters as M-xxx */
00771 INIT (int xflag, DEFNOBACKUPFLAG);  /* NZ to suppress backups */
00772 INIT (int deftflag, -1);        /* NZ for TeX mode by default */
00773 INIT (int tflag, DEFTEXFLAG);       /* NZ for TeX mode in current file */
00774 INIT (int prefstringchar, -1);      /* Preferred string character type */
00775 
00776 INIT (int terse, 0);            /* NZ for "terse" mode */
00777 
00778 INIT (char tempfile[MAXPATHLEN], "");   /* Name of file we're spelling into */
00779 
00780 INIT (int minword, MINWORD);        /* Longest always-legal word */
00781 INIT (int sortit, 1);           /* Sort suggestions alphabetically */
00782 INIT (int compoundflag, -1);        /* How to treat compounds: see above */
00783 INIT (int tryhardflag, -1);     /* Always call tryveryhard */
00784 
00785 INIT (char * currentfile, NULL);    /* Name of current input file */
00786 
00787 /* Odd numbers for math mode in LaTeX; even for LR or paragraph mode */
00788 INIT (int math_mode, 0);
00789 /* P -- paragraph or LR mode
00790  * b -- parsing a \begin statement
00791  * e -- parsing an \end statement
00792  * r -- parsing a \ref type of argument.
00793  * m -- looking for a \begin{minipage} argument.
00794  */
00795 INIT (char LaTeX_Mode, 'P');
00796 
00797 #ifdef __cplusplus
00798 }
00799 #endif /* c++ */
00800 
00801 #endif /* ISPELL_H */
KDE Home | KDE Accessibility Home | Description of Access Keys