hash.cpp

00001 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
00002 /* enchant
00003  * Copyright (C) 2003 Dom Lachowicz
00004  *
00005  * This library is free software; you can redistribute it and/or
00006  * modify it under the terms of the GNU Lesser General Public
00007  * License as published by the Free Software Foundation; either
00008  * version 2.1 of the License, or (at your option) any later version.
00009  *
00010  * This library is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  * Lesser General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU Lesser General Public
00016  * License along with this library; if not, write to the
00017  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018  * Boston, MA 02110-1301, USA.
00019  *
00020  * In addition, as a special exception, Dom Lachowicz
00021  * gives permission to link the code of this program with
00022  * non-LGPL Spelling Provider libraries (eg: a MSFT Office
00023  * spell checker backend) and distribute linked combinations including
00024  * the two.  You must obey the GNU Lesser General Public License in all
00025  * respects for all of the code used other than said providers.  If you modify
00026  * this file, you may extend this exception to your version of the
00027  * file, but you are not obligated to do so.  If you do not wish to
00028  * do so, delete this exception statement from your version.
00029  */
00030 
00031 /*
00032  * hash.c - a simple hash function for ispell
00033  *
00034  * Pace Willisson, 1983
00035  *
00036  * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
00037  * All rights reserved.
00038  *
00039  * Redistribution and use in source and binary forms, with or without
00040  * modification, are permitted provided that the following conditions
00041  * are met:
00042  *
00043  * 1. Redistributions of source code must retain the above copyright
00044  *    notice, this list of conditions and the following disclaimer.
00045  * 2. Redistributions in binary form must reproduce the above copyright
00046  *    notice, this list of conditions and the following disclaimer in the
00047  *    documentation and/or other materials provided with the distribution.
00048  * 3. All modifications to the source code must be clearly marked as
00049  *    such.  Binary redistributions based on modified source code
00050  *    must be clearly marked as modified versions in the documentation
00051  *    and/or other materials provided with the distribution.
00052  * 4. All advertising materials mentioning features or use of this software
00053  *    must display the following acknowledgment:
00054  *      This product includes software developed by Geoff Kuenning and
00055  *      other unpaid contributors.
00056  * 5. The name of Geoff Kuenning may not be used to endorse or promote
00057  *    products derived from this software without specific prior
00058  *    written permission.
00059  *
00060  * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
00061  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00062  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00063  * ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
00064  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00065  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00066  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00067  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00068  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00069  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00070  * SUCH DAMAGE.
00071  */
00072 
00073 /*
00074  * $Log$
00075  * Revision 1.1  2004/01/31 16:44:12  zrusin
00076  * ISpell plugin.
00077  *
00078  * Revision 1.4  2003/08/14 17:51:27  dom
00079  * update license - exception clause should be Lesser GPL
00080  *
00081  * Revision 1.3  2003/07/28 20:40:26  dom
00082  * fix up the license clause, further win32-registry proof some directory getting functions
00083  *
00084  * Revision 1.2  2003/07/16 22:52:39  dom
00085  * LGPL + exception license
00086  *
00087  * Revision 1.1  2003/07/15 01:15:05  dom
00088  * ispell enchant backend
00089  *
00090  * Revision 1.2  2003/01/29 05:50:11  hippietrail
00091  *
00092  * Fixed my mess in EncodingManager.
00093  * Changed many C casts to C++ casts.
00094  *
00095  * Revision 1.1  2003/01/24 05:52:33  hippietrail
00096  *
00097  * Refactored ispell code. Old ispell global variables had been put into
00098  * an allocated structure, a pointer to which was passed to many functions.
00099  * I have now made all such functions and variables private members of the
00100  * ISpellChecker class. It was C OO, now it's C++ OO.
00101  *
00102  * I've fixed the makefiles and tested compilation but am unable to test
00103  * operation. Please back out my changes if they cause problems which
00104  * are not obvious or easy to fix.
00105  *
00106  * Revision 1.5  2002/09/19 05:31:15  hippietrail
00107  *
00108  * More Ispell cleanup.  Conditional globals and DEREF macros are removed.
00109  * K&R function declarations removed, converted to Doxygen style comments
00110  * where possible.  No code has been changed (I hope).  Compiles for me but
00111  * unable to test.
00112  *
00113  * Revision 1.4  2002/09/17 03:03:29  hippietrail
00114  *
00115  * After seeking permission on the developer list I've reformatted all the
00116  * spelling source which seemed to have parts which used 2, 3, 4, and 8
00117  * spaces for tabs.  It should all look good with our standard 4-space
00118  * tabs now.
00119  * I've concentrated just on indentation in the actual code.  More prettying
00120  * could be done.
00121  * * NO code changes were made *
00122  *
00123  * Revision 1.3  2002/09/13 17:20:13  mpritchett
00124  * Fix more warnings for Linux build
00125  *
00126  * Revision 1.2  2001/05/12 16:05:42  thomasf
00127  * Big pseudo changes to ispell to make it pass around a structure rather
00128  * than rely on all sorts of globals willy nilly here and there.  Also
00129  * fixed our spelling class to work with accepting suggestions once more.
00130  * This code is dirty, gross and ugly (not to mention still not supporting
00131  * multiple hash sizes just yet) but it works on my machine and will no
00132  * doubt break other machines.
00133  *
00134  * Revision 1.1  2001/04/15 16:01:24  tomas_f
00135  * moving to spell/xp
00136  *
00137  * Revision 1.3  1998/12/29 14:55:33  eric
00138  *
00139  * I've doctored the ispell code pretty extensively here.  It is now
00140  * warning-free on Win32.  It also *works* on Win32 now, since I
00141  * replaced all the I/O calls with ANSI standard ones.
00142  *
00143  * Revision 1.2  1998/12/28 23:11:30  eric
00144  *
00145  * modified spell code and integration to build on Windows.
00146  * This is still a hack.
00147  *
00148  * Actually, it doesn't yet WORK on Windows.  It just builds.
00149  * SpellCheckInit is failing for some reason.
00150  *
00151  * Revision 1.1  1998/12/28 18:04:43  davet
00152  * Spell checker code stripped from ispell.  At this point, there are
00153  * two external routines...  the Init routine, and a check-a-word routine
00154  * which returns a boolean value, and takes a 16 bit char string.
00155  * The code resembles the ispell code as much as possible still.
00156  *
00157  * Revision 1.20  1994/01/25  07:11:34  geoff
00158  * Get rid of all old RCS log lines in preparation for the 3.1 release.
00159  *
00160  */
00161 
00162 #include "ispell_checker.h"
00163 
/*
 * The following hash algorithm is due to Ian Dall, with slight modifications
 * by Geoff Kuenning to reflect the results of testing with the English
 * dictionaries actually distributed with ispell.
 *
 * HASHSHIFT is the number of bit positions by which the hash accumulator is
 * shifted (with wrap-around) for each character folded in after the
 * leading ones.
 */
#define HASHSHIFT   5

/*
 * HASHUPPER(c) is applied to every character before it is mixed into the
 * hash.  With NO_CAPITALIZATION_SUPPORT defined it yields the character
 * unchanged; otherwise it forwards to mytoupper() (presumably so that the
 * hash does not depend on letter case -- confirm against mytoupper).
 *
 * The argument is parenthesized in the identity variant so that the macro
 * expands safely whatever expression is passed in.
 */
#ifdef NO_CAPITALIZATION_SUPPORT
#define HASHUPPER(c)    (c)
#else /* NO_CAPITALIZATION_SUPPORT */
#define HASHUPPER(c)    mytoupper(c)
#endif /* NO_CAPITALIZATION_SUPPORT */
00176 
00177 /*
00178  * \param s
00179  * \param hashtblsize
00180  */
00181 int ISpellChecker::hash (ichar_t *s, int hashtblsize)
00182 {
00183     register long   h = 0;
00184     register int    i;
00185 
00186 #ifdef ICHAR_IS_CHAR
00187     for (i = 4;  i--  &&  *s != 0;  )
00188         h = (h << 8) | HASHUPPER (*s++);
00189 #else /* ICHAR_IS_CHAR */
00190     for (i = 2;  i--  &&  *s != 0;  )
00191         h = (h << 16) | HASHUPPER (*s++);
00192 #endif /* ICHAR_IS_CHAR */
00193     while (*s != 0)
00194     {
00195         /*
00196          * We have to do circular shifts the hard way, since C doesn't
00197          * have them even though the hardware probably does.  Oh, well.
00198          */
00199         h = (h << HASHSHIFT)
00200           | ((h >> (32 - HASHSHIFT)) & ((1 << HASHSHIFT) - 1));
00201         h ^= HASHUPPER (*s++);
00202     }
00203     return static_cast<unsigned long>(h) % hashtblsize;
00204 }
KDE Home | KDE Accessibility Home | Description of Access Keys