GCC build with Myspell support (mingw-cygwin)

This commit is contained in:
Stas Degteff 2006-03-12 07:57:03 +00:00
parent 99aedb1707
commit df1529fd6a
19 changed files with 280 additions and 163 deletions

View File

@ -76,6 +76,6 @@
# Uncomment to disable Synchronet BBS support # Uncomment to disable Synchronet BBS support
#CPPFLAGS+=-DGCFG_NOSYNCHRONET #CPPFLAGS+=-DGCFG_NOSYNCHRONET
# Uncomment to disable MS Office spellchecker support (win32 only) # Uncomment to disable MS Office spellchecker support (win32 only)
CPPFLAGS+=-DGCFG_NO_MSSPELL #CPPFLAGS+=-DGCFG_NO_MSSPELL
# Uncomment to disable MySpell spellchecker support (multiplatform) # Uncomment to disable MySpell spellchecker support (multiplatform)
CPPFLAGS+=-DGCFG_NO_MYSPELL #CPPFLAGS+=-DGCFG_NO_MYSPELL

View File

@ -16,7 +16,7 @@ FOBJPATH=$(TOP)/$(OBJPATH)/$(PLATFORM)/$(TARGET)
FDEPPATH=$(TOP)/$(OBJPATH)/$(PLATFORM)/$(TARGET) FDEPPATH=$(TOP)/$(OBJPATH)/$(PLATFORM)/$(TARGET)
FLIBPATH=$(TOP)/$(LIBPATH)/$(PLATFORM) FLIBPATH=$(TOP)/$(LIBPATH)/$(PLATFORM)
.SUFFIXES: .c .cpp .all .rc .SUFFIXES: .c .cpp .all .rc .cxx
sourcelist: bld$(PLATFORM).inc sourcelist: bld$(PLATFORM).inc
@ -33,6 +33,14 @@ $(FOBJPATH)/%$(OBJEXT): %.cpp
$(CAT) $(patsubst %.cpp,%.d,$<)>>$(FDEPPATH)/$(patsubst %.cpp,%.d,$<) &&\ $(CAT) $(patsubst %.cpp,%.d,$<)>>$(FDEPPATH)/$(patsubst %.cpp,%.d,$<) &&\
rm $(patsubst %.cpp,%.d,$<) || true" rm $(patsubst %.cpp,%.d,$<) || true"
# Pattern rule: build an object file under $(FOBJPATH) from a .cxx source.
# The source is compiled with $(CXX) using $(CPPFLAGS); -MD additionally asks
# the compiler to write a dependency (.d) file.
# The trailing shell step runs only if a non-empty <source>.d exists: it writes
# the "$(FOBJPATH)/" prefix (no newline) into $(FDEPPATH)/<source>.d, appends the
# generated dependency text after it, and removes the local <source>.d.
# "|| true" keeps the build going when no dependency file was produced.
$(FOBJPATH)/%$(OBJEXT): %.cxx
@echo building $(basename $<)$(OBJEXT)
$(CXX) -c -MD $(CPPFLAGS) -o $@ $<
@$(SHELL) -c "[ -s $(patsubst %.cxx,%.d,$<) ] && \
echo -n $(FOBJPATH)/>$(FDEPPATH)/$(patsubst %.cxx,%.d,$<) && \
$(CAT) $(patsubst %.cxx,%.d,$<)>>$(FDEPPATH)/$(patsubst %.cxx,%.d,$<) &&\
rm $(patsubst %.cxx,%.d,$<) || true"
$(FOBJPATH)/%$(OBJEXT): %.c $(FOBJPATH)/%$(OBJEXT): %.c
@echo building $(basename $<)$(OBJEXT) @echo building $(basename $<)$(OBJEXT)
$(CC) -c -MD $(CFLAGS) -o $@ $< $(CC) -c -MD $(CFLAGS) -o $@ $<
@ -59,12 +67,12 @@ include bld$(PLATFORM).inc
endif endif
ifeq ($(PLATFORM),cyg) ifeq ($(PLATFORM),cyg)
OBJS=$(addprefix $(FOBJPATH)/,$(patsubst %.rc,%$(OBJEXT),$(patsubst %.c,%$(OBJEXT),$(patsubst %.cpp,%$(OBJEXT),$(filter %.c %.cpp %.rc,$(SOURCES)))))) OBJS=$(addprefix $(FOBJPATH)/,$(patsubst %.rc,%$(OBJEXT),$(patsubst %.c,%$(OBJEXT),$(patsubst %.cpp,%$(OBJEXT),$(patsubst %.cxx,%$(OBJEXT),$(filter %.c %.cpp %.cxx %.rc,$(SOURCES)))))))
else else
ifeq ($(PLATFORM),emx) ifeq ($(PLATFORM),emx)
OBJS=$(addprefix $(FOBJPATH)/,$(patsubst %.rc,%.res,$(patsubst %.c,%$(OBJEXT),$(patsubst %.cpp,%$(OBJEXT),$(filter %.c %.cpp %.rc,$(SOURCES)))))) OBJS=$(addprefix $(FOBJPATH)/,$(patsubst %.rc,%.res,$(patsubst %.c,%$(OBJEXT),$(patsubst %.cpp,%$(OBJEXT),$(patsubst %.cxx,%$(OBJEXT),$(filter %.c %.cpp %.cxx %.rc,$(SOURCES)))))))
else else
OBJS=$(addprefix $(FOBJPATH)/,$(patsubst %.c,%$(OBJEXT),$(patsubst %.cpp,%$(OBJEXT),$(filter %.c %.cpp %.rc,$(SOURCES))))) OBJS=$(addprefix $(FOBJPATH)/,$(patsubst %.c,%$(OBJEXT),$(patsubst %.cpp,%$(OBJEXT),$(patsubst %.cxx,%$(OBJEXT),$(filter %.c %.cpp %.cxx %.rc,$(SOURCES))))))
endif endif
endif endif
DEPS = $(wildcard $(FDEPPATH)/*.d) DEPS = $(wildcard $(FDEPPATH)/*.d)

View File

@ -1,10 +1,15 @@
# -*- makefile -*- # -*- makefile -*-
include GNUmakef.def include GNUmakef.def
include Config.def
.PHONY: all clean distclean dirs sourcelists deps docs .PHONY: all clean distclean dirs sourcelists deps docs
LIBS=gall gcfg gmb3 glibc uulib smblib msgidlib myspell LIBS=gall gcfg gmb3 glibc uulib smblib msgidlib
# Add the myspell library to LIBS unless CPPFLAGS contains GCFG_NO_MYSPELL
# (findstring yields an empty string when the flag is absent).
ifneq ($(findstring GCFG_NO_MYSPELL, $(CPPFLAGS)), GCFG_NO_MYSPELL)
LIBS+=myspell
endif
EXECUTABLES=golded3 goldnode rddt EXECUTABLES=golded3 goldnode rddt
all: sourcelists all: sourcelists

View File

@ -300,6 +300,7 @@ REPLYLINKLIST
REPLYLINKSHOWALWAYS REPLYLINKSHOWALWAYS
ROBOTNAME ROBOTNAME
SCHECKERDEFLANG SCHECKERDEFLANG
SCHECKERDICPATH
SCHECKERENABLED SCHECKERENABLED
SCHECKERUSERDIC SCHECKERUSERDIC
SCREENBLANKER SCREENBLANKER
@ -354,6 +355,7 @@ TIMEOUTSAVEMSG
TIMESLICE TIMESLICE
TIMESREAD TIMESREAD
TITLESTATUS TITLESTATUS
TRANSLATE
TWITMODE TWITMODE
TWITNAME TWITNAME
TWITSUBJ TWITSUBJ

View File

@ -3,6 +3,7 @@
* These macros may be used in: * These macros may be used in:
* template file, externutil command line, tearline and tagline. * template file, externutil command line, tearline and tagline.
========================================================================== ==========================================================================
@align
@areaname @areaname
@areapath @areapath
@areatype @areatype
@ -57,7 +58,9 @@
@otime @otime
@oto @oto
@otzoffset @otzoffset
@pad
@pid @pid
@pipe
@pseudo @pseudo
@rev @rev
@subject @subject
@ -69,6 +72,7 @@
@tlname @tlname
@tname @tname
@tpseudo @tpseudo
@tr
@ver @ver
@version @version
@widepid @widepid

View File

@ -9,9 +9,14 @@ endif
endif endif
TOP=.. TOP=..
include $(TOP)/Config.def
SHORTTARGET=ged SHORTTARGET=ged
TARGET=golded3 TARGET=golded3
GLIBS=gmb3 gall gcfg uulib smblib msgidlib GLIBS=gmb3 gall gcfg uulib smblib msgidlib
# Append myspell to GLIBS unless CPPFLAGS contains GCFG_NO_MYSPELL
# (findstring yields an empty string when the flag is absent).
ifneq ($(findstring GCFG_NO_MYSPELL, $(CPPFLAGS)), GCFG_NO_MYSPELL)
GLIBS+=myspell
endif
INCS=-I. -I$(TOP)/goldlib/gall -I$(TOP)/goldlib/gcfg -I$(TOP)/goldlib/gmb3 -I$(TOP)/goldlib/uulib -I$(TOP)/goldlib/smblib -I$(TOP)/goldlib/msgidlib INCS=-I. -I$(TOP)/goldlib/gall -I$(TOP)/goldlib/gcfg -I$(TOP)/goldlib/gmb3 -I$(TOP)/goldlib/uulib -I$(TOP)/goldlib/smblib -I$(TOP)/goldlib/msgidlib
ifeq ($(findstring EMX, $(PATH)), EMX) ifeq ($(findstring EMX, $(PATH)), EMX)

View File

@ -19,6 +19,11 @@ INCS+=-I$(TOP)/goldlib/glibc
endif endif
endif endif
include $(TOP)/Config.def
# Add the myspell header directory to the include path unless CPPFLAGS
# contains GCFG_NO_MYSPELL.
ifneq ($(findstring GCFG_NO_MYSPELL, $(CPPFLAGS)), GCFG_NO_MYSPELL)
INCS+=-I$(TOP)/goldlib/myspell
endif
include $(TOP)/GNUmakef.inc include $(TOP)/GNUmakef.inc
ifeq ($(PLATFORM),emx) ifeq ($(PLATFORM),emx)

View File

@ -35,7 +35,9 @@
#include <gdirposx.h> #include <gdirposx.h>
#include <gstrall.h> #include <gstrall.h>
#if !defined(GCFG_NO_MYSPELL)
#include <myspell.hxx> #include <myspell.hxx>
#endif
#include <gespell.h> #include <gespell.h>
typedef char XlatName[17]; typedef char XlatName[17];
@ -455,7 +457,7 @@ bool CMSSpellLang::AddWord(const char *text)
bool CMYSpellLang::Init(const gdirentry *entry) bool CMYSpellLang::Init(const gdirentry *entry)
{ {
gposixdir dir(entry->dirname); gposixdir dir(entry->dirname);
std::string affname = entry->name.substr(0, entry->name.length()-4); std::string affname = entry->name.substr(0, entry->name.length()-4);
strcpy(mLangCode, affname.c_str()); strcpy(mLangCode, affname.c_str());

9
goldlib/myspell/Makefile Normal file
View File

@ -0,0 +1,9 @@
# -*- makefile -*-
# Makefile for the myspell library directory.
# TOP is the relative path from this directory to the top of the source tree.
TOP=../..
# TARGET names what this directory builds.
TARGET=myspell
# Header search path: this directory plus goldlib/gall.
INCS=-I$(TOP)/goldlib/myspell -I$(TOP)/goldlib/gall
# CFLAGS is set to empty here, before the shared include files are read.
# NOTE(review): the reason for clearing it is not visible in this file - confirm.
CFLAGS=
# Shared build rules come from the top-level include files.
# NOTE(review): GNUmakef.lib presumably holds the library-building rules - verify.
include $(TOP)/GNUmakef.inc
include $(TOP)/GNUmakef.lib

View File

@ -1,4 +1,4 @@
#include "license.readme" #include "license.rea"
#include <cctype> #include <cctype>
@ -21,7 +21,7 @@ PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
pmyMgr = pmgr; pmyMgr = pmgr;
// set up its intial values // set up its intial values
achar = dp->achar; // char flag achar = dp->achar; // char flag
strip = dp->strip; // string to strip strip = dp->strip; // string to strip
appnd = dp->appnd; // string to append appnd = dp->appnd; // string to append
stripl = dp->stripl; // length of strip string stripl = dp->stripl; // length of strip string
@ -43,7 +43,7 @@ PfxEntry::~PfxEntry()
if (strip)free(strip); if (strip)free(strip);
pmyMgr = NULL; pmyMgr = NULL;
appnd = NULL; appnd = NULL;
strip = NULL; strip = NULL;
} }
@ -67,19 +67,19 @@ char * PfxEntry::add(const char * word, int len)
if (appndl) { if (appndl) {
strcpy(tword,appnd); strcpy(tword,appnd);
tlen += appndl; tlen += appndl;
} }
char * pp = tword + tlen; char * pp = tword + tlen;
strcpy(pp, (word + stripl)); strcpy(pp, (word + stripl));
return mystrdup(tword); return mystrdup(tword);
} }
} }
return NULL; return NULL;
} }
// check if this prefix entry matches // check if this prefix entry matches
struct hentry * PfxEntry::check(const char * word, int len) struct hentry * PfxEntry::check(const char * word, int len)
{ {
int cond; // condition number being examined int cond; // condition number being examined
@ -123,8 +123,8 @@ struct hentry * PfxEntry::check(const char * word, int len)
if (TESTAFF(he->astr, achar, he->alen)) return he; if (TESTAFF(he->astr, achar, he->alen)) return he;
} }
// prefix matched but no root word was found // prefix matched but no root word was found
// if XPRODUCT is allowed, try again but now // if XPRODUCT is allowed, try again but now
// ross checked combined with a suffix // ross checked combined with a suffix
if (xpflg & XPRODUCT) { if (xpflg & XPRODUCT) {
@ -144,7 +144,7 @@ SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
pmyMgr = pmgr; pmyMgr = pmgr;
// set up its intial values // set up its intial values
achar = dp->achar; // char flag achar = dp->achar; // char flag
strip = dp->strip; // string to strip strip = dp->strip; // string to strip
appnd = dp->appnd; // string to append appnd = dp->appnd; // string to append
stripl = dp->stripl; // length of strip string stripl = dp->stripl; // length of strip string
@ -167,7 +167,7 @@ SfxEntry::~SfxEntry()
if (strip) free(strip); if (strip) free(strip);
pmyMgr = NULL; pmyMgr = NULL;
appnd = NULL; appnd = NULL;
strip = NULL; strip = NULL;
} }
@ -205,10 +205,10 @@ char * SfxEntry::add(const char * word, int len)
// see if this suffix is present in the word // see if this suffix is present in the word
struct hentry * SfxEntry::check(const char * word, int len, int optflags, AffEntry* ppfx) struct hentry * SfxEntry::check(const char * word, int len, int optflags, AffEntry* ppfx)
{ {
int tmpl; // length of tmpword int tmpl; // length of tmpword
int cond; // condition beng examined int cond; // condition beng examined
struct hentry * he; // hash entry pointer struct hentry * he; // hash entry pointer
unsigned char * cp; unsigned char * cp;
@ -257,10 +257,10 @@ struct hentry * SfxEntry::check(const char * word, int len, int optflags, AffEnt
if (cond < 0) { if (cond < 0) {
if ((he = pmyMgr->lookup(tmpword)) != NULL) { if ((he = pmyMgr->lookup(tmpword)) != NULL) {
if (TESTAFF(he->astr, achar , he->alen) && if (TESTAFF(he->astr, achar , he->alen) &&
((optflags & XPRODUCT) == 0 || ((optflags & XPRODUCT) == 0 ||
TESTAFF(he->astr, ep->getFlag(), he->alen))) return he; TESTAFF(he->astr, ep->getFlag(), he->alen))) return he;
} }
} }
} }
return NULL; return NULL;
@ -274,12 +274,12 @@ struct hentry * SfxEntry::check(const char * word, int len, int optflags, AffEnt
Appendix: Understanding Affix Code Appendix: Understanding Affix Code
An affix is either a prefix or a suffix attached to root words to make An affix is either a prefix or a suffix attached to root words to make
other words. other words.
Basically a Prefix or a Suffix is a set of AffEntry objects Basically a Prefix or a Suffix is a set of AffEntry objects
which store information about the prefix or suffix along which store information about the prefix or suffix along
with supporting routines to check if a word has a particular with supporting routines to check if a word has a particular
prefix or suffix or a combination. prefix or suffix or a combination.
The structure affentry is defined as follows: The structure affentry is defined as follows:
@ -292,15 +292,15 @@ struct affentry
short stripl; // length of the strip string short stripl; // length of the strip string
short appndl; // length of the affix string short appndl; // length of the affix string
short numconds; // the number of conditions that must be met short numconds; // the number of conditions that must be met
short xpflg; // flag: XPRODUCT- combine both prefix and suffix short xpflg; // flag: XPRODUCT- combine both prefix and suffix
char conds[SETSIZE]; // array which encodes the conditions to be met char conds[SETSIZE]; // array which encodes the conditions to be met
}; };
Here is a suffix borrowed from the en_US.aff file. This file Here is a suffix borrowed from the en_US.aff file. This file
is whitespace delimited. is whitespace delimited.
SFX D Y 4 SFX D Y 4
SFX D 0 e d SFX D 0 e d
SFX D y ied [^aeiou]y SFX D y ied [^aeiou]y
SFX D 0 ed [^ey] SFX D 0 ed [^ey]
@ -318,7 +318,7 @@ Field
4 4 - indicates that sequence of 4 affentry structures are needed to 4 4 - indicates that sequence of 4 affentry structures are needed to
properly store the affix information properly store the affix information
The remaining lines describe the unique information for the 4 SfxEntry The remaining lines describe the unique information for the 4 SfxEntry
objects that make up this affix. Each line can be interpreted objects that make up this affix. Each line can be interpreted
as follows: (note fields 1 and 2 are as a check against line 1 info) as follows: (note fields 1 and 2 are as a check against line 1 info)
@ -333,57 +333,57 @@ Field
can be applied can be applied
Field 5 is interesting. Since this is a suffix, field 5 tells us that Field 5 is interesting. Since this is a suffix, field 5 tells us that
there are 2 conditions that must be met. The first condition is that there are 2 conditions that must be met. The first condition is that
the next to the last character in the word must *NOT* be any of the the next to the last character in the word must *NOT* be any of the
following "a", "e", "i", "o" or "u". The second condition is that following "a", "e", "i", "o" or "u". The second condition is that
the last character of the word must end in "y". the last character of the word must end in "y".
So how can we encode this information concisely and be able to So how can we encode this information concisely and be able to
test for both conditions in a fast manner? The answer is found test for both conditions in a fast manner? The answer is found
by studying the wonderful ispell code of Geoff Kuenning, et al. by studying the wonderful ispell code of Geoff Kuenning, et al.
(now available under a normal BSD license). (now available under a normal BSD license).
If we set up a conds array of 256 bytes indexed (0 to 255) and access it If we set up a conds array of 256 bytes indexed (0 to 255) and access it
using a character (cast to an unsigned char) of a string, we have 8 bits using a character (cast to an unsigned char) of a string, we have 8 bits
of information we can store about that character. Specifically we of information we can store about that character. Specifically we
could use each bit to say if that character is allowed in any of the could use each bit to say if that character is allowed in any of the
last (or first for prefixes) 8 characters of the word. last (or first for prefixes) 8 characters of the word.
Basically, each character at one end of the word (up to the number Basically, each character at one end of the word (up to the number
of conditions) is used to index into the conds array and the resulting of conditions) is used to index into the conds array and the resulting
value found there says whether that character is valid for a value found there says whether that character is valid for a
specific character position in the word. specific character position in the word.
For prefixes, it does this by setting bit 0 if that char is valid For prefixes, it does this by setting bit 0 if that char is valid
in the first position, bit 1 if valid in the second position, and so on. in the first position, bit 1 if valid in the second position, and so on.
If a bit is not set, then that char is not valid for that position in the If a bit is not set, then that char is not valid for that position in the
word. word.
If working with suffixes bit 0 is used for the character closest If working with suffixes bit 0 is used for the character closest
to the front, bit 1 for the next character towards the end, ..., to the front, bit 1 for the next character towards the end, ...,
with bit numconds-1 representing the last char at the end of the string. with bit numconds-1 representing the last char at the end of the string.
Note: since entries in the conds[] are 8 bits, only 8 conditions Note: since entries in the conds[] are 8 bits, only 8 conditions
(read that only 8 character positions) can be examined at one (read that only 8 character positions) can be examined at one
end of a word (the beginning for prefixes and the end for suffixes). end of a word (the beginning for prefixes and the end for suffixes).
So to make this clearer, let's encode the conds array values for the So to make this clearer, let's encode the conds array values for the
first two affentries for the suffix D described earlier. first two affentries for the suffix D described earlier.
For the first affentry: For the first affentry:
numconds = 1 (only examine the last character) numconds = 1 (only examine the last character)
conds['e'] = (1 << 0) (the word must end in an E) conds['e'] = (1 << 0) (the word must end in an E)
all others are all 0 all others are all 0
For the second affentry: For the second affentry:
numconds = 2 (only examine the last two characters) numconds = 2 (only examine the last two characters)
conds[X] = conds[X] | (1 << 0) (aeiou are not allowed) conds[X] = conds[X] | (1 << 0) (aeiou are not allowed)
where X is all characters *but* a, e, i, o, or u where X is all characters *but* a, e, i, o, or u
conds['y'] = (1 << 1) (the last char must be a y) conds['y'] = (1 << 1) (the last char must be a y)
all other bits for all other entries in the conds array are zero all other bits for all other entries in the conds array are zero

View File

@ -2,7 +2,7 @@
#define _AFFIX_HXX_ #define _AFFIX_HXX_
#include "atypes.hxx" #include "atypes.hxx"
#include "baseaffix.hxx" #include "baseaffi.hxx"
#include "affixmgr.hxx" #include "affixmgr.hxx"
@ -26,7 +26,7 @@ public:
inline bool allowCross() { return ((xpflg & XPRODUCT) != 0); } inline bool allowCross() { return ((xpflg & XPRODUCT) != 0); }
inline unsigned char getFlag() { return achar; } inline unsigned char getFlag() { return achar; }
inline const char * getKey() { return appnd; } inline const char * getKey() { return appnd; }
char * add(const char * word, int len); char * add(const char * word, int len);
inline PfxEntry * getNext() { return next; } inline PfxEntry * getNext() { return next; }
@ -60,12 +60,12 @@ public:
SfxEntry(AffixMgr* pmgr, affentry* dp ); SfxEntry(AffixMgr* pmgr, affentry* dp );
~SfxEntry(); ~SfxEntry();
struct hentry * check(const char * word, int len, int optflags, struct hentry * check(const char * word, int len, int optflags,
AffEntry* ppfx); AffEntry* ppfx);
inline bool allowCross() { return ((xpflg & XPRODUCT) != 0); } inline bool allowCross() { return ((xpflg & XPRODUCT) != 0); }
inline unsigned char getFlag() { return achar; } inline unsigned char getFlag() { return achar; }
inline const char * getKey() { return rappnd; } inline const char * getKey() { return rappnd; }
char * add(const char * word, int len); char * add(const char * word, int len);
inline SfxEntry * getNext() { return next; } inline SfxEntry * getNext() { return next; }

View File

@ -1,4 +1,4 @@
#include "license.readme" #include "license.rea"
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
@ -17,10 +17,10 @@ extern void mychomp(char * s);
extern char * mystrdup(const char * s); extern char * mystrdup(const char * s);
extern char * myrevstrdup(const char * s); extern char * myrevstrdup(const char * s);
extern char * mystrsep(char ** sptr, const char delim); extern char * mystrsep(char ** sptr, const char delim);
extern int isSubset(const char * s1, const char * s2); extern int isSubset(const char * s1, const char * s2);
AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr) AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr)
{ {
// register hash manager and load affix data from aff file // register hash manager and load affix data from aff file
pHMgr = ptr; pHMgr = ptr;
@ -47,9 +47,9 @@ AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr)
} }
AffixMgr::~AffixMgr() AffixMgr::~AffixMgr()
{ {
// pass through linked prefix entries and clean up // pass through linked prefix entries and clean up
for (int i=0; i < SETSIZE ;i++) { for (int i=0; i < SETSIZE ;i++) {
pFlag[i] = NULL; pFlag[i] = NULL;
@ -60,7 +60,7 @@ AffixMgr::~AffixMgr()
delete(ptr); delete(ptr);
ptr = nptr; ptr = nptr;
nptr = NULL; nptr = NULL;
} }
} }
// pass through linked suffix entries and clean up // pass through linked suffix entries and clean up
@ -73,31 +73,31 @@ AffixMgr::~AffixMgr()
delete(ptr); delete(ptr);
ptr = nptr; ptr = nptr;
nptr = NULL; nptr = NULL;
} }
} }
if (trystring) free(trystring); if (trystring) free(trystring);
trystring=NULL; trystring=NULL;
if (encoding) free(encoding); if (encoding) free(encoding);
encoding=NULL; encoding=NULL;
if (maptable) { if (maptable) {
for (int j=0; j < nummap; j++) { for (int j=0; j < nummap; j++) {
free(maptable[j].set); free(maptable[j].set);
maptable[j].set = NULL; maptable[j].set = NULL;
maptable[j].len = 0; maptable[j].len = 0;
} }
free(maptable); free(maptable);
maptable = NULL; maptable = NULL;
} }
nummap = 0; nummap = 0;
if (reptable) { if (reptable) {
for (int j=0; j < numrep; j++) { for (int j=0; j < numrep; j++) {
free(reptable[j].pattern); free(reptable[j].pattern);
free(reptable[j].replacement); free(reptable[j].replacement);
reptable[j].pattern = NULL; reptable[j].pattern = NULL;
reptable[j].replacement = NULL; reptable[j].replacement = NULL;
} }
free(reptable); free(reptable);
reptable = NULL; reptable = NULL;
} }
numrep = 0; numrep = 0;
@ -108,13 +108,13 @@ AffixMgr::~AffixMgr()
} }
// read in aff file and build up prefix and suffix entry objects // read in aff file and build up prefix and suffix entry objects
int AffixMgr::parse_file(const char * affpath) int AffixMgr::parse_file(const char * affpath)
{ {
// io buffers // io buffers
char line[MAXLNLEN+1]; char line[MAXLNLEN+1];
// affix type // affix type
char ft; char ft;
@ -195,7 +195,7 @@ int AffixMgr::parse_file(const char * affpath)
} }
fclose(afflst); fclose(afflst);
// now we can speed up performance greatly taking advantage of the // now we can speed up performance greatly taking advantage of the
// relationship between the affixes and the idea of "subsets". // relationship between the affixes and the idea of "subsets".
// View each prefix as a potential leading subset of another and view // View each prefix as a potential leading subset of another and view
@ -208,14 +208,14 @@ int AffixMgr::parse_file(const char * affpath)
// The same argument goes for suffix string that are reversed. // The same argument goes for suffix string that are reversed.
// Then to top this off why not examine the first char of the word to quickly // Then to top this off why not examine the first char of the word to quickly
// limit the set of prefixes to examine (i.e. the prefixes to examine must // limit the set of prefixes to examine (i.e. the prefixes to examine must
// be leading supersets of the first character of the word (if they exist) // be leading supersets of the first character of the word (if they exist)
// To take advantage of this "subset" relationship, we need to add two links // To take advantage of this "subset" relationship, we need to add two links
// from entry. One to take next if the current prefix is found (call it nexteq) // from entry. One to take next if the current prefix is found (call it nexteq)
// and one to take next if the current prefix is not found (call it nextne). // and one to take next if the current prefix is not found (call it nextne).
// Since we have built ordered lists, all that remains is to properly intialize // Since we have built ordered lists, all that remains is to properly intialize
// the nextne and nexteq pointers that relate them // the nextne and nexteq pointers that relate them
process_pfx_order(); process_pfx_order();
@ -225,7 +225,7 @@ int AffixMgr::parse_file(const char * affpath)
} }
// we want to be able to quickly access prefix information // we want to be able to quickly access prefix information
// both by prefix flag, and sorted by prefix string itself // both by prefix flag, and sorted by prefix string itself
// so we need to set up two indexes // so we need to set up two indexes
int AffixMgr::build_pfxlist(AffEntry* pfxptr) int AffixMgr::build_pfxlist(AffEntry* pfxptr)
@ -258,7 +258,7 @@ int AffixMgr::build_pfxlist(AffEntry* pfxptr)
// now handle the general case // now handle the general case
unsigned char sp = *((const unsigned char *)key); unsigned char sp = *((const unsigned char *)key);
ptr = (PfxEntry*)pStart[sp]; ptr = (PfxEntry*)pStart[sp];
/* handle the insert at top of list case */ /* handle the insert at top of list case */
if ((!ptr) || ( strcmp( ep->getKey() , ptr->getKey() ) <= 0)) { if ((!ptr) || ( strcmp( ep->getKey() , ptr->getKey() ) <= 0)) {
ep->setNext(ptr); ep->setNext(ptr);
@ -312,7 +312,7 @@ int AffixMgr::build_sfxlist(AffEntry* sfxptr)
// now handle the normal case // now handle the normal case
unsigned char sp = *((const unsigned char *)key); unsigned char sp = *((const unsigned char *)key);
ptr = (SfxEntry*)sStart[sp]; ptr = (SfxEntry*)sStart[sp];
/* handle the insert at top of list case */ /* handle the insert at top of list case */
if ((!ptr) || ( strcmp( ep->getKey() , ptr->getKey() ) <= 0)) { if ((!ptr) || ( strcmp( ep->getKey() , ptr->getKey() ) <= 0)) {
ep->setNext(ptr); ep->setNext(ptr);
@ -344,7 +344,7 @@ int AffixMgr::process_pfx_order()
ptr = (PfxEntry*)pStart[i]; ptr = (PfxEntry*)pStart[i];
// look through the remainder of the list // look through the remainder of the list
// and find next entry with affix that // and find next entry with affix that
// the current one is not a subset of // the current one is not a subset of
// mark that as destination for NextNE // mark that as destination for NextNE
// use next in list that you are a subset // use next in list that you are a subset
@ -358,7 +358,7 @@ int AffixMgr::process_pfx_order()
} }
ptr->setNextNE(nptr); ptr->setNextNE(nptr);
ptr->setNextEQ(NULL); ptr->setNextEQ(NULL);
if ((ptr->getNext()) && isSubset(ptr->getKey() , (ptr->getNext())->getKey())) if ((ptr->getNext()) && isSubset(ptr->getKey() , (ptr->getNext())->getKey()))
ptr->setNextEQ(ptr->getNext()); ptr->setNextEQ(ptr->getNext());
} }
@ -394,7 +394,7 @@ int AffixMgr::process_sfx_order()
ptr = (SfxEntry *) sStart[i]; ptr = (SfxEntry *) sStart[i];
// look through the remainder of the list // look through the remainder of the list
// and find next entry with affix that // and find next entry with affix that
// the current one is not a subset of // the current one is not a subset of
// mark that as destination for NextNE // mark that as destination for NextNE
// use next in list that you are a subset // use next in list that you are a subset
@ -407,7 +407,7 @@ int AffixMgr::process_sfx_order()
} }
ptr->setNextNE(nptr); ptr->setNextNE(nptr);
ptr->setNextEQ(NULL); ptr->setNextEQ(NULL);
if ((ptr->getNext()) && isSubset(ptr->getKey(),(ptr->getNext())->getKey())) if ((ptr->getNext()) && isSubset(ptr->getKey(),(ptr->getNext())->getKey()))
ptr->setNextEQ(ptr->getNext()); ptr->setNextEQ(ptr->getNext());
} }
@ -490,12 +490,12 @@ void AffixMgr::encodeit(struct affentry * ptr, char * cs)
c = 0; c = 0;
} }
// end of condition // end of condition
if (c != 0) { if (c != 0) {
ec = 1; ec = 1;
} }
if (ec) { if (ec) {
if (grp == 1) { if (grp == 1) {
if (neg == 0) { if (neg == 0) {
@ -513,7 +513,7 @@ void AffixMgr::encodeit(struct affentry * ptr, char * cs)
} }
} }
neg = 0; neg = 0;
grp = 0; grp = 0;
nm = 0; nm = 0;
} else { } else {
// not a group so just set the proper bit for this char // not a group so just set the proper bit for this char
@ -521,7 +521,7 @@ void AffixMgr::encodeit(struct affentry * ptr, char * cs)
if (c == '.') { if (c == '.') {
// wild card character so set them all // wild card character so set them all
for (j=0;j<SETSIZE;j++) ptr->conds[j] = ptr->conds[j] | (1 << n); for (j=0;j<SETSIZE;j++) ptr->conds[j] = ptr->conds[j] | (1 << n);
} else { } else {
ptr->conds[(unsigned int) c] = ptr->conds[(unsigned int)c] | (1 << n); ptr->conds[(unsigned int) c] = ptr->conds[(unsigned int)c] | (1 << n);
} }
} }
@ -541,7 +541,7 @@ void AffixMgr::encodeit(struct affentry * ptr, char * cs)
struct hentry * AffixMgr::prefix_check (const char * word, int len) struct hentry * AffixMgr::prefix_check (const char * word, int len)
{ {
struct hentry * rv= NULL; struct hentry * rv= NULL;
// first handle the special case of 0 length prefixes // first handle the special case of 0 length prefixes
PfxEntry * pe = (PfxEntry *) pStart[0]; PfxEntry * pe = (PfxEntry *) pStart[0];
while (pe) { while (pe) {
@ -549,7 +549,7 @@ struct hentry * AffixMgr::prefix_check (const char * word, int len)
if (rv) return rv; if (rv) return rv;
pe = pe->getNext(); pe = pe->getNext();
} }
// now handle the general case // now handle the general case
unsigned char sp = *((const unsigned char *)word); unsigned char sp = *((const unsigned char *)word);
PfxEntry * pptr = (PfxEntry *)pStart[sp]; PfxEntry * pptr = (PfxEntry *)pStart[sp];
@ -563,7 +563,7 @@ struct hentry * AffixMgr::prefix_check (const char * word, int len)
pptr = pptr->getNextNE(); pptr = pptr->getNextNE();
} }
} }
return NULL; return NULL;
} }
@ -574,12 +574,12 @@ struct hentry * AffixMgr::compound_check (const char * word, int len, char compo
struct hentry * rv= NULL; struct hentry * rv= NULL;
char * st; char * st;
char ch; char ch;
// handle case of string too short to be a piece of a compound word // handle case of string too short to be a piece of a compound word
if (len < cpdmin) return NULL; if (len < cpdmin) return NULL;
st = mystrdup(word); st = mystrdup(word);
for (i=cpdmin; i < (len - (cpdmin-1)); i++) { for (i=cpdmin; i < (len - (cpdmin-1)); i++) {
ch = st[i]; ch = st[i];
@ -599,23 +599,23 @@ struct hentry * AffixMgr::compound_check (const char * word, int len, char compo
free(st); free(st);
return rv; return rv;
} }
rv = compound_check((word+i),strlen(word+i),compound_flag); rv = compound_check((word+i),strlen(word+i),compound_flag);
if (rv) { if (rv) {
free(st); free(st);
return rv; return rv;
} }
} }
st[i] = ch; st[i] = ch;
} }
free(st); free(st);
return NULL; return NULL;
} }
// check word for suffixes // check word for suffixes
struct hentry * AffixMgr::suffix_check (const char * word, int len, struct hentry * AffixMgr::suffix_check (const char * word, int len,
int sfxopts, AffEntry * ppfx) int sfxopts, AffEntry * ppfx)
{ {
struct hentry * rv = NULL; struct hentry * rv = NULL;
@ -627,7 +627,7 @@ struct hentry * AffixMgr::suffix_check (const char * word, int len,
if (rv) return rv; if (rv) return rv;
se = se->getNext(); se = se->getNext();
} }
// now handle the general case // now handle the general case
char * tmpword = myrevstrdup(word); char * tmpword = myrevstrdup(word);
unsigned char sp = *((const unsigned char *)tmpword); unsigned char sp = *((const unsigned char *)tmpword);
@ -645,7 +645,7 @@ struct hentry * AffixMgr::suffix_check (const char * word, int len,
sptr = sptr->getNextNE(); sptr = sptr->getNextNE();
} }
} }
free(tmpword); free(tmpword);
return NULL; return NULL;
} }
@ -657,7 +657,7 @@ struct hentry * AffixMgr::affix_check (const char * word, int len)
{ {
struct hentry * rv= NULL; struct hentry * rv= NULL;
// check all prefixes (also crossed with suffixes if allowed) // check all prefixes (also crossed with suffixes if allowed)
rv = prefix_check(word, len); rv = prefix_check(word, len);
if (rv) return rv; if (rv) return rv;
@ -667,7 +667,7 @@ struct hentry * AffixMgr::affix_check (const char * word, int len)
} }
int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, int AffixMgr::expand_rootword(struct guessword * wlst, int maxn,
const char * ts, int wl, const char * ap, int al) const char * ts, int wl, const char * ap, int al)
{ {
@ -741,7 +741,7 @@ int AffixMgr::expand_rootword(struct guessword * wlst, int maxn,
nh++; nh++;
} else { } else {
free(newword); free(newword);
} }
} }
ptr = (PfxEntry *)ptr ->getFlgNxt(); ptr = (PfxEntry *)ptr ->getFlgNxt();
} }
@ -840,7 +840,7 @@ int AffixMgr::parse_try(char * line)
if (np != 2) { if (np != 2) {
fprintf(stderr,"error: missing TRY information\n"); fprintf(stderr,"error: missing TRY information\n");
return 1; return 1;
} }
return 0; return 0;
} }
@ -870,7 +870,7 @@ int AffixMgr::parse_set(char * line)
if (np != 2) { if (np != 2) {
fprintf(stderr,"error: missing SET information\n"); fprintf(stderr,"error: missing SET information\n");
return 1; return 1;
} }
return 0; return 0;
} }
@ -926,7 +926,7 @@ int AffixMgr::parse_cpdmin(char * line)
if (np != 2) { if (np != 2) {
fprintf(stderr,"error: missing compound min information\n"); fprintf(stderr,"error: missing compound min information\n");
return 1; return 1;
} }
if ((cpdmin < 1) || (cpdmin > 50)) cpdmin = 3; if ((cpdmin < 1) || (cpdmin > 50)) cpdmin = 3;
return 0; return 0;
} }
@ -947,7 +947,7 @@ int AffixMgr::parse_reptable(char * line, FILE * af)
if (*piece != '\0') { if (*piece != '\0') {
switch(i) { switch(i) {
case 0: { np++; break; } case 0: { np++; break; }
case 1: { case 1: {
numrep = atoi(piece); numrep = atoi(piece);
if (numrep < 1) { if (numrep < 1) {
fprintf(stderr,"incorrect number of entries in replacement table\n"); fprintf(stderr,"incorrect number of entries in replacement table\n");
@ -967,8 +967,8 @@ int AffixMgr::parse_reptable(char * line, FILE * af)
if (np != 2) { if (np != 2) {
fprintf(stderr,"error: missing replacement table information\n"); fprintf(stderr,"error: missing replacement table information\n");
return 1; return 1;
} }
/* now parse the numrep lines to read in the remainder of the table */ /* now parse the numrep lines to read in the remainder of the table */
char * nl = line; char * nl = line;
for (int j=0; j < numrep; j++) { for (int j=0; j < numrep; j++) {
@ -1022,7 +1022,7 @@ int AffixMgr::parse_maptable(char * line, FILE * af)
if (*piece != '\0') { if (*piece != '\0') {
switch(i) { switch(i) {
case 0: { np++; break; } case 0: { np++; break; }
case 1: { case 1: {
nummap = atoi(piece); nummap = atoi(piece);
if (nummap < 1) { if (nummap < 1) {
fprintf(stderr,"incorrect number of entries in map table\n"); fprintf(stderr,"incorrect number of entries in map table\n");
@ -1042,8 +1042,8 @@ int AffixMgr::parse_maptable(char * line, FILE * af)
if (np != 2) { if (np != 2) {
fprintf(stderr,"error: missing map table information\n"); fprintf(stderr,"error: missing map table information\n");
return 1; return 1;
} }
/* now parse the nummap lines to read in the remainder of the table */ /* now parse the nummap lines to read in the remainder of the table */
char * nl = line; char * nl = line;
for (int j=0; j < nummap; j++) { for (int j=0; j < nummap; j++) {
@ -1064,7 +1064,7 @@ int AffixMgr::parse_maptable(char * line, FILE * af)
} }
break; break;
} }
case 1: { maptable[j].set = mystrdup(piece); case 1: { maptable[j].set = mystrdup(piece);
maptable[j].len = strlen(maptable[j].set); maptable[j].len = strlen(maptable[j].set);
break; } break; }
default: break; default: break;
@ -1105,17 +1105,17 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af)
switch(i) { switch(i) {
// piece 1 - is type of affix // piece 1 - is type of affix
case 0: { np++; break; } case 0: { np++; break; }
// piece 2 - is affix char // piece 2 - is affix char
case 1: { np++; achar = *piece; break; } case 1: { np++; achar = *piece; break; }
// piece 3 - is cross product indicator // piece 3 - is cross product indicator
case 2: { np++; if (*piece == 'Y') ff = XPRODUCT; break; } case 2: { np++; if (*piece == 'Y') ff = XPRODUCT; break; }
// piece 4 - is number of affentries // piece 4 - is number of affentries
case 3: { case 3: {
np++; np++;
numents = atoi(piece); numents = atoi(piece);
ptr = (struct affentry *) malloc(numents * sizeof(struct affentry)); ptr = (struct affentry *) malloc(numents * sizeof(struct affentry));
ptr->xpflg = ff; ptr->xpflg = ff;
ptr->achar = achar; ptr->achar = achar;
@ -1134,7 +1134,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af)
free(ptr); free(ptr);
return 1; return 1;
} }
// store away ptr to first affentry // store away ptr to first affentry
nptr = ptr; nptr = ptr;
@ -1152,14 +1152,14 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af)
switch(i) { switch(i) {
// piece 1 - is type // piece 1 - is type
case 0: { case 0: {
np++; np++;
if (nptr != ptr) nptr->xpflg = ptr->xpflg; if (nptr != ptr) nptr->xpflg = ptr->xpflg;
break; break;
} }
// piece 2 - is affix char // piece 2 - is affix char
case 1: { case 1: {
np++; np++;
if (*piece != achar) { if (*piece != achar) {
fprintf(stderr, "error: affix %c is corrupt near line %s\n",achar,nl); fprintf(stderr, "error: affix %c is corrupt near line %s\n",achar,nl);
@ -1171,8 +1171,8 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af)
break; break;
} }
// piece 3 - is string to strip or 0 for null // piece 3 - is string to strip or 0 for null
case 2: { case 2: {
np++; np++;
nptr->strip = mystrdup(piece); nptr->strip = mystrdup(piece);
nptr->stripl = strlen(nptr->strip); nptr->stripl = strlen(nptr->strip);
@ -1180,12 +1180,12 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af)
free(nptr->strip); free(nptr->strip);
nptr->strip=mystrdup(""); nptr->strip=mystrdup("");
nptr->stripl = 0; nptr->stripl = 0;
} }
break; break;
} }
// piece 4 - is affix string or 0 for null // piece 4 - is affix string or 0 for null
case 3: { case 3: {
np++; np++;
nptr->appnd = mystrdup(piece); nptr->appnd = mystrdup(piece);
nptr->appndl = strlen(nptr->appnd); nptr->appndl = strlen(nptr->appnd);
@ -1193,8 +1193,8 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af)
free(nptr->appnd); free(nptr->appnd);
nptr->appnd=mystrdup(""); nptr->appnd=mystrdup("");
nptr->appndl = 0; nptr->appndl = 0;
} }
break; break;
} }
// piece 5 - is the conditions descriptions // piece 5 - is the conditions descriptions
@ -1214,7 +1214,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af)
} }
nptr++; nptr++;
} }
// now create SfxEntry or PfxEntry objects and use links to // now create SfxEntry or PfxEntry objects and use links to
// build an ordered (sorted by affix string) list // build an ordered (sorted by affix string) list
nptr = ptr; nptr = ptr;
@ -1224,10 +1224,10 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af)
build_pfxlist((AffEntry *)pfxptr); build_pfxlist((AffEntry *)pfxptr);
} else { } else {
SfxEntry * sfxptr = new SfxEntry(this,nptr); SfxEntry * sfxptr = new SfxEntry(this,nptr);
build_sfxlist((AffEntry *)sfxptr); build_sfxlist((AffEntry *)sfxptr);
} }
nptr++; nptr++;
} }
free(ptr); free(ptr);
return 0; return 0;
} }

View File

@ -2,7 +2,7 @@
#define _AFFIXMGR_HXX_ #define _AFFIXMGR_HXX_
#include "atypes.hxx" #include "atypes.hxx"
#include "baseaffix.hxx" #include "baseaffi.hxx"
#include "hashmgr.hxx" #include "hashmgr.hxx"
#include <cstdio> #include <cstdio>
@ -26,13 +26,13 @@ class AffixMgr
public: public:
AffixMgr(const char * affpath, HashMgr * ptr); AffixMgr(const char * affpath, HashMgr * ptr);
~AffixMgr(); ~AffixMgr();
struct hentry * affix_check(const char * word, int len); struct hentry * affix_check(const char * word, int len);
struct hentry * prefix_check(const char * word, int len); struct hentry * prefix_check(const char * word, int len);
struct hentry * suffix_check(const char * word, int len, int sfxopts, AffEntry* ppfx); struct hentry * suffix_check(const char * word, int len, int sfxopts, AffEntry* ppfx);
int expand_rootword(struct guessword * wlst, int maxn, int expand_rootword(struct guessword * wlst, int maxn,
const char * ts, int wl, const char * ap, int al); const char * ts, int wl, const char * ap, int al);
struct hentry * compound_check(const char * word, int len, char compound_flag); struct hentry * compound_check(const char * word, int len, char compound_flag);
struct hentry * lookup(const char * word); struct hentry * lookup(const char * word);
@ -44,7 +44,7 @@ public:
char * get_try_string(); char * get_try_string();
char * get_compound(); char * get_compound();
bool get_nosplitsugs(); bool get_nosplitsugs();
private: private:
int parse_file(const char * affpath); int parse_file(const char * affpath);
int parse_try(char * line); int parse_try(char * line);

View File

@ -0,0 +1,17 @@
#ifndef _BASEAFF_HXX_
#define _BASEAFF_HXX_
// AffEntry: plain holder for the data describing one affix entry.
// All members are protected, so they are reachable only from derived classes
// (and friends); this class declares no member functions of its own.
// NOTE(review): presumably the common base of PfxEntry and SfxEntry, with
// fields mirroring the same-named members of struct affentry -- confirm
// against the affentry/atypes headers.
// NOTE(review): SETSIZE is not defined in this header; it has to be visible
// before this file is included (presumably via atypes.hxx) -- confirm.
class AffEntry
{
protected:
// affix text; presumably the string attached to the word -- TODO confirm
char * appnd;
// text to strip; presumably removed from the word before appnd is applied
// -- TODO confirm
char * strip;
// presumably strlen(appnd) -- TODO confirm
short appndl;
// presumably strlen(strip) -- TODO confirm
short stripl;
// presumably the number of condition positions described by conds
// -- TODO confirm
short numconds;
// presumably the cross-product indicator (e.g. XPRODUCT) -- TODO confirm
short xpflg;
// presumably the affix flag character from the affix file -- TODO confirm
char achar;
// condition table with SETSIZE entries; encoding is not visible here
// -- TODO confirm
char conds[SETSIZE];
};
#endif

View File

@ -1,4 +1,4 @@
#include "license.readme" #include "license.rea"
#if !defined(_MSC_VER) #if !defined(_MSC_VER)
#include <unistd.h> #include <unistd.h>
@ -104,14 +104,14 @@ int HashMgr::add_word(const char * word, int wl, const char * aff, int al)
hp->alen = al; hp->alen = al;
hp->word = mystrdup(word); hp->word = mystrdup(word);
hp->astr = mystrdup(aff); hp->astr = mystrdup(aff);
hp->next = NULL; hp->next = NULL;
while (dp->next != NULL) dp=dp->next; while (dp->next != NULL) dp=dp->next;
dp->next = hp; dp->next = hp;
if ((wl) && (hp->word == NULL)) return 1; if ((wl) && (hp->word == NULL)) return 1;
if ((al) && (hp->astr == NULL)) return 1; if ((al) && (hp->astr == NULL)) return 1;
} }
return 0; return 0;
} }
@ -159,7 +159,7 @@ int HashMgr::load_tables(const char * tpath)
if (! fgets(ts, MAXDELEN-1,rawdict)) return 2; if (! fgets(ts, MAXDELEN-1,rawdict)) return 2;
mychomp(ts); mychomp(ts);
tablesize = atoi(ts); tablesize = atoi(ts);
if (!tablesize) return 4; if (!tablesize) return 4;
tablesize = tablesize + 5; tablesize = tablesize + 5;
if ((tablesize %2) == 0) tablesize++; if ((tablesize %2) == 0) tablesize++;
@ -186,7 +186,7 @@ int HashMgr::load_tables(const char * tpath)
wl = strlen(ts); wl = strlen(ts);
// add the word and its index // add the word and its index
if (add_word(ts,wl,ap,al)) if (add_word(ts,wl,ap,al))
return 5;; return 5;;
} }

View File

@ -0,0 +1,61 @@
/*
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
* And Contributors. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. All modifications to the source code must be clearly marked as
* such. Binary redistributions based on modified source code
* must be clearly marked as modified versions in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* NOTE: A special thanks and credit goes to Geoff Kuenning
* the creator of ispell. MySpell's affix algorithms were
* based on those of ispell which should be noted is
* copyright Geoff Kuenning et.al. and now available
* under a BSD style license. For more information on ispell
* and affix compression in general, please see:
* http://www.cs.ucla.edu/ficus-members/geoff/ispell.html
* (the home page for ispell)
*
* An almost complete rewrite of MySpell for use by
* the Mozilla project has been developed by David Einstein
* (Deinst@world.std.com). David and I are now
* working on parallel development tracks to help
* our respective projects (Mozilla and OpenOffice.org
* and we will maintain full affix file and dictionary
* file compatibility and work on merging our versions
* of MySpell back into a single tree. David has been
* a significant help in improving MySpell.
*
* Special thanks also go to La'szlo' Ne'meth
* <nemethl@gyorsposta.hu> who is the author of the
* Hungarian dictionary and who developed and contributed
* the code to support compound words in MySpell
* and fixed numerous problems with the encoding
* case conversion tables.
*
*/

View File

@ -2,7 +2,6 @@ affentry cxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun
affixmgr cxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun affixmgr cxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun
csutil cxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun csutil cxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun
dictmgr cxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun dictmgr cxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun
example cxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun
hashmgr cxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun hashmgr cxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun
hashmgr hxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun hashmgr hxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun
myspell cxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun myspell cxx all ovl bcd bco bcx wcn wco wcx lnx emx djg rsx cyg be sun

View File

@ -1,4 +1,4 @@
#include "license.readme" #include "license.rea"
#include <cstring> #include <cstring>
#include <cstdlib> #include <cstdlib>
@ -53,12 +53,12 @@ MySpell::~MySpell()
// make a copy of src at destination while removing all leading // make a copy of src at destination while removing all leading
// blanks and removing any trailing periods after recording // blanks and removing any trailing periods after recording
// their presence with the abbreviation flag // their presence with the abbreviation flag
// also since already going through character by character, // also since already going through character by character,
// set the capitalization type // set the capitalization type
// return the length of the "cleaned" word // return the length of the "cleaned" word
int MySpell::cleanword(char * dest, const char * src, int * pcaptype, int * pabbrev) int MySpell::cleanword(char * dest, const char * src, int * pcaptype, int * pabbrev)
{ {
// with the new breakiterator code this should not be needed anymore // with the new breakiterator code this should not be needed anymore
const char * special_chars = "._#$%&()* +,-/:;<=>[]\\^`{|}~\t \x0a\x0d\x01\'\""; const char * special_chars = "._#$%&()* +,-/:;<=>[]\\^`{|}~\t \x0a\x0d\x01\'\"";
@ -68,8 +68,8 @@ int MySpell::cleanword(char * dest, const char * src, int * pcaptype, int * pabb
// first skip over any leading special characters // first skip over any leading special characters
while ((*q != '\0') && (strchr(special_chars,(int)(*q)))) q++; while ((*q != '\0') && (strchr(special_chars,(int)(*q)))) q++;
// now strip off any trailing special characters // now strip off any trailing special characters
// if a period comes after a normal char record its presence // if a period comes after a normal char record its presence
*pabbrev = 0; *pabbrev = 0;
int nl = strlen((const char *)q); int nl = strlen((const char *)q);
@ -77,9 +77,9 @@ int MySpell::cleanword(char * dest, const char * src, int * pcaptype, int * pabb
nl--; nl--;
} }
if ( *(q+nl) == '.' ) *pabbrev = 1; if ( *(q+nl) == '.' ) *pabbrev = 1;
// if no characters are left it can't be an abbreviation and can't be capitalized // if no characters are left it can't be an abbreviation and can't be capitalized
if (nl <= 0) { if (nl <= 0) {
*pcaptype = NOCAP; *pcaptype = NOCAP;
*pabbrev = 0; *pabbrev = 0;
*p = '\0'; *p = '\0';
@ -111,8 +111,8 @@ int MySpell::cleanword(char * dest, const char * src, int * pcaptype, int * pabb
*pcaptype = HUHCAP; *pcaptype = HUHCAP;
} }
return nc; return nc;
} }
int MySpell::spell(const char * word) int MySpell::spell(const char * word)
{ {
@ -129,8 +129,8 @@ int MySpell::spell(const char * word)
switch(captype) { switch(captype) {
case HUHCAP: case HUHCAP:
case NOCAP: { case NOCAP: {
rv = check(cw); rv = check(cw);
if ((abbv) && !(rv)) { if ((abbv) && !(rv)) {
memcpy(wspace,cw,wl); memcpy(wspace,cw,wl);
*(wspace+wl) = '.'; *(wspace+wl) = '.';
@ -155,9 +155,9 @@ int MySpell::spell(const char * word)
*(wspace+wl+1) = '\0'; *(wspace+wl+1) = '\0';
rv = check(wspace); rv = check(wspace);
} }
break; break;
} }
case INITCAP: { case INITCAP: {
memcpy(wspace,cw,(wl+1)); memcpy(wspace,cw,(wl+1));
mkallsmall(wspace, csconv); mkallsmall(wspace, csconv);
rv = check(wspace); rv = check(wspace);
@ -168,7 +168,7 @@ int MySpell::spell(const char * word)
*(wspace+wl+1) = '\0'; *(wspace+wl+1) = '\0';
rv = check(wspace); rv = check(wspace);
} }
break; break;
} }
} }
if (rv) return 1; if (rv) return 1;
@ -216,12 +216,12 @@ int MySpell::suggest(char*** slst, const char * word)
if (wlst == NULL) return 0; if (wlst == NULL) return 0;
switch(captype) { switch(captype) {
case NOCAP: { case NOCAP: {
ns = pSMgr->suggest(wlst, ns, cw); ns = pSMgr->suggest(wlst, ns, cw);
break; break;
} }
case INITCAP: { case INITCAP: {
memcpy(wspace,cw,(wl+1)); memcpy(wspace,cw,(wl+1));
mkallsmall(wspace, csconv); mkallsmall(wspace, csconv);
@ -230,29 +230,29 @@ int MySpell::suggest(char*** slst, const char * word)
for (int j=0; j < ns; j++) for (int j=0; j < ns; j++)
mkinitcap(wlst[j], csconv); mkinitcap(wlst[j], csconv);
} }
ns = pSMgr->suggest(wlst,ns,cw); ns = pSMgr->suggest(wlst,ns,cw);
break; break;
} }
case HUHCAP: { case HUHCAP: {
ns = pSMgr->suggest(wlst, ns, cw); ns = pSMgr->suggest(wlst, ns, cw);
if (ns != -1) { if (ns != -1) {
memcpy(wspace,cw,(wl+1)); memcpy(wspace,cw,(wl+1));
mkallsmall(wspace, csconv); mkallsmall(wspace, csconv);
ns = pSMgr->suggest(wlst, ns, wspace); ns = pSMgr->suggest(wlst, ns, wspace);
} }
break; break;
} }
case ALLCAP: { case ALLCAP: {
memcpy(wspace,cw,(wl+1)); memcpy(wspace,cw,(wl+1));
mkallsmall(wspace, csconv); mkallsmall(wspace, csconv);
ns = pSMgr->suggest(wlst, ns, wspace); ns = pSMgr->suggest(wlst, ns, wspace);
if (ns > 0) { if (ns > 0) {
for (int j=0; j < ns; j++) for (int j=0; j < ns; j++)
mkallcap(wlst[j], csconv); mkallcap(wlst[j], csconv);
} }
if (ns != -1) if (ns != -1)
ns = pSMgr->suggest(wlst, ns , cw); ns = pSMgr->suggest(wlst, ns , cw);
break; break;
} }
@ -262,22 +262,22 @@ int MySpell::suggest(char*** slst, const char * word)
return ns; return ns;
} }
// try ngram approach since found nothing // try ngram approach since found nothing
if (ns == 0) { if (ns == 0) {
ns = pSMgr->ngsuggest(wlst, cw, pHMgr); ns = pSMgr->ngsuggest(wlst, cw, pHMgr);
if (ns) { if (ns) {
switch(captype) { switch(captype) {
case NOCAP: break; case NOCAP: break;
case HUHCAP: break; case HUHCAP: break;
case INITCAP: { case INITCAP: {
for (int j=0; j < ns; j++) for (int j=0; j < ns; j++)
mkinitcap(wlst[j], csconv); mkinitcap(wlst[j], csconv);
} }
break; break;
case ALLCAP: { case ALLCAP: {
for (int j=0; j < ns; j++) for (int j=0; j < ns; j++)
mkallcap(wlst[j], csconv); mkallcap(wlst[j], csconv);
} }
break; break;
} }
*slst = wlst; *slst = wlst;

View File

@ -1,4 +1,4 @@
#include "license.readme" #include "license.rea"
#include <cstdlib> #include <cstdlib>
#include <cctype> #include <cctype>