Added support for MySpell spellchecker

This commit is contained in:
Ianos Gnatiuc
2006-03-11 17:04:06 +00:00
parent dbac4566d8
commit 8a041219e4
24 changed files with 489 additions and 198 deletions

View File

@@ -101,18 +101,23 @@
#define NL "\n"
#endif
/* ----------------------------------------------------------------*/
/* ------------------------------------------------------------------
// Spellchecker */
/* The MSSPELL backend is switched off automatically on non-Win32 targets. */
#if !defined(__WIN32__) && !defined(GCFG_NO_MSSPELL)
#define GCFG_NO_MSSPELL
#endif
/* GCFG_SPELL_INCLUDED marks builds that carry the spellchecker code.
   NOTE(review): with "&&" it is defined only when neither backend has been
   disabled, so it is never defined here on non-Win32 targets (MSSPELL is
   forced off just above). If a MySpell-only build is meant to work, "||"
   may have been intended -- confirm before changing. */
#if !defined(GCFG_NO_MSSPELL) && !defined(GCFG_NO_MYSPELL)
#define GCFG_SPELL_INCLUDED
#endif
/* ------------------------------------------------------------------ */
#if defined(_MSC_VER)
#define popen(f,m) _popen(f,m)
#define pclose(fh) _pclose(fh)
#endif
/* ------------------------------------------------------------------
// Spellchecker using dynamic library load */
#if !defined(GCFG_NOSPELLDLL) && !defined(__WIN32__)
#undef GCFG_NOSPELLDLL
#endif
/* ------------------------------------------------------------------
// Special character constants */

View File

@@ -24,8 +24,6 @@
// SpellChecker functions.
// ------------------------------------------------------------------
#if !defined(GCFG_NOSPELLDLL)
#if defined(_MSC_VER)
/* C4786: 'identifier' : identifier was truncated to 'number'
characters in the debug information
@@ -35,13 +33,38 @@
#include <vector>
#include <gdirposx.h>
#include <gstrall.h>
#include <myspell.hxx>
#include <gespell.h>
// Local declarations of the charset translation types consumed by
// LoadCharset() and XlatStr() declared below.
// NOTE(review): presumably these must stay identical to the declarations
// used where LoadCharset()/XlatStr() are defined -- confirm.
typedef char XlatName[17];   // Charset name buffer (17 chars)
typedef byte ChsTab[4];      // One translation table entry (4 bytes)
// A charset translation table: header fields plus one ChsTab entry for
// each of the 256 possible byte values.
struct Chs
{
long id;
int version;
int level;          // Passed to XlatStr() together with the table itself
XlatName imp; // From Charset
XlatName exp; // To Charset
ChsTab t[256]; // The Translation Table
};
int LoadCharset(const char* imp, const char* exp, int query = 0);
char* XlatStr(char* dest, const char* src, int level, Chs* chrtbl, int qpencoded=false, bool i51=false);
extern Chs* CharTable;
// ------------------------------------------------------------------
#if defined(__WIN32__)
#if defined(GCFG_SPELL_INCLUDED)
// ------------------------------------------------------------------
#if !defined(GCFG_NO_MSSPELL)
// ------------------------------------------------------------------
@@ -50,7 +73,6 @@
#define CHECK_NULL(ptr, jump) if (ptr == NULL) goto jump
#define CHECK_SEC(jump) if ((sec & 0xFF) != SC_SEC_NoErrors) goto jump
// ------------------------------------------------------------------
const char SC_RKEY_Prooftools[] = "Software\\Microsoft\\Shared Tools\\Proofing Tools";
@@ -163,7 +185,7 @@ const dword SC_SO_RussianIO = 0x20000000;
// ------------------------------------------------------------------
bool CSpellLang::Init(HKEY hKey, const char *name)
bool CMSSpellLang::Init(HKEY hKey, const char *name)
{
bool result = false;
int error;
@@ -175,8 +197,13 @@ bool CSpellLang::Init(HKEY hKey, const char *name)
unsigned long dsize = sizeof(dictionary);
mLIDC = atoi(name);
strcpy(mLangCode, name);
error = RegOpenKeyEx(hKey, name, 0, KEY_READ, &hKeyLang);
char name2[1024];
strcpy(name2, name);
strcat(name2, "\\Normal");
error = RegOpenKeyEx(hKey, name2, 0, KEY_READ, &hKeyLang);
CHECK_ERROR(cleanup0);
error = RegQueryValueEx(hKeyLang, "Engine", NULL, NULL, engine, &esize);
@@ -198,7 +225,7 @@ cleanup0:
// ------------------------------------------------------------------
bool CSpellLang::Load(const char *userdic)
bool CMSSpellLang::Load(const char *codeset, const char *userdic)
{
bool result = false;
@@ -267,18 +294,27 @@ cleanup1:
FreeLibrary(mLibrary);
mLibrary = NULL;
cleanup0:
if (result)
{
BuildRTable(codeset);
mIsMdrLoaded = (mSIB.cMdr != 0);
mIsUdrLoaded = (mSIB.cUdr != 0);
}
return result;
}
// ------------------------------------------------------------------
void CSpellLang::UnLoad()
void CMSSpellLang::UnLoad()
{
if (!mLibrary) return;
if (mSIB.cUdr) mSpellCloseUdr(mSLID, mUDR, TRUE);
if (mSIB.cMdr) mSpellCloseMdr(mSLID, &mMDRS);
if (mToDicTable) delete mToDicTable;
if (mToLocTable) delete mToLocTable;
mToDicTable = mToLocTable = NULL;
FreeLibrary(mLibrary);
mLibrary = NULL;
@@ -287,9 +323,74 @@ void CSpellLang::UnLoad()
// ------------------------------------------------------------------
bool CSpellLang::SpellCheck(const char *text)
// Builds the two translation tables used by RecodeText():
//   mToDicTable : codeset -> "CP<n>", where <n> is the value of GetACP()
//   mToLocTable : "CP<n>" -> codeset
// Each table is freshly allocated and zero-filled; it is then overwritten
// with a copy of *CharTable when CharTable is non-NULL.
// NOTE(review): presumably LoadCharset() publishes its result through the
// global CharTable -- confirm. Its return value is not checked here.
void CMSSpellLang::BuildRTable(const char *codeset)
{
// NOTE(review): this guard returns a value from a void function; it looks
// like a leftover of the replaced SpellCheck() body -- confirm.
if (!IsLoaded()) return true;
char codeset2[20];
// Name of the current ANSI code page in "CP<number>" form.
sprintf(codeset2, "CP%i", GetACP());
// Direction 1: caller's codeset -> ANSI code page.
LoadCharset(codeset, codeset2);
mToDicTable = new Chs;
memset(mToDicTable, 0, sizeof(Chs));
// Self-assignment in the NULL case leaves the zero-filled table untouched.
*mToDicTable = CharTable ? *CharTable : *mToDicTable;
// Direction 2: ANSI code page -> caller's codeset.
LoadCharset(codeset2, codeset);
mToLocTable = new Chs;
memset(mToLocTable, 0, sizeof(Chs));
*mToLocTable = CharTable ? *CharTable : *mToLocTable;
}
// ------------------------------------------------------------------
void CMSSpellLang::RecodeText(const char *srcText, char *dstText, bool flag)
{
if (flag)
XlatStr(dstText, srcText, mToDicTable->level, mToDicTable);
else
XlatStr(dstText, srcText, mToLocTable->level, mToLocTable);
}
// ------------------------------------------------------------------
// Appends suggestions for `text` to `suggest`.
// Calls SpellSuggest(text, false) and then scans the mSZ buffer up to
// mSRB.cChrMac characters. Every word found is recoded in place with
// RecodeText(..., false) and pushed as the pair (0, " " + word + " ").
// NOTE(review): the scan treats mSZ as a sequence of NUL-terminated words,
// presumably filled in by SpellSuggest() -- confirm against the engine API.
void CMSSpellLang::BuildSuggest(const char *text, CSpellSuggestV &suggest)
{
if (!SpellSuggest(text, false)) return;
bool flag = true;    // true while positioned at the start of a word
bool more = false;   // true once the second SpellSuggest(text, true) pass was requested
for (int idx = 0; idx < mSRB.cChrMac; idx++)
{
// A NUL ends the current word: step over it and expect a new word.
// NOTE(review): idx is advanced here without re-checking it against
// mSRB.cChrMac before the reads of mSZ[idx] below.
if (mSZ[idx] == 0) { idx++; flag = true; }
if (flag && mSZ[idx])
{
// First character of a word: convert it in place and store it.
flag = false;
RecodeText(&mSZ[idx], &mSZ[idx], false);
suggest.push_back(std::pair<byte, std::string>(0, " " + std::string(&mSZ[idx]) + char(' ')));
}
else if (!more && !mSZ[idx])
{
// Another NUL where a word was expected and no second pass done yet:
// ask for more suggestions once, then rescan the buffer from the start.
more = true;
if (!SpellSuggest(text, more = true))
return;
else
{
flag = true;
idx = -1;   // the loop increment brings this back to 0
}
}
}
}
// ------------------------------------------------------------------
bool CMSSpellLang::SpellCheck(const char *text)
{
if (!IsMdrLoaded()) return true;
mSIB.wSpellState = 0;
mSIB.lrgChr = (char*)text;
@@ -309,9 +410,9 @@ bool CSpellLang::SpellCheck(const char *text)
// ------------------------------------------------------------------
bool CSpellLang::SpellSuggest(const char *text, bool more)
bool CMSSpellLang::SpellSuggest(const char *text, bool more)
{
if (!IsLoaded()) return false;
if (!IsMdrLoaded()) return false;
mSIB.wSpellState = 0;
mSIB.lrgChr = (char*)text;
@@ -331,14 +432,146 @@ bool CSpellLang::SpellSuggest(const char *text, bool more)
// ------------------------------------------------------------------
bool CSpellLang::AddWord(const char *text)
bool CMSSpellLang::AddWord(const char *text)
{
if (!IsLoaded()) return false;
if (!IsMdrLoaded()) return false;
SEC error = mSpellAddUdr(mSLID, mUDR, (char*)text);
return (error & 0xFF) == 0;
}
// ------------------------------------------------------------------
#endif //#if !defined(GCFG_NO_MSSPELL)
// ------------------------------------------------------------------
#if !defined(GCFG_NO_MYSPELL)
// ------------------------------------------------------------------
bool CMYSpellLang::Init(const gdirentry *entry)
{
gposixdir dir(entry->dirname);
std::string affname = entry->name.substr(0, entry->name.length()-4);
strcpy(mLangCode, affname.c_str());
const gdirentry *entry2 = dir.nextentry((affname+".aff").c_str(), true);
if (entry2)
{
strcpy(mEngine, entry2->dirname);
strcat(mEngine, "/");
strcat(mEngine, entry2->name.c_str());
strcpy(mDictionary, entry->dirname);
strcat(mDictionary, "/");
strcat(mDictionary, entry->name.c_str());
return true;
}
return false;
}
// ------------------------------------------------------------------
// Creates the MySpell engine from the affix (mEngine) and dictionary
// (mDictionary) paths and builds the translation tables for `codeset`.
// The second parameter (user dictionary) is not used by this backend.
// Returns true once the engine exists and mIsMdrLoaded has been set.
bool CMYSpellLang::Load(const char *codeset, const char *)
{
  mMSpell = new MySpell(mEngine, mDictionary);

  if (mMSpell == NULL)
    return false;

  BuildRTable(codeset);
  mIsMdrLoaded = true;
  return mIsMdrLoaded;
}
// ------------------------------------------------------------------
void CMYSpellLang::UnLoad()
{
if (!mMSpell) return;
delete mMSpell;
mMSpell = NULL;
if (mToDicTable) delete mToDicTable;
if (mToLocTable) delete mToLocTable;
mToDicTable = mToLocTable = NULL;
}
// ------------------------------------------------------------------
// Builds the two translation tables used by RecodeText():
//   mToDicTable : codeset -> encoding reported by the MySpell dictionary
//   mToLocTable : dictionary encoding -> codeset
// Each table starts zero-filled and receives a copy of *CharTable when
// CharTable is non-NULL after the corresponding LoadCharset() call.
void CMYSpellLang::BuildRTable(const char *codeset)
{
  // Direction 1: caller's codeset -> dictionary encoding.
  LoadCharset(codeset, mMSpell->get_dic_encoding());
  mToDicTable = new Chs;
  memset(mToDicTable, 0, sizeof(Chs));
  if (CharTable)
    *mToDicTable = *CharTable;

  // Direction 2: dictionary encoding -> caller's codeset.
  LoadCharset(mMSpell->get_dic_encoding(), codeset);
  mToLocTable = new Chs;
  memset(mToLocTable, 0, sizeof(Chs));
  if (CharTable)
    *mToLocTable = *CharTable;
}
// ------------------------------------------------------------------
void CMYSpellLang::RecodeText(const char *srcText, char *dstText, bool flag)
{
if (flag)
XlatStr(dstText, srcText, mToDicTable->level, mToDicTable);
else
XlatStr(dstText, srcText, mToLocTable->level, mToLocTable);
}
// ------------------------------------------------------------------
// Appends MySpell's suggestions for `text` to `suggest`.
// Every suggestion is recoded with RecodeText(..., false) into a local
// 1024-byte buffer and stored as the pair (0, " " + word + " ").
// The word list returned by the engine is released with free().
void CMYSpellLang::BuildSuggest(const char *text, CSpellSuggestV &suggest)
{
  char **words = NULL;
  const int count = mMSpell->suggest(&words, text);

  int n = 0;
  while (n < count)
  {
    char recoded[1024];
    RecodeText(words[n], recoded, false);

    // Entries are stored with one blank on each side of the word.
    std::string item(" ");
    item += recoded;
    item += ' ';
    suggest.push_back(std::pair<byte, std::string>(0, item));

    free(words[n]);
    ++n;
  }

  free(words);
}
// ------------------------------------------------------------------
// Checks one word with the MySpell engine.
// Returns true when the engine accepts the word, and also when no main
// dictionary is loaded (nothing is reported as misspelled in that case).
bool CMYSpellLang::SpellCheck(const char *text)
{
  if (!IsMdrLoaded())
    return true;

  return mMSpell->spell(text) ? true : false;
}
// ------------------------------------------------------------------
// Required by the CSpellLang interface; this backend always returns false
// here. Suggestions are produced directly by BuildSuggest().
bool CMYSpellLang::SpellSuggest(const char * /*text*/, bool /*more*/)
{
  return false;
}
// ------------------------------------------------------------------
CSpellChecker::CSpellChecker()
@@ -348,10 +581,18 @@ CSpellChecker::CSpellChecker()
mText[0] = 0;
}
// ------------------------------------------------------------------
bool CSpellChecker::Init()
#endif //#if !defined(GCFG_NO_MYSPELL)
// ------------------------------------------------------------------
bool CSpellChecker::Init(const char *codeset, const char *dicPath)
{
#if !defined(GCFG_NO_MSSPELL)
int error;
unsigned long index = 0;
@@ -372,8 +613,12 @@ bool CSpellChecker::Init()
error = RegEnumKeyEx(hKeySpelling, index, name, &nsize, NULL, NULL, NULL, NULL);
if (error == ERROR_SUCCESS)
{
strcat(name, "\\Normal");
AddLanguage(hKeySpelling, name);
CMSSpellLang *lang = new CMSSpellLang;
if (lang->Init(hKeySpelling, name))
mLangs.push_back(lang);
else
delete lang;
index++;
}
}
@@ -383,6 +628,27 @@ bool CSpellChecker::Init()
cleanup1:
RegCloseKey(hKeyPTools);
cleanup0:
#endif //#if !defined(GCFG_NO_MSSPELL)
#if !defined(GCFG_NO_MYSPELL)
gposixdir d(dicPath);
const gdirentry *de;
while ((de = d.nextentry("*.dic", true)) != NULL)
{
CMYSpellLang *lang = new CMYSpellLang;
if (lang->Init(de))
mLangs.push_back(lang);
else
delete lang;
}
#endif //#if !defined(GCFG_NO_MYSPELL)
strcpy(mXlatLocalset, codeset);
return mInited = (mLangs.size() > 0);
}
@@ -400,18 +666,19 @@ void CSpellChecker::Close()
// ------------------------------------------------------------------
bool CSpellChecker::Load(LIDC lidc, const char *userdic)
bool CSpellChecker::Load(const char *langId, const char *userDic)
{
if (!IsInited()) return false;
if (IsLoaded() && (mLang->GetLangCode() == lidc)) return true;
if (IsLoaded() && streql(mLang->GetLangCode(), langId)) return true;
CSpellLangV::iterator it;
for (it = mLangs.begin(); it != mLangs.end(); it++)
{
if ((it->GetLangCode() == lidc) && it->Load(userdic))
if (streql((*it)->GetLangCode(), langId) && (*it)->Load(mXlatLocalset, userDic))
{
UnLoad();
mLang = it;
mLang = *it;
break;
}
}
@@ -436,7 +703,7 @@ bool CSpellChecker::Check(const char *text)
{
if (!IsLoaded()) return true;
OemToChar(text, mText);
mLang->RecodeText(text, mText, true);
return mLang->SpellCheck(mText);
}
@@ -447,34 +714,8 @@ CSpellSuggestV &CSpellChecker::Suggest()
{
mSuggest.clear();
if (!IsLoaded()) return mSuggest;
if (!mLang->SpellSuggest(mText, false)) return mSuggest;
bool flag = true;
bool more = false;
for (int idx = 0; idx < mLang->mSRB.cChrMac; idx++)
{
if (mLang->mSZ[idx] == 0) { idx++; flag = true; }
if (flag && mLang->mSZ[idx])
{
flag = false;
CharToOem(&mLang->mSZ[idx], &mLang->mSZ[idx]);
mSuggest.push_back(std::pair<byte, std::string>(0, " " + std::string(&mLang->mSZ[idx]) + char(' ')));
}
else if (!more && !mLang->mSZ[idx])
{
more = true;
if (!mLang->SpellSuggest(mText, more = true))
return mSuggest;
else
{
flag = true;
idx = -1;
}
}
}
mLang->BuildSuggest(mText, mSuggest);
return mSuggest;
}
@@ -489,11 +730,6 @@ CSpellSuggestV &CSpellChecker::Suggest()
// ------------------------------------------------------------------
#endif // #if defined(__WIN32__)
// ------------------------------------------------------------------
#endif // #if !defined(GCFG_NOSPELLDLL)
#endif //#if defined(GCFG_SPELL_INCLUDED)
// ------------------------------------------------------------------

View File

@@ -30,16 +30,66 @@
// ------------------------------------------------------------------
#if !defined(GCFG_NOSPELLDLL)
#if defined(GCFG_SPELL_INCLUDED)
// ------------------------------------------------------------------
#if defined(__WIN32__)
#if defined (__WIN32__)
#include <windows.h>
#endif
// ------------------------------------------------------------------
typedef std::vector< std::pair<byte, std::string> > CSpellSuggestV;
struct Chs;
class CSpellLang
{
friend class CSpellChecker;
protected:
bool mIsMdrLoaded;
bool mIsUdrLoaded;
char mLangCode[100];
char mEngine[_MAX_PATH];
char mDictionary[_MAX_PATH];
Chs *mToDicTable;
Chs *mToLocTable;
public:
CSpellLang()
{
mIsMdrLoaded = mIsUdrLoaded = false;
mToDicTable = mToLocTable = NULL;
}
virtual ~CSpellLang() {}
void Close() { UnLoad(); }
virtual bool Load(const char *codeset, const char *userdic) = 0;
virtual void UnLoad() = 0;
virtual void BuildRTable(const char *codeset) = 0;
virtual void RecodeText(const char *srcText, char *dstText, bool flag) = 0;
virtual void BuildSuggest(const char *text, CSpellSuggestV &suggest) = 0;
virtual bool SpellCheck(const char *text) = 0;
virtual bool SpellSuggest(const char *text, bool more) = 0;
virtual bool AddWord(const char *text) = 0;
bool IsMdrLoaded() { return mIsMdrLoaded; }
bool IsUdrLoaded() { return mIsUdrLoaded; }
const char *GetLangCode() { return mLangCode; }
};
// ------------------------------------------------------------------
#if !defined(GCFG_NO_MSSPELL)
typedef uint32_t MDR; // Main Dictionary Reference
typedef uint32_t UDR; // User Dictionary Reference
typedef uint16_t SEC; // Spell Error Code
@@ -113,7 +163,10 @@ typedef SEC (*SpellGetSizeUdr_fn ) (SLID, UDR, word*);
typedef SEC (*SpellGetListUdr_fn ) (SLID, UDR, word, SRB*);
typedef SEC (*SpellVerifyMdr_fn ) (char*, LIDC, LIDC*);
class CSpellLang
// ------------------------------------------------------------------
class CMSSpellLang: public CSpellLang
{
friend class CSpellChecker;
@@ -128,9 +181,6 @@ private:
char mSZ[1024];
byte mRate[1024];
char mEngine[_MAX_PATH];
char mDictionary[_MAX_PATH];
HINSTANCE mLibrary;
SpellVer_fn mSpellVer;
@@ -151,56 +201,86 @@ private:
SpellVerifyMdr_fn mSpellVerifyMdr;
public:
CSpellLang() { mLibrary = NULL; }
~CSpellLang() { Close(); }
CMSSpellLang() { mLibrary = NULL; }
~CMSSpellLang() { Close(); }
bool Init(HKEY hKey, const char *name);
void Close() { UnLoad(); }
bool Load(const char *userdic);
void UnLoad();
virtual bool Load(const char *codeset, const char *userdic);
virtual void UnLoad();
bool SpellCheck(const char *text);
bool SpellSuggest(const char *text, bool more);
bool AddWord(const char *text);
virtual void BuildRTable(const char *codeset);
virtual void RecodeText(const char *srcText, char *dstText, bool flag);
LIDC GetLangCode() { return mLIDC; }
virtual void BuildSuggest(const char *text, CSpellSuggestV &suggest);
bool IsLoaded() { return mLibrary != NULL; }
bool IsUdrOpened() { return mSIB.cUdr != 0; }
virtual bool SpellCheck(const char *text);
virtual bool SpellSuggest(const char *text, bool more);
virtual bool AddWord(const char *text);
};
#endif //#if !defined(GCFG_NO_MSSPELL)
typedef std::vector< std::pair<byte, std::string> > CSpellSuggestV;
typedef std::vector<CSpellLang> CSpellLangV;
#if !defined(GCFG_NO_MYSPELL)
class gdirentry;
class MySpell;
class CMYSpellLang: public CSpellLang
{
friend class CSpellChecker;
private:
MySpell *mMSpell;
public:
CMYSpellLang() { mMSpell = NULL; }
~CMYSpellLang() { Close(); }
bool Init(const gdirentry *entry);
virtual bool Load(const char *codeset, const char *);
virtual void UnLoad();
virtual void BuildRTable(const char *codeset);
virtual void RecodeText(const char *srcText, char *dstText, bool flag);
virtual void BuildSuggest(const char *text, CSpellSuggestV &suggest);
virtual bool SpellCheck(const char *text);
virtual bool SpellSuggest(const char *text, bool more);
virtual bool AddWord(const char *) { return false; }
};
#endif //#if !defined(GCFG_NO_MYSPELL)
typedef std::vector<CSpellLang*> CSpellLangV;
class CSpellChecker
{
private:
int mDicType;
bool mInited;
char mText[1024];
char mXlatLocalset[256];
CSpellLang *mLang;
CSpellLangV mLangs;
CSpellSuggestV mSuggest;
private:
void AddLanguage(HKEY hKey, const char *name)
{
CSpellLang lang;
if (lang.Init(hKey, name))
mLangs.push_back(lang);
}
void MSAddLanguage(HKEY hKey, const char *name);
public:
CSpellChecker();
~CSpellChecker() { Close(); }
bool Init();
bool Init(const char *codeset, const char *dicPath);
void Close();
bool Load(LIDC lidc, const char *userdic = NULL);
bool Load(const char *langId, const char *userDic);
void UnLoad();
bool Check(const char *text);
@@ -212,8 +292,8 @@ public:
CSpellSuggestV &GetSuggest() { return mSuggest; }
CSpellLangV &GetLangs() { return mLangs; }
LIDC GetLangCode() { return IsLoaded() ? mLang->GetLangCode() : 0xFFFF; }
bool IsUdrOpened() { return IsLoaded() ? mLang->IsUdrOpened() : false; }
const char *GetLangCode() { return IsLoaded() ? mLang->GetLangCode() : "?*N/A*?"; }
bool IsUdrLoaded() { return IsLoaded() ? mLang->IsUdrLoaded() : false; }
bool IsInited() { return mInited; }
bool IsLoaded() { return mLang != NULL; }
@@ -222,12 +302,7 @@ public:
// ------------------------------------------------------------------
#endif //#if defined(__WIN32__)
// ------------------------------------------------------------------
#endif //#if !defined(GCFG_NOSPELLDLL)
#endif //#if defined(GCFG_SPELL_INCLUDED)
// ------------------------------------------------------------------