This repository has been archived on 2024-04-08. You can view files and clone it, but cannot push or open issues or pull requests.

190 lines
5.3 KiB
C++
Raw Normal View History

2000-02-25 10:15:17 +00:00
// This may look like C code, but it is really -*- C++ -*-
// ------------------------------------------------------------------
// The Goldware Library
// Copyright (C) 1990-1999 Odinn Sorensen
// ------------------------------------------------------------------
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Library General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Library General Public License for more details.
//
// You should have received a copy of the GNU Library General Public
// License along with this program; if not, write to the Free
// Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
// MA 02111-1307, USA
// ------------------------------------------------------------------
// $Id$
// ------------------------------------------------------------------
// Fuzzy string search.
// ------------------------------------------------------------------
//
// C++ port and cleanup by Odinn Sorensen, August 1992.
// Dusted off and re-used, March 1994.
// Converted to C++ class, December 1997.
//
// Original source: APPROX.C (found in SNIP1091).
// Original author: John Rex, August 1988.
//
// References: (1) Computer Algorithms, by Sara Baase, Addison-Wesley,
// 1988, pp 242-4.
// (2) Hall PAV, Dowling GR: "Approximate string match-
// ing", ACM Computing Surveys, 12:381-402, 1980.
//
// Usage:
//
// pattern, string - Search for pattern in text
// degree - Degree of allowed mismatch (no of chars)
//
// init(pattern, degree, casing) - Setup routine
// findfirst(string) - Find first match
// findnext() - Find next match
//
// Searching is finished when findfirst/next() returns false
//
// ------------------------------------------------------------------
#include <gctype.h>
#include <gstrall.h>
#include <gmemdbg.h>
#include <gfuzzy.h>
// ------------------------------------------------------------------
// Puts the object into a defined "nothing to search" state.
gfuzzy::gfuzzy() {

  // No work buffer yet; init() allocates it, the destructor releases it
  ldiffs = NULL;

  // No search in progress. findnext() tests this pointer before doing
  // anything else, so it must not be left indeterminate in case
  // findnext() gets called before findfirst().
  start = NULL;
}
// ------------------------------------------------------------------
// Releases the work buffer that init() allocated. ldiffs is still NULL
// here if init() was never called.
gfuzzy::~gfuzzy() {
// NOTE(review): throw_deletearray is not defined in this file; it
// presumably wraps delete[] for the memory debugging layer - confirm
// in gmemdbg.h.
throw_deletearray(ldiffs);
}
// ------------------------------------------------------------------
// Fuzzy search init
void gfuzzy::init(const char* pat, int fuzzydegree, bool case_sensitive) {
casing = case_sensitive;
degree = fuzzydegree;
pattern = pat;
plen = strlen(pattern);
ldiffs = new int [(plen+1)*4];
throw_new(ldiffs);
}
// ------------------------------------------------------------------
// Starts a new search in 'string' and looks for the first match.
// Returns whatever findnext() returns for that first attempt.
bool gfuzzy::findfirst(const char* string) {

  // Text to search; findnext() pre-increments textloc, hence the -1
  text = string;
  start = text;
  textloc = -1;

  // Carve the work buffer into four consecutive columns of plen+1
  // cells each: left/right differences, then left/right offsets
  const int cells = plen + 1;
  ldiff = ldiffs;
  rdiff = ldiffs + cells;
  loffs = ldiffs + 2*cells;
  roffs = ldiffs + 3*cells;

  // Initial values for the right-hand column
  for(int row=0; row<cells; ++row) {
    rdiff[row] = row;
    roffs[row] = 1;
  }

  return findnext();
}
// ------------------------------------------------------------------
// Fuzzy search next
bool gfuzzy::findnext() {
if(start) {
start = NULL;
howclose = -1;
while(start == NULL) { // Start computing columns
if(text[++textloc] == NUL) // Out of text to search!
break;
int* temp = rdiff; // Move right-hand column to left ...
rdiff = ldiff; // ... so that we can compute new ...
ldiff = temp; // ... right-hand column
rdiff[0] = 0; // Top (boundary) row
temp = roffs; // And swap offset arrays, too
roffs = loffs;
loffs = temp;
roffs[1] = 0;
for(int i=0; i<plen; i++) { // Run through pattern
// Compute a, b, & c as the three adjacent cells ...
bool charmatch;
if(casing)
charmatch = pattern[i] == text[textloc];
else
charmatch = toupper(pattern[i]) == toupper(text[textloc]);
int a = ldiff[i] + (charmatch ? 0 : 1);
int b = ldiff[i+1] + 1;
int c = rdiff[i] + 1;
// ... now pick minimum ...
if(b < a)
a = b;
if(c < a)
a = c;
// ... and store
rdiff[i+1] = a;
}
// Now update offset array
// The values in the offset arrays are added to the
// current location to determine the beginning of the
// mismatched substring. (See refs for details)
if(plen > 1) {
for(int i=2; i<=plen; i++) {
if(ldiff[i-1] < rdiff[i])
roffs[i] = loffs[i-1] - 1;
else if(rdiff[i-1] < rdiff[i])
roffs[i] = roffs[i-1];
else if(ldiff[i] < rdiff[i])
roffs[i] = loffs[i] - 1;
else // Then we have ldiff[i-1] == rdiff[i]
roffs[i] = loffs[i-1] - 1;
}
}
// Now, do we have an approximate match?
if(rdiff[plen] <= degree) { // indeed so!
end = text + textloc;
start = end + roffs[plen];
howclose = rdiff[plen];
}
}
}
2005-10-25 06:11:09 +00:00
return make_bool(start);
2000-02-25 10:15:17 +00:00
}
// ------------------------------------------------------------------