/* * wcsdiff.c * * 2002-06-15 Fredrik Roubert * 2004-05-13 Fredrik Roubert * * * Implementing "Dynamic Programming Algorithm (DPA) for Edit-Distance", * as described on: * * http://www.csse.monash.edu.au/~lloyd/tildeAlgDS/Dynamic/Edit/ * */ #include #include #if DMALLOC # define DMALLOC_FUNC_CHECK # include #endif wchar_t * /* returned string should be free()ed */ wcsdiff(const wchar_t *s1, /* string to compare against */ const wchar_t *s2, /* string to be compared */ const wchar_t *s_begin, /* string to insert on begin of error */ const wchar_t *s_end, /* string to insert on end of error */ const wchar_t *s_space, /* string to insert for missing character */ int *distance) /* where to store the edit distance */ { size_t l1, l2, i, j, p, l_begin, l_end, l_space, l_tmp; int *dm, d, c, e; wchar_t *tmp; l1 = wcslen(s1); l2 = wcslen(s2); l_begin = wcslen(s_begin); l_end = wcslen(s_end); l_space = wcslen(s_space); if ((dm = malloc((l1 + 1) * (l2 + 1) * sizeof (int))) == NULL) return NULL; #define DM(X, Y) dm[(l2 + 1) * (X) + (Y)] for (j = 0; j <= l2; j ++) DM(0, j) = j; for (i = 0; i < l1; i ++) { DM(i + 1, 0) = i + 1; for (j = 0; j < l2; j ++) { d = DM(i, j); if (s1[i] != s2[j]) d ++; #define MIN(A, B) ((A) < (B) ? (A) : (B)) DM(i + 1, j + 1) = MIN(d, MIN(DM(i, j + 1), DM(i + 1, j)) + 1); #undef MIN } } /* return the edit distance */ if (distance != NULL) *distance = DM(l1, l2); /* length of temporary string, including terminating null character */ l_tmp = 1 + l2 + DM(l1, l2) * (l_begin + l_end + l_space); if ((tmp = malloc(l_tmp * sizeof (wchar_t))) == NULL) { free(dm); return NULL; } p = l_tmp - 1; tmp[p] = L'\0'; for (i = l1, j = l2, e = 0; i > 0 || j > 0; ) { if (i > 0 && j > 0) { d = DM(i - 1, j - 1); if (s1[i - 1] != s2[j - 1]) { d ++; c = 1; } else c = 0; if (DM(i, j) == d) { i --; j --; if (! e && c) { e = 1; p -= l_end; wmemcpy(tmp + p, s_end, l_end); } if (e && ! c) { e = 0; p -= l_begin; wmemcpy(tmp + p, s_begin, l_begin); } tmp[-- p] = s2[j]; continue; } } if (! e) { e = 1; p -= l_end; wmemcpy(tmp + p, s_end, l_end); } if (j == 0 || (i > 0 && DM(i, j) == DM(i - 1, j) + 1)) { i --; p -= l_space; wmemcpy(tmp + p, s_space, l_space); } else { j --; tmp[-- p] = s2[j]; } } if (e) { p -= l_begin; wmemcpy(tmp + p, s_begin, l_begin); } free(dm); #undef DM if (p > 0) { l_tmp -= p; wmemmove(tmp, tmp + p, l_tmp); tmp = realloc(tmp, l_tmp * sizeof (wchar_t)); } return tmp; }