(define (edit-distance source target)
; Return the minimum edit distance that changes the given source string into
; the given target string. Matches cost nothing; insertions and deletions
; cost 1.
(let*
((slen (string-length source))
(tlen (string-length target))
(infinity (+ slen tlen 1))
(tbl (make-table slen tlen))
(src (list->vector (string->list source)))
(tgt (list->vector (string->list target))))
(do ((s 0 (+ s 1))) ((>= s slen) (tbl 'get slen tlen))
(let
((s-incr (+ s 1))
(s-char (vector-ref src s)))
(do ((t 0 (+ t 1))) ((>= t tlen))
(let
((t-incr (+ t 1)))
(tbl 'set! s-incr t-incr
(min (if (char=? s-char (vector-ref tgt t))
(tbl 'get s t) infinity)
(+ (tbl 'get s t-incr) 1)
(+ (tbl 'get s-incr t) 1)))))))))
Table is an auxiliary data structure storing intermediate values used during the edit-distance calculation. A table is a matrix sized and initialized for finding edit distances and implemented as a vector with the usual index addressing. A table is packaged as a quasi-object using the simple-object idiom.
(define (make-table rows cols)
; Return a table object initialized for computing minimum edit distances
; using dynamic programming.
(let*
((rws (+ 1 rows))
(cls (+ 1 cols))
(tbl (make-vector (* rws cls)))
(idx (lambda (r c) (+ (* r cls) c)))
(get (lambda (r c) (vector-ref tbl (idx r c))))
(set! (lambda (r c v) (vector-set! tbl (idx r c) v))))
(do ((r 0 (+ 1 r))) ((>= r rws)) (set! r 0 r))
(do ((c 0 (+ 1 c))) ((>= c cls)) (set! 0 c c))
(lambda (op . args)
(let
((arg-cnt (length args)))
(if (< arg-cnt 2)
(error
(format
"at least two table-operation arguments expected, ~a given"
arg-len))
(let
((r (car args))
(c (cadr args)))
(case op
((get)
(if (= arg-cnt 2)
(get r c)
(error
(format "get needs two arguments, ~a given" arg-cnt))))
((set!)
(if (= arg-cnt 3)
(set! r c (caddr args))
(error
(format "set! needs three arguments, ~a given" arg-cnt))))
(else
(error
(format "unrecognized table operation \"~a\"" op))))))))))
Using a table isn't necessary because edit-distance returns only the cost of the edits and not the edits themselves. edit-distance can be rewritten to take advantage of the property that the only information needed to compute a row of the table is the previous row:
(define (edit-distance source target)
; Return the minimum edit distance that changes the given source string into
; the given target string. Matches cost nothing; insertions and deletions
; cost 1.
(let*
((slen (string-length source))
(tlen (string-length target))
(infinity (+ slen tlen 1))
(row1 (make-vector (+ tlen 1)))
(row2 (make-vector (+ tlen 1)))
(src (list->vector (string->list source)))
(tgt (list->vector (string->list target))))
(do ((t 0 (+ t 1))) ((> t tlen)) (vector-set! row1 t t))
(let along-source ((s 1) (old-row row1) (new-row row2))
(if (> s slen)
(vector-ref old-row tlen)
(let
((s-char (vector-ref src (- s 1))))
(vector-set! new-row 0 s)
(let along-target ((t 0))
(if (>= t tlen)
(along-source (+ s 1) new-row old-row)
(let
((t-incr (+ t 1)))
(vector-set! new-row t-incr
(min (if (char=? s-char (vector-ref tgt t))
(vector-ref old-row t) infinity)
(+ (vector-ref old-row t-incr) 1)
(+ (vector-ref new-row t) 1)))
(along-target t-incr)))))))))
Just in case you were wondering, here's the results of some simple srfi:78 tests when str is "hello":
(edit-distance str str) => 0 ; correct (edit-distance str "") => 5 ; correct (edit-distance "" str) => 5 ; correct (edit-distance str (string-upcase str)) => 10 ; correct (edit-distance (string-upcase str) str) => 10 ; correct (edit-distance str (substring str 1)) => 1 ; correct (edit-distance (substring str 1) str) => 1 ; correct