edit-distance


 (define (edit-distance source target) 
  
   ; Return the minimum edit distance that changes the given source string into 
   ; the given target string.  Matches cost nothing; insertions and deletions 
   ; cost 1. 
  
   (let* 
  
     ((slen (string-length source)) 
      (tlen (string-length target)) 
      (infinity (+ slen tlen 1)) 
  
      (tbl (make-table slen tlen)) 
      (src (list->vector (string->list source))) 
      (tgt (list->vector (string->list target)))) 
  
     (do ((s 0 (+ s 1))) ((>= s slen) (tbl 'get slen tlen)) 
       (let 
  
         ((s-incr (+ s 1)) 
          (s-char (vector-ref src s))) 
  
         (do ((t 0 (+ t 1))) ((>= t tlen)) 
           (let 
  
             ((t-incr (+ t 1))) 
  
             (tbl 'set! s-incr t-incr 
                  (min (if (char=? s-char (vector-ref tgt t)) 
                         (tbl 'get s t) infinity) 
                       (+ (tbl 'get s t-incr) 1) 
                       (+ (tbl 'get s-incr t) 1))))))))) 

Table is an auxiliary data structure storing intermediate values used during the edit-distance calculation. A table is a matrix sized and initialized for finding edit distances and implemented as a vector with the usual index addressing. A table is packaged as a quasi-object using the simple-object idiom.

 (define (make-table rows cols) 
  
   ; Return a table object initialized for computing minimum edit distances 
   ; using dynamic programming. 
  
   (let* 
  
     ((rws (+ 1 rows)) 
      (cls (+ 1 cols)) 
  
      (tbl (make-vector (* rws cls))) 
  
      (idx (lambda (r c) (+ (* r cls) c))) 
      (get (lambda (r c) (vector-ref tbl (idx r c)))) 
      (set! (lambda (r c v) (vector-set! tbl (idx r c) v)))) 
  
     (do ((r 0 (+ 1 r))) ((>= r rws)) (set! r 0 r)) 
     (do ((c 0 (+ 1 c))) ((>= c cls)) (set! 0 c c)) 
  
     (lambda (op . args) 
  
       (let 
  
         ((arg-cnt (length args))) 
  
         (if (< arg-cnt 2) 
  
           (error 
             (format 
               "at least two table-operation arguments expected, ~a given" 
               arg-len)) 
  
           (let 
  
             ((r (car args)) 
              (c (cadr args))) 
  
             (case op 
  
               ((get) 
  
                  (if (= arg-cnt 2) 
                    (get r c) 
                    (error 
                      (format "get needs two arguments, ~a given" arg-cnt)))) 
  
               ((set!) 
  
                  (if (= arg-cnt 3) 
                    (set! r c (caddr args)) 
                    (error 
                      (format "set! needs three arguments, ~a given" arg-cnt)))) 
  
               (else 
  
                 (error 
                   (format "unrecognized table operation \"~a\"" op)))))))))) 

Using a table isn't necessary because edit-distance returns only the cost of the edits and not the edits themselves. edit-distance can be rewritten to take advantage of the property that the only information needed to compute a row of the table is the previous row:

 (define (edit-distance source target) 
  
   ; Return the minimum edit distance that changes the given source string into 
   ; the given target string.  Matches cost nothing; insertions and deletions 
   ; cost 1. 
  
   (let* 
  
     ((slen (string-length source)) 
      (tlen (string-length target)) 
      (infinity (+ slen tlen 1)) 
  
      (row1 (make-vector (+ tlen 1))) 
      (row2 (make-vector (+ tlen 1))) 
      (src (list->vector (string->list source))) 
      (tgt (list->vector (string->list target)))) 
  
     (do ((t 0 (+ t 1))) ((> t tlen)) (vector-set! row1 t t)) 
  
     (let along-source ((s 1) (old-row row1) (new-row row2)) 
  
       (if (> s slen) 
  
         (vector-ref old-row tlen) 
  
         (let 
  
           ((s-char (vector-ref src (- s 1)))) 
  
           (vector-set! new-row 0 s) 
  
           (let along-target ((t 0)) 
  
             (if (>= t tlen) 
  
               (along-source (+ s 1) new-row old-row) 
  
               (let 
  
                 ((t-incr (+ t 1))) 
  
                 (vector-set! new-row t-incr 
                   (min (if (char=? s-char (vector-ref tgt t)) 
                          (vector-ref old-row t) infinity) 
                        (+ (vector-ref old-row t-incr) 1) 
                        (+ (vector-ref new-row t) 1))) 
  
                 (along-target t-incr))))))))) 

Just in case you were wondering, here's the results of some simple srfi:78 tests when str is "hello":

 (edit-distance str str) => 0 ; correct 
  
 (edit-distance str "") => 5 ; correct 
  
 (edit-distance "" str) => 5 ; correct 
  
 (edit-distance str (string-upcase str)) => 10 ; correct 
  
 (edit-distance (string-upcase str) str) => 10 ; correct 
  
 (edit-distance str (substring str 1)) => 1 ; correct 
  
 (edit-distance (substring str 1) str) => 1 ; correct 

category-code