# streaming popularity

Find the n most frequently occurring objects in an object stream a couple of ways.

The choice of uniformly distributed random data is dumb for this problem. For example, it can cause the Misra-Gries algorithm to return fewer than the n most frequently occurring objects.

``` \$ cat tn.scm

(define (data-counts n)

; Return an item-frequency counter capable of keeping track of
; n items.

(let*
((data (make-hash-table n))
(data-count 0)

(pick-entry (lambda ()

; Return an arbitrary entry in the table as a (key
; . value) pair.  Assumes the table is not empty.

(catch 'out
(lambda ()
(hash-fold
(lambda (key val acc) (throw 'out (cons key val)))
'() data)
(error "item set shouldn't be empty, but it is"))
(lambda (tag p) p))))

(minimum-entry (lambda ()

; Return a (key . value) pair representing an entry with
; the minimum count.  Assumes the table is not empty.

(let

((f (lambda (key val min)
(if (< val (cdr min)) (cons key val) min))))

(hash-fold f (pick-entry) data)))))

(lambda (cmd . args)
(cond

; Add the given item to the table with a count of 1.
; Assumes the item is not in the table.

(if (= data-count n)
(error "adding an item to a full table"))
(hash-set! data (car args) 1)
(set! data-count (+ data-count 1)))

; Remove a item with the lowest count.  Assumes the
; table's not empty.

((eq? 'delete-minimum cmd)
(hash-remove! data (car (minimum-entry)))
(set! data-count (- data-count 1)))

; If the given item is not in the table, return false,
; otherwise increase the item's count by one and return
; true.

((eq? 'increase-if-included cmd)
(let*
((key (car args))
(val (hash-ref data key)))

(when val
(hash-set! data key (+ val 1)))

val))

; If the given item is not in the table, return false,
; otherwise increase the item's count by one and return
; true.

((eq? 'print cmd)
(let*
((f (lambda (k v acc)
(cons (cons k (car acc)) (cons v (cdr acc)))))

(p (lambda (lst)
(let loop ((l lst))
(unless (null? l)
(format #t "~4d" (car l))
(loop (cdr l))))
(display #\newline)))

(ht (hash-fold f (cons '() '()) data)))

(p (car ht))
(p (cdr ht))))

; Reduce the item counts by one, throwing away items that
; have non-positive counts.  Return true iff items were
; thrown away.

((eq? 'reduce-counts cmd)
(let
((old-data-count data-count)
(f (lambda (key val acc)
(when (> val 1)
(hash-set! acc key (- val 1))
(set! data-count (+ data-count 1)))
acc)))

(set! data-count 0)
(set! data (hash-fold f (make-hash-table n) data))
(< data-count old-data-count)))

; Return true iff there's space for more items

((eq? 'space-available cmd)
(< data-count n))

(#t
(error "unexpected data-counts command: " cmd))))))

(define (maa-top-n data counts)

(define (place e)
(unless (counts 'increase-if-included e)
(unless (counts 'space-available)
(counts 'delete-minimum))

(for-each place data)
counts)

(define (mg-top-n data counts)

(define (place e)
(unless (counts 'increase-if-included e)
(when (or (counts 'space-available) (counts 'reduce-counts))

(for-each place data)
counts)

(define (test-data n t c)

(define (mvec n t)

(let loopo ((k 0) (lo '()))
(if (>= k t)
(vector->list (randomly-permute-vector (list->vector lo)))
(let loopm ((i 0) (lm lo))
(if (>= i n)
(loopo (+ k 1) lm)
(let loopi ((j 0) (li lm))
(if (> j i)
(loopm (+ i 1) li)
(loopi (+ j 1) (cons i li)))))))))

(let ((l (mvec n t)))
((mg-top-n l (data-counts c)) 'print)
(display #\newline)
((maa-top-n l (data-counts c)) 'print)))

\$ guile
scheme@(guile-user)> (test-data 10 1 10)
9   8   7   6   5   4   3   2   1   0
10   9   8   7   6   5   4   3   2   1

9   8   7   6   5   4   3   2   1   0
10   9   8   7   6   5   4   3   2   1
scheme@(guile-user)> (test-data 10 1 9)
9   8   7   6   5   4   3   2   1
8   7   6   5   4   3   2   1   1

9   8   7   6   5   4   3   2   1
10   9   8   7   6   5   4   3   1
scheme@(guile-user)> (test-data 10 1 5)
9   8   6   5   3
3   2   2   2   1

8   7   6   5   4
9   8   7   1   5
scheme@(guile-user)> (test-data 10 2 5)
8   7   4   3
2   1   1   1

9   8   7   6   4
20  18   1  14   7
scheme@(guile-user)>

\$
```