Update
I now have a whole fancy library for random sampling in Clojure, including reservoir sampling with lots of extras (with and without replacement, weighting, seeds). It's available here:https://github.com/bigmlcom/sampling
A little snippet of Clojure for reservoir sampling:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; For a fully fleshed out library, see: | |
;; https://github.com/bigmlcom/sampling | |
;; ------------------------------------- | |
(ns sample.reservoir | |
"Functions for sampling without replacement using a reservoir.") | |
(defn create | |
"Creates a sample reservoir." | |
[reservoir-size] | |
(with-meta [] {:reservoir-size reservoir-size | |
:insert-count 0})) | |
(defn insert | |
"Inserts a value into the sample reservoir." | |
[reservoir val] | |
(let [{:keys [reservoir-size insert-count]} (meta reservoir) | |
insert-count (inc insert-count) | |
index (rand-int insert-count)] | |
(with-meta (cond (< insert-count reservoir-size) | |
(conj reservoir val) | |
(= insert-count reservoir-size) | |
(shuffle (conj reservoir val)) | |
(< index reservoir-size) | |
(assoc reservoir index val) | |
:else reservoir) | |
{:reservoir-size reservoir-size | |
:insert-count insert-count}))) | |
(defn sample | |
"Returns a reservoir sample for a collection." | |
[coll reservoir-size] | |
(reduce insert (create reservoir-size) | |
coll)) |