(defun epsilon-greedy (epsilon)
  "Choose an action index through the two-branch WITH-PROB form.

One branch yields (random n), a random index below the global N; the other
yields the result of ARG-MAX-RANDOM-TIEBREAK applied to the global Q.
WITH-PROB is defined elsewhere: presumably it takes the first branch with
probability EPSILON and the second otherwise -- confirm against its
definition."
  (with-prob epsilon
    ;; Exploratory branch: any index below N.
    (random n)
    ;; Greedy branch: arg-max over Q, ties presumably broken at random.
    (arg-max-random-tiebreak Q)))

;; SETUP -- allocate and fill the global tables used by the rest of the file.
;;
;; Visible effects:
;;   n          <- 10
;;   Q, n_a     <- fresh one-dimensional arrays of length n
;;   Q*         <- fresh two-dimensional array with dimensions (n max-num-tasks)
;;   randomness <- fresh one-dimensional array of length max-num-tasks
;; Then, for every task and every index a below n, Q*[a, task] is set to the
;; value of (random-normal).
;;
;; max-num-tasks, advance-random-state and random-normal are defined outside
;; this view.
;;
;; NOTE(review): in this copy the definition is cut off in the middle of the
;; final `(setf (aref randomness task)` form -- its value form and the closing
;; parentheses of the loops and the defun are missing.  Restore the tail from
;; the original source before loading this file.
(defun setup ()
(setq n 10)
;; Arrays sized by n (one slot per index a).
(setq Q (make-array n))
(setq n_a (make-array n))
;; Q* is indexed as (aref Q* a task).
(setq Q* (make-array (list n max-num-tasks)))
(setq randomness (make-array max-num-tasks))
;; Presumably positions/seeds the random stream before the draws below --
;; TODO confirm against the definition of advance-random-state.
(advance-random-state 0)
(loop for task below max-num-tasks do
(loop for a below n do
(setf (aref Q* a task) (random-normal))) (setf (aref randomness task)

(defun init ()
  "Reset the per-run state held in global variables.

Every slot of Q below index N becomes 0.0 and every slot of N_A below index
N becomes 0; RBAR is then set to 0.0 and TIME to 0.  Returns 0, the value of
the final assignment."
  (dotimes (slot n)
    (setf (aref Q slot) 0.0
          (aref n_a slot) 0))
  ;; NOTE(review): TIME is also an external symbol of the COMMON-LISP package
  ;; (the TIME macro).  Assigning it as a variable is formally undefined
  ;; (CLHS 11.1.2.1.2) unless this file's package shadows it -- confirm.
  (setf rbar 0.0
        time 0))

;; [extraction artifact: "Page 1 Preview" -- not part of the program]
