-
Notifications
You must be signed in to change notification settings - Fork 0
/
sim_kcnab_lrcr.m
executable file
·65 lines (55 loc) · 1.84 KB
/
sim_kcnab_lrcr.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
function [params, ...
          S, ...
          A, ...
          R, ...
          RPE] = sim_kcnab_lrcr(n_arms, ...     % (int>0) # Arms
                                n_contexts, ... % (int>0) # Contexts
                                n_subjects, ... % (int>0) # Subjects
                                n_trials, ...   % (int>0) # Trials
                                sd_r, ...       % (float>0) # SD of P(reward)
                                noise_r) ...    % ('t','f') # Reward sampled?
%SIM_KCNAB_LRCR Simulates a contextual N-Armed Bandit with
%   Learning rate
%   Choice randomness
%
%   Returns
%    1. params : n_subjects x 2 array of model parameters [lr, cr]
%    2. S      : n_trials x n_subjects array of sampled context indices
%    3. A      : n_trials x n_arms x n_subjects array of chosen actions
%    4. R      : n_trials x n_subjects array of rewards
%    5. RPE    : n_trials x n_subjects array of reward prediction errors
%
%   Abraham Nunes (Last Updated Nov 24, 2017)
% =========================================================================
% Validate noise_r once, up front. The original code left `r` undefined on
% the first trial (or silently stale on later trials) when noise_r was
% neither 't' nor 'f'; fail loudly instead.
if ~(strcmp(noise_r, 't') || strcmp(noise_r, 'f'))
    error('sim_kcnab_lrcr:badInput', ...
          'noise_r must be ''t'' or ''f'', got ''%s''.', noise_r);
end
% Generate per-subject parameters [learning rate, choice randomness]
params = make_param_array(n_subjects, 'lrcr');
% Pre-allocate output arrays
S   = NaN(n_trials, n_subjects);
A   = NaN(n_trials, n_arms, n_subjects);
R   = NaN(n_trials, n_subjects);
RPE = NaN(n_trials, n_subjects);
for i = 1:n_subjects
    lr = params(i, 1);  % learning rate
    cr = params(i, 2);  % choice randomness (softmax scaling of Q values)
    Q  = zeros(n_contexts, n_arms);  % context x arm value table
    % Reward-probability paths bounded in [0.2, 0.8] with step SD sd_r
    rprob  = make_rewardpaths2(n_trials, n_contexts, n_arms, 0.2, 0.8, sd_r);
    a_size = size(rprob, 3);  % loop-invariant; hoisted out of trial loop
    for t = 1:n_trials
        % Sample a context and take an action (a is a one-hot row vector,
        % as implied by the a' / *a usage below)
        s = randsample(n_contexts, 1);
        a = action_selection(cr*Q(s,:));
        % P(reward) for the chosen arm at this trial and context
        rprob_t = reshape(rprob(t, s, :), [1, a_size])*a';
        if strcmp(noise_r, 't')
            r = binornd(1, rprob_t);  % stochastic (Bernoulli) reward
        else
            r = rprob_t;              % deterministic reward
        end
        % Rescorla-Wagner update applied to the chosen arm only
        rpe    = (r - Q(s,:)*a');
        Q(s,:) = Q(s,:) + lr*rpe*a;
        % Store data
        S(t, i)    = s;
        A(t, :, i) = a;
        R(t, i)    = r;
        RPE(t, i)  = rpe;
    end
end
end