Reinforcement learning of targeted movement (Chadderdon et al. 2012)

 Download zip file   Auto-launch 
Help downloading and running models
Accession:144538
"Sensorimotor control has traditionally been considered from a control theory perspective, without relation to neurobiology. In contrast, here we utilized a spiking-neuron model of motor cortex and trained it to perform a simple movement task, which consisted of rotating a single-joint “forearm” to a target. Learning was based on a reinforcement mechanism analogous to that of the dopamine system. This provided a global reward or punishment signal in response to decreasing or increasing distance from hand to target, respectively. Output was partially driven by Poisson motor babbling, creating stochastic movements that could then be shaped by learning. The virtual forearm consisted of a single segment rotated around an elbow joint, controlled by flexor and extensor muscles. ..."
Reference:
1 . Chadderdon GL, Neymotin SA, Kerr CC, Lytton WW (2012) Reinforcement learning of targeted movement in a spiking neuronal model of motor cortex. PLoS One 7:e47251 [PubMed]
Citations  Citation Browser
Model Information (Click on a link to find other models with that property)
Model Type: Realistic Network;
Brain Region(s)/Organism: Neocortex;
Cell Type(s): Neocortex fast spiking (FS) interneuron; Neocortex spiking regular (RS) neuron; Neocortex spiking low threshold (LTS) neuron;
Channel(s):
Gap Junctions:
Receptor(s): GabaA; AMPA; NMDA;
Gene(s):
Transmitter(s): Dopamine; Gaba; Glutamate;
Simulation Environment: NEURON;
Model Concept(s): Simplified Models; Synaptic Plasticity; Long-term Synaptic Plasticity; Reinforcement Learning; Reward-modulated STDP;
Implementer(s): Neymotin, Sam [Samuel.Neymotin at nki.rfmh.org]; Chadderdon, George [gchadder3 at gmail.com];
Search NeuronDB for information about:  GabaA; AMPA; NMDA; Dopamine; Gaba; Glutamate;
/
arm1d
README
drspk.mod *
infot.mod *
intf6_.mod *
intfsw.mod *
misc.mod *
nstim.mod *
stats.mod *
updown.mod *
vecst.mod *
arm.hoc
basestdp.hoc
col.hoc *
colors.hoc *
declist.hoc *
decmat.hoc *
decnqs.hoc *
decvec.hoc *
default.hoc *
drline.hoc *
filtutils.hoc *
geom.hoc
grvec.hoc *
hinton.hoc *
infot.hoc *
init.hoc
intfsw.hoc *
labels.hoc *
local.hoc *
misc.h *
mosinit.hoc
network.hoc
nload.hoc
nqs.hoc *
nqsnet.hoc *
nrnoc.hoc *
params.hoc
run.hoc
samutils.hoc *
sense.hoc *
setup.hoc *
sim.hoc
simctrl.hoc *
stats.hoc *
stim.hoc
syncode.hoc *
units.hoc *
xgetargs.hoc *
                            
// $Id: stats.hoc,v 1.6 2011/07/05 20:31:11 samn Exp $ 

print "Loading stats.hoc..."

//based on code from:
//http://pdos.csail.mit.edu/grid/sim/capacity-ns.tgz/capacity-sim/new-ns/
//hoc template that allows sampling from a pareto power law distribution 
//specified with objref rd
//rd = new rdmpareto($1=avg,$2=shape,[$3=seed])
//then picking values with .pick , or assigning to a vec with assignv(vec)
begintemplate rdmpareto
public avg,shape,rd,seed,pick,repick,paretoc,pareto5,assignv,reset,pareto4,pareto3
double avg[1],shape[1],seed[1]
objref rd
proc init () {
  avg=$1 shape=$2
  if(numarg()>2)seed=$3 else seed=1234
  rd=new Random()
  rd.ACG(seed)
}
proc reset () {
  rd.ACG(seed)
}
func paretoc () { local scale,shape,U
  scale=$1 shape=$2 U = rd.uniform(0,1)
  return scale * (1.0/ U^(1/shape) )
}
func pareto5 () { local avg,shape
  avg=$1 shape=$2
  return paretoc( avg * (shape -1)/shape, shape)
}
func pareto4 () { local alpha,u
  alpha=$2
  u = 1 - rd.uniform(0,1)
  return $1 + 1 / u^(1/alpha)
}
func pareto3 () { local x,z,b,a
  b = avg // 1 //min value
  a = shape // 10
  x = rd.uniform(0,1)
  z = x^-1/a
  return 1 + b * z
}
func pick () {
  return pareto5(avg,shape)
}
func repick () {
  return pick()
}
func assignv () { local i localobj vi
  vi=$o1 
  for i=0,vi.size-1 vi.x(i)=pick()
}
endtemplate rdmpareto

func skew () { local a,ret localobj v1
  a=allocvecs(v1)
  $o1.getcol($s2).moment(v1)
  ret=v1.x[4]
  dealloc(a)
  return ret
}

func skewv () { localobj v1
  v1=new Vector(5)
  $o1.moment(v1)
  return v1.x(4)
}


//** test rsampsig
objref vIN0,vIN1,vhsout,myrdm,vrs,VA
R0SZ=30000//size of group 0
R1SZ=30000//size of group 1
RPRC=100 // # of trials (combinations)
RS0M=0 //mean of group 0
RS1M=0 //mean of group 1
RS0SD=1 //sdev of group 0
RS1SD=1 //sdev of group 1
proc rsi () {
  if(myrdm==nil) myrdm=new Random()  
  {myrdm.normal(RS0M,RS0SD) vIN0=new Vector(R0SZ) vIN0.setrand(myrdm)}  
  {myrdm.normal(RS1M,RS1SD) vIN1=new Vector(R1SZ) vIN1.setrand(myrdm)}
  vhsout=new Vector(vIN0.size+vIN1.size)
  if(RPRC>1){
    vrs=new Vector(RPRC)
  } else {
    vrs=new Vector(combs_stats(R0SZ+R1SZ,mmax(R0SZ,R1SZ))*RPRC)
  }
  VA=new Vector()  VA.copy(vIN0) VA.append(vIN1)
}
func hocmeasure () {
  hretval_stats=vhsout.mean
  return vhsout.mean
}
func compfunc () {
  if(verbose_stats>1) printf("$1=%g,$2=%g\n",$1,$2)
  hretval_stats=$1-$2
  return hretval_stats
}
onesided=0
nocmbchk=1
pval=tval=0
func testrs () { local dd localobj str
  if(numarg()>0)dd=$1 else dd=1
  str=new String()
  rsi()
  vhsout.resize(vIN0.size+vIN1.size)
  pval=vrs.rsampsig(vIN0,vIN1,RPRC,"hocmeasure","compfunc",vhsout,onesided,nocmbchk)
  tval=ttest(vIN0,vIN1)
  if(dd){
    sprint(str.s,"p(abs(m0-m1))>%g=%g, t=%g, e=%g",abs(vIN0.mean-vIN1.mean),pval,tval,abs(pval-tval)/tval)
    {ge() ers=0 clr=1 hist(g,VA) clr=2  hist(g,vIN0) clr=3  hist(g,vIN1) g.label(0,0.95,str.s)}
    sprint(str.s,"m0=%g, m1=%g, n0=%g, n1=%g, s0=%g, s1=%g",vIN0.mean,vIN1.mean,vIN0.size,vIN1.size,vIN0.stdev,vIN1.stdev)
    g.label(0.0,0.0,str.s)
    sprint(str.s,"m0-m1=%g",vIN0.mean-vIN1.mean)
    g.label(0,0.9,str.s)
    g.exec_menu("View = plot")
  }
  printf("pval=%g, tval=%g, err=%g\n",pval,tval,abs(pval-tval)/tval)
  return pval
}

//* nhppvec(intensityvec,dt,maxt[,se])
// returns a Vector of spike times generated by a nonhomogenous poisson process
// described by intensity function intensityvec, with dt time-step, maxt max time
// and se the seed for random # generator
// this algorithm is called 'thinning'
obfunc nhppvec () { local i,dt,tt,maxt,maxi,se,tidx localobj tvec,ivec,rdm
  tvec=new Vector(100e3) tvec.resize(0)
  ivec=$o1 dt=$2 maxt=$3
  if(numarg()>3)se=$4 else se=1234
  rdm=new Random()
  rdm.ACG(se)
  tt=0
  maxi=ivec.max
  while(tt<maxt) {
    tt = tt - 1.0/maxi * log(rdm.uniform(0,1))
    tidx = tt / dt
    if(tidx >= ivec.size) break
    if(rdm.uniform(0,1) <= ivec.x(tidx) / maxi) {
      tvec.append(tt)
    }
  }
  return tvec
}