Reward modulated STDP (Legenstein et al. 2008)

 Download zip file 
Help downloading and running models
Accession: 116837
"... This article provides tools for an analytic treatment of reward-modulated STDP, which allows us to predict under which conditions reward-modulated STDP will achieve a desired learning effect. These analytical results imply that neurons can learn through reward-modulated STDP to classify not only spatial but also temporal firing patterns of presynaptic neurons. They also can learn to respond to specific presynaptic firing patterns with particular spike patterns. Finally, the resulting learning theory predicts that even difficult credit-assignment problems, where it is very hard to tell which synaptic weights should be modified in order to increase the global reward for the system, can be solved in a self-organizing manner through reward-modulated STDP. This yields an explanation for a fundamental experimental result on biofeedback in monkeys by Fetz and Baker. In this experiment monkeys were rewarded for increasing the firing rate of a particular neuron in the cortex and were able to solve this extremely difficult credit assignment problem. ... In addition our model demonstrates that reward-modulated STDP can be applied to all synapses in a large recurrent neural network without endangering the stability of the network dynamics."
Reference:
1. Legenstein R, Pecevski D, Maass W (2008) A learning theory for reward-modulated spike-timing-dependent plasticity with application to biofeedback. PLoS Comput Biol 4:e1000180 [PubMed]
Model Information (Click on a link to find other models with that property)
Model Type: Realistic Network;
Brain Region(s)/Organism: Neocortex;
Cell Type(s):
Channel(s):
Gap Junctions:
Receptor(s):
Gene(s):
Transmitter(s):
Simulation Environment: Python; PCSIM;
Model Concept(s): Pattern Recognition; Spatio-temporal Activity Patterns; Reinforcement Learning; STDP; Biofeedback; Reward-modulated STDP;
Implementer(s):
#======================================================================
#  Computer simulation 2 of
#      A Learning Theory for Reward-Modulated Spike-Timing-Dependent 
#        Plasticity with Application to Biofeedback
# 
#  Author: Dejan Pecevski, dejan@igi.tugraz.at
#
#  Date: March 2008
#
#======================================================================
import sys
import os
 
from pypcsim import *
import pypcsimplus as pcsim

from numpy import *
import getopt
import numpy
from datetime import datetime
from math import *
from tables import *
from mpi4py import MPI

from BiofeedModel import *
from PoissInputModel import *


class BiofeedExperiment(pcsim.Experiment):
    """Experiment that couples an input model with the ``Biofeed`` model.

    The methods below fill in the experiment's default parameters, build
    the two models, attach two extra ``LinearNeuron`` read-outs and run
    the simulation with DA-STDP switched off during an initial interval.
    """

    def defaultExpParameters(self):
        """Populate ``self.expParams`` with the default experiment settings."""
        ep = self.expParams

        # General simulation parameters
        ep.Tsim = 7200
        ep.DTsim = 1e-4

        # Network distribution parameters
        ep.netType = 'MT'
        ep.nThreads = 2
        ep.minDelay = 1e-3
        ep.maxDelay = 2

        # Seeds of the experiment
        ep.numpyRandomSeed = 34234159
        ep.pyRandomSeed = 124243
        ep.constructionSeed = 32241476
        ep.simulationSeed = 134212439

        ep.runMode = "long"
        # Name of the input model class; resolved by name in setupModels().
        ep.modelName = "PoissInput"

    def setupModels(self):
        """Seed the random generators, then create and generate both models.

        Stores the input model in ``self.models.input`` and the biofeedback
        model in ``self.models.biofeed``; the latter receives the input
        model's ``elements`` as ``depModels``.
        """
        # Keep the name `p`: the eval() string below refers to it.
        p = self.modelParams
        ep = self.expParams

        # NOTE(review): `random` is whatever the star imports at the top of
        # the file bound to that name. If it is numpy.random, the second
        # call re-seeds the same generator and pyRandomSeed has no lasting
        # effect -- confirm which module is meant.
        random.seed(ep.pyRandomSeed)
        numpy.random.seed(ep.numpyRandomSeed)
        ep.samplingTime = int(ep.Tsim / (200 * ep.DTsim))

        # The input model class is selected by name. eval() executes
        # arbitrary code, so ep.modelName must only ever come from a
        # trusted source.
        self.models.input = eval(ep.modelName + '(self.net, self.expParams, p.get("input",{}))')
        self.models.biofeed = Biofeed(self.net,
                                      self.expParams,
                                      p.get("biofeed", {}),
                                      depModels=self.models.input.elements)

        self.models.input.generate()
        self.models.biofeed.generate()

    def setupRecordings(self):
        """Set up the models' recordings and add two linear read-out neurons.

        Every synapse in ``learning_plastic_syn`` is connected to
        ``self.models.exc_ln`` and every synapse in ``inhib_learn_syn`` to
        ``self.models.inh_ln``, each through a ``StaticAnalogSynapse``.

        Returns:
            ``self.recordings`` with ``input`` and ``biofeed`` filled in by
            the respective model.
        """
        r = self.recordings
        r.input = self.models.input.setupRecordings()
        r.biofeed = self.models.biofeed.setupRecordings()
        net = self.net
        m = self.models

        # presumably each linear neuron sums the analog output of the
        # synapses wired into it -- TODO confirm against PCSIM docs
        m.exc_ln = net.create(LinearNeuron(Rm = 1))
        for s in m.biofeed.elements.learning_plastic_syn:
            net.connect(s, m.exc_ln, StaticAnalogSynapse(delay = 1e-3))

        m.inh_ln = net.create(LinearNeuron(Rm = 1))
        for s in m.biofeed.elements.inhib_learn_syn:
            net.connect(s, m.inh_ln, StaticAnalogSynapse(delay = 1e-3))

        return r

    def _setLearningActive(self, active):
        """Set ``activeDASTDP`` to *active* on the plastic learning synapses."""
        net = self.net
        for s in self.models.biofeed.elements.learning_plastic_syn:
            syn = net.object(s)
            # net.object() can return a false value; such synapses are
            # skipped (presumably they are not held locally -- confirm).
            if syn:
                syn.activeDASTDP = active

    def simulate(self):
        """Run the simulation for ``expParams.Tsim`` seconds.

        DA-STDP is disabled for the first 2 seconds and enabled for the
        remainder. The elapsed wall-clock time, in whole seconds, is
        printed and stored in ``expParams.simDuration``.
        """
        ep = self.expParams
        ep.samplingTime = int(ep.Tsim / (200 * ep.DTsim))

        # Initial interval (same unit as Tsim) with DA-STDP switched off.
        warmup = 2

        # Run simulation
        print('Running simulation:')
        t0 = datetime.today()

        self.net.add(SimProgressBar(Time.sec(ep.Tsim)), SimEngine.ID(0,0))

        print("Simulation start :  %s" % datetime.today().strftime('%x %X'))

        self._setLearningActive(False)
        self.net.reset()
        self.net.advance(int(warmup / ep.DTsim))

        self._setLearningActive(True)
        self.net.advance(int((ep.Tsim - warmup) / ep.DTsim))

        t1 = datetime.today()
        elapsed = t1 - t0
        # timedelta.seconds alone omits whole days, which would under-report
        # runs longer than 24 hours; fold the days back in.
        elapsedSec = elapsed.days * 86400 + elapsed.seconds
        # The message says "CPU time", but the value is wall-clock time.
        print('Done. %s sec CPU time for %s s simulation time' % (elapsedSec, ep.Tsim))
        self.expParams.simDuration = elapsedSec

    def scriptList(self):
        """Return the list of script file names belonging to this experiment."""
        return ["BiofeedExperiment.py"]
    
if __name__ == "__main__":
    # Usage: <script> [<any> [<run_idx> [<directory>]]]
    #   no arguments  -> short 400 s run named "shortrun"
    #   >= 1 argument -> seeded run; the value of the first argument is
    #                    never read, only its presence selects this mode.
    if len(sys.argv) > 1:
        # One seed set per run index.
        numpyRandomSeed = [13048, 5012835, 656545, 25092385, 24086498]
        constructionSeed = [1650126, 5606836, 4509158, 63501348, 5023958]
        simulationSeed = [1045235, 65709388, 221230, 52065069, 5230598]
        pyRandomSeed = [10349, 643764370, 161374352, 16406098, 70605059]

        run_idx = int(sys.argv[2]) if len(sys.argv) > 2 else 3

        # Reject indices outside the seed tables up front: a negative index
        # would otherwise wrap around silently and a too-large one would
        # fail with an uninformative IndexError.
        if not 0 <= run_idx < len(numpyRandomSeed):
            raise ValueError("run index must be in the range 0..%d, got %d"
                             % (len(numpyRandomSeed) - 1, run_idx))

        runName = "final_" + str(run_idx) + "_"

        # Optional output directory; None is passed through when absent.
        directory = sys.argv[3] if len(sys.argv) > 3 else None

        exper = BiofeedExperiment('biofeed',
                                  experParams = {"numpyRandomSeed" : numpyRandomSeed[run_idx],
                                                 "constructionSeed" : constructionSeed[run_idx],
                                                 "simulationSeed" : simulationSeed[run_idx],
                                                 "pyRandomSeed" : pyRandomSeed[run_idx]},
                                  modelParams = {},
                                  directory = directory)
        exper.run(runName)
    else:
        exper = BiofeedExperiment('biofeed',
                                  experParams = {"Tsim" : 400, "runMode" : "long"},
                                  modelParams = {})
        exper.run("shortrun")
        

Loading data, please wait...