Neurite: electrophysiological-mechanical coupling simulation framework (Garcia-Grajales et al 2015)

 Download zip file 
Help downloading and running models
Accession:168861
Neurite simulates the electrical signal propagation in myelinated and unmyelinated axons, and in dendritic trees under mechanical loading. Two different solvers are available (explicit and implicit) with sequential (CPU) and parallel (GPUs) versions
Reference:
1 . García-Grajales JA, Rucabado G, García-Dopico A, Peña JM, Jérusalem A (2015) Neurite, a finite difference large scale parallel program for the simulation of electrical signal propagation in neurites under mechanical loading. PLoS One 10:e0116532 [PubMed]
Model Information (Click on a link to find other models with that property)
Model Type: Axon; Dendrite;
Brain Region(s)/Organism:
Cell Type(s): Myelinated neuron;
Channel(s): I Sodium; I Potassium;
Gap Junctions:
Receptor(s):
Gene(s):
Transmitter(s):
Simulation Environment: C or C++ program;
Model Concept(s): Action Potential Initiation; Axonal Action Potentials; Action Potentials;
Implementer(s): Garcia-Grajales, Julian Andres ;
Search NeuronDB for information about:  I Sodium; I Potassium;
//
//
// File author(s):  <Gabriel Rucabado and Antonio Garcia Dopico>, (C) 2014
//
// Copyright: this software is licenced under the terms stipulated in the license.txt file located in its root directory
//
//

//!\file solverIm-gpu.cpp
//  \brief The implicit gpu solver of Neurite.
#include <math.h>
#include "solverIm-gpu.h"
#include "discretization.h"
#include "cudaException.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>

#include <iostream>
#include <fstream>
using namespace std;


// Includes from cuda
#include "cuda_runtime.h"
#include "cublas_v2.h"

#define TYPE double


#define NORMAL 1
#define BRANCH 2
#define TERM 4
#define NEXT_TERM 8
#define FIRST_NORMAL 16
#define FIRST_BRANCH 32
#define OTHER 64

//#######################################
//#### HEADERDS of CUDA KERNELS #########
//#######################################
__global__ void Update_HH (int numElem, TYPE *m, TYPE *h, TYPE *n, TYPE dT,
			   TYPE *oldPot, TYPE *restPot, int *indVec, 
			   TYPE *G_L, TYPE *E_Na, TYPE *E_K, TYPE *E_L, 
			   TYPE *g_Na, TYPE *g_K, TYPE *G_Na, TYPE *G_K, 
			   TYPE *leftShiftNa, TYPE *leftShiftK,
			   TYPE *W, TYPE *K);
__global__ void Update_Mat (int numElem, short *type, TYPE *csrVal, TYPE dT,
			    TYPE *r_a, TYPE *c_m, TYPE *dx, TYPE *W,
			    int *indVec_DL, int *indVec, int *indMat);
__global__ void Update_B (int numElem, TYPE *K, TYPE *input, TYPE dT,
			  TYPE *c_m, TYPE *dx, TYPE *oldPot, TYPE *b);
__global__ void div (TYPE *op1, TYPE *op2, TYPE *res);
__global__ void divMul (TYPE *num1, TYPE *denom1, TYPE *num2, TYPE *denom2, TYPE *res);



//#######################################
//#### BODY of CUDA KERNELS #############
//#######################################
__global__ void Update_HH (int numElem, TYPE *m, TYPE *h, TYPE *n, TYPE dT,
			   TYPE *pot, TYPE *restPot, int *indVec, 
			   TYPE *G_L, TYPE *E_Na, TYPE *E_K, TYPE *E_L, 
			   TYPE *g_Na, TYPE *g_K, TYPE *G_Na, TYPE *G_K, 
			   TYPE *leftShiftNa, TYPE *leftShiftK,
			   TYPE *W, TYPE *K){
  TYPE dt	= dT * 1000;
  int id	= blockIdx.x * blockDim.x + threadIdx.x;
  if (id < numElem) {
    TYPE temp_G_Na, temp_G_K;
    TYPE mloc, nloc, hloc;
    TYPE Am, Ah, An, Bm, Bh, Bn; 
    int indexVec  = indVec[id];
    TYPE V_prevNa = ((pot[indexVec] + leftShiftNa[id]) - restPot[id]) / SCALE; 
    TYPE V_prevK  = ((pot[indexVec] + leftShiftK[id]) - restPot[id]) / SCALE; 

    Am 	= (2.5 - 0.1 * V_prevNa) / (exp(2.5 - 0.1 * V_prevNa) - 1);
    Bm 	= 4 * exp(-V_prevNa / 18);
    Ah 	= 0.07 * exp(-V_prevNa / 20);
    Bh	= 1 / (exp(3 - 0.1 * V_prevNa) + 1);
    An	= (0.1 - 0.01 * V_prevK) / (exp(1 - 0.1 * V_prevK)-1);
    Bn	= 0.125 * exp(-V_prevK / 80);	

    mloc	= m[id];
    hloc	= h[id];
    nloc	= n[id];

    m[id] = mloc = mloc + dt * (Am * (1 - mloc) - Bm * mloc);	    
    h[id] = hloc = hloc + dt * (Ah * (1 - hloc) - Bh * hloc);	  
    n[id] = nloc = nloc + dt * (An * (1 - nloc) - Bn * nloc);

    G_Na[id]  = temp_G_Na = g_Na[id] * mloc * mloc * mloc * hloc;
    G_K[id]   = temp_G_K  = g_K[id] * nloc * nloc * nloc * nloc;
    
    W[id] 	= -(temp_G_Na + temp_G_K + G_L[id]);
    K[indexVec] = temp_G_Na * E_Na[id] + temp_G_K * E_K[id] + G_L[id] * E_L[id];
  }
}



//! \brief Update values related to HH nodes of implicit Matrix 
__global__ void Update_Mat (int numElem, short *type, TYPE *csrVal, TYPE dT,
			    TYPE *r_a, TYPE *c_m, TYPE *dx, TYPE *W,
			    int *indVec_DL, int *indVec, int *indMat){
  double alpha, betaR, betaL, betaMe, beta, delta, gamma;
  // Figure out id and index of thread 
  int id	= blockIdx.x * blockDim.x + threadIdx.x;

  // Update Matrix (HH elements)
  if (id < numElem) {
    int indexVec = indVec[id];
    int indexMat = indMat[id]; 

    // Update Matrix
    alpha = -1/(r_a[indexVec] * dx[indexVec]);
    betaR = 1/(r_a[indexVec+1] * dx[indexVec+1]);
    betaMe = -(W[id] * dx[indexVec]) + 1/(r_a[indexVec] * dx[indexVec]);
    beta = (c_m[indexVec] * dx[indexVec])/dT + betaMe + betaR;
    delta = -betaR;

    switch (type[id]){
    case (NEXT_TERM):
      csrVal[indexMat]   = alpha;
      csrVal[indexMat+1] = beta + delta;
      break;
    case (BRANCH) :
      betaL = 1/(r_a[indVec_DL[id]] * dx[indVec_DL[id]]);
      gamma = -betaL;
    
      csrVal[indexMat]   = alpha;
      csrVal[indexMat+1] = beta + betaL;
      csrVal[indexMat+2] = delta;
      csrVal[indexMat+3] = gamma;
      break;
    case (NORMAL) :
      csrVal[indexMat]   = alpha;
      csrVal[indexMat+1] = beta;
      csrVal[indexMat+2] = delta;
      break;
    case (FIRST_NORMAL) :
      csrVal[indexMat]   = alpha + beta;
      csrVal[indexMat+1] = delta;
      break;
    case (FIRST_BRANCH) :
      betaL = 1/(r_a[indVec_DL[id]] * dx[indVec_DL[id]]);
      gamma = -betaL;
      csrVal[indexMat]   = alpha + beta + betaL;
      csrVal[indexMat+1] = delta;
      csrVal[indexMat+2] = gamma;
      break;
    default:
      break;
    }
  }
}




//! \brief Update values from B vector
__global__ void Update_B (int numElem, TYPE *K, TYPE *input, TYPE dT,
			  TYPE *c_m, TYPE *dx, TYPE *pot, TYPE *b){
  //figure out id of thread
  int id	= blockIdx.x * blockDim.x + threadIdx.x;

  //figure out B
  if (id < numElem) {
    b[id] = c_m[id] * dx[id] * pot[id]/dT + K[id] * dx[id] + input[id];
  }
}


//! \brief Divide kernel
__global__ void div (TYPE *num, TYPE *denom, TYPE *res){
  register double a = *num, b = *denom;
  if (threadIdx.x == 0){
    res[0] = +(a/b);
  } else if (threadIdx.x == 1){
    res[1] = -(a/b);
  }else{
    _INFO_CUDA ("It should not be here!!!\n");
  }
}


//! \brief Divide and multiple kernel
__global__ void divMul (TYPE *num1, TYPE *denom1, TYPE *num2, TYPE *denom2, TYPE *res){
  if (threadIdx.x == 0){
    res[0] = (num1[0] / denom1[0]) * (num2[0] / denom2[0]);
  }else{
    _INFO_CUDA ("It should not be here!!!\n");
  }  
}

//#################################################
//#### BODY of PRIVATE METHODS ####################
//#################################################
void SolverImGPU::prevBoundaries (discretization **elements,
				    int *CPU_indMat_HH, int *CPU_indVec_DL_HH, short *CPU_type_HH,
				    TYPE *CPU_csr_Val, int *CPU_csr_Col, int * CPU_csr_Row){
  discretization *elem;
  double alpha, betaR, betaL, betaMe, beta, delta, gamma;

  // 0 row
  if (elements[0]->get_kind_HH()){
    CPU_indMat_HH[this->num_HH] = 0;
    CPU_indVec_DL_HH[this->num_HH] = -1;
    CPU_type_HH[this->num_HH++] = OTHER;
  }
  CPU_csr_Row[0] = 0;
  CPU_csr_Val[this->nnz] = 1;
  CPU_csr_Col[this->nnz++] = 0;

  //1 row
  elem = elements[1];

  alpha = -1/(elem->get_r_a() * elem->get_dX());
  betaR = 1/(elements[elem->get_DR()]->get_r_a()*elements[elem->get_DR()]->get_dX());
  betaL = elem->get_branching()/
    (elements[elem->get_DL()]->get_r_a()*elements[elem->get_DL()]->get_dX());
  betaMe = -elem->get_W() * elem->get_dX();
  beta = (elem->get_c_m() * elem->get_dX())/this->dT + betaMe - alpha + betaL + betaR;
  delta = -betaR;

  CPU_csr_Row[1] = 1;
  CPU_csr_Val[this->nnz] = alpha + beta;
  CPU_csr_Col[this->nnz++] = 1;
  
  CPU_csr_Val[this->nnz] = delta;
  CPU_csr_Col[this->nnz++] = 2;

  if (elements[1]->get_DL() != 0) {
    gamma = -betaL;
    CPU_csr_Val[this->nnz] = gamma;
    CPU_csr_Col[this->nnz++] = elements[1]->get_DL();
      
    if (elements[1]->get_kind_HH()){
      CPU_indMat_HH[this->num_HH] = 1;
      CPU_indVec_DL_HH[this->num_HH] = 1;
      CPU_type_HH[this->num_HH++] = FIRST_BRANCH;
    }
  } else{
    if (elements[1]->get_kind_HH()){
      CPU_indMat_HH[this->num_HH] = 1;
      CPU_indVec_DL_HH[this->num_HH] = -1;
      CPU_type_HH[this->num_HH++] = FIRST_NORMAL;
    }
  }
}



void SolverImGPU::coreMatrix (discretization **elements,
				int *CPU_indMat_HH, int *CPU_indVec_DL_HH, short *CPU_type_HH,
				TYPE *CPU_csr_Val, int *CPU_csr_Col, int * CPU_csr_Row){
  // 2<-->(n-2) Rows
  int i;
  discretization *elem, *nextElem;
  double alpha, betaR, betaL, betaMe, beta, delta, gamma;

  for (i=2; i<this->num_elem-2; i++){
    elem = elements[i];
    nextElem = elements[i+1];
    
    if (elem->get_kind_HH()){
      CPU_indMat_HH[this->num_HH] = this->nnz;
    }

    CPU_csr_Row[i] = this->nnz;

    if(elem->get_DR() == i){ 
      CPU_csr_Val[this->nnz] 	= 1;
      CPU_csr_Col[this->nnz++] = i;
      if (elem->get_kind_HH()){
	CPU_indVec_DL_HH[this->num_HH] = -1;
	CPU_type_HH[this->num_HH++] = TERM;
      }

    } else{
      alpha = -1/(elem->get_r_a() * elem->get_dX());
      betaR = 1/(elements[elem->get_DR()]->get_r_a()*elements[elem->get_DR()]->get_dX());
      betaL = elem->get_branching()/
	(elements[elem->get_DL()]->get_r_a()*elements[elem->get_DL()]->get_dX());
      betaMe = -elem->get_W() * elem->get_dX();
      beta = (elem->get_c_m() * elem->get_dX())/this->dT + betaMe - alpha + betaL + betaR;
      delta = -betaR;

      CPU_csr_Val[this->nnz] = alpha;
      CPU_csr_Col[this->nnz++] = elem->get_mother();

      if(nextElem->get_DR() == i+1){
	CPU_csr_Val[this->nnz] = beta + delta;
	CPU_csr_Col[this->nnz++] = i;

	if (elem->get_kind_HH()){
	  CPU_indVec_DL_HH[this->num_HH] = -1;
	  CPU_type_HH[this->num_HH++] = NEXT_TERM;
	}
      }else{
	CPU_csr_Val[this->nnz] = beta;
	CPU_csr_Col[this->nnz++] = i;

	CPU_csr_Val[this->nnz] = delta;
	CPU_csr_Col[this->nnz++] = elem->get_DR();

	if (elem->get_DL() != 0) {
	  gamma = -betaL;
	  CPU_csr_Val[this->nnz] = gamma;
	  CPU_csr_Col[this->nnz++] = elem->get_DL();

	  if (elem->get_kind_HH()){
	    CPU_indVec_DL_HH[this->num_HH] = elem->get_DL();
	    CPU_type_HH[this->num_HH++] = BRANCH;
	  }
	} else {
	  if (elem->get_kind_HH()){
	    CPU_indVec_DL_HH[this->num_HH] = -1;
	    CPU_type_HH[this->num_HH++] = NORMAL;
	  }
	}
      }
    }
  }
}




void SolverImGPU::postBoundaries (discretization **elements,
				  int *CPU_indMat_HH, int *CPU_indVec_DL_HH, short *CPU_type_HH,
				  TYPE *CPU_csr_Val, int *CPU_csr_Col, int * CPU_csr_Row){
  discretization *elem;
  double alpha, betaR, betaMe, beta, delta;

  // n-2 row
  elem = elements[this->num_elem-2];

  alpha = -1/(elem->get_r_a() * elem->get_dX());
  betaR = 1/(elements[elem->get_DR()]->get_r_a()*elements[elem->get_DR()]->get_dX());
  betaMe = -elem->get_W() * elem->get_dX();
  beta = (elem->get_c_m() * elem->get_dX())/this->dT + betaMe -alpha + betaR;
  delta = -betaR;


  if (elem->get_kind_HH()){
    CPU_indMat_HH[this->num_HH] = this->nnz;
    CPU_indVec_DL_HH[this->num_HH] = -1;
    CPU_type_HH[this->num_HH++] = NEXT_TERM;
  }
  CPU_csr_Row[this->num_elem-2] = this->nnz;
  CPU_csr_Val[this->nnz] = alpha;
  CPU_csr_Col[this->nnz++] = elem->get_mother();

  CPU_csr_Val[this->nnz] = beta + delta;
  CPU_csr_Col[this->nnz++] = this->num_elem-2;

  //n-1 row
  if (elements[this->num_elem-1]->get_kind_HH()){
    CPU_indMat_HH[this->num_HH] = this->nnz;
    CPU_indVec_DL_HH[this->num_HH] = -1;
    CPU_type_HH[this->num_HH++] = OTHER;
  }

  CPU_csr_Row[this->num_elem-1] = this->nnz;
  CPU_csr_Val[this->nnz] = 1;
  CPU_csr_Col[this->nnz++] = this->num_elem - 1;
  CPU_csr_Row[this->num_elem] = this->nnz;
}








//#################################################
//#### BODY of PUBLIC METHODS #####################
//#################################################
SolverImGPU::SolverImGPU(double dT, double **potential,
			   int num_term, int *ind_terminals, double input_val,
			   int num_HH_elements, Hodgkin_Huxley **HH_elements,
			   int num_elem, discretization **elements) {
  int i=0;
  cudaError_t error;
  TYPE onePos=+1, oneNeg=-1, cero=0;
  char *env = getenv("idGPU");
  cudaDeviceProp deviceProp;

  try {
    this->idGPU =(env != NULL)? atoi(env):CUDA_ID_DEF;
    if ((error = cudaSetDevice (this->idGPU)) != OK_CUDA){
      std::stringstream params;
      params << "idGPU=" << this->idGPU;
      throw cudaException(__FILE__, __FUNCTION__, __LINE__,	       
			  "Identifying the GPU", error,  params.str());     
    }

    cudaGetDeviceProperties(&deviceProp, this->idGPU);
    _INFO("GPU used ==> Device %d: \"%s\"", this->idGPU, deviceProp.name);

    // COMMON
    //////////////////////////////////////////////////
    _DEBUG("STARTING TO CREATE STRUCTURES OF SOLVER");
    DIM3(dimBlock_ALL, TH_ALL,1,1);
    DIM3(dimBlock_HH, TH_HH,1,1);
    DIM3(dimBlock_DIV, 2,1,1);
    DIM3(dimBlock_DIVMUL, 1,1,1);

    DIM3(dimGrid_DIV, 1,1,1);
    DIM3(dimGrid_DIVMUL, 1,1,1);
    this->dT 	= dT;

    // Create Events
    for (i=0; i<NUM_EVENT; i++){
      if ((error = cudaEventCreateWithFlags(&this->event[i],
					    cudaEventDisableTiming)) != OK_CUDA){
	std::stringstream params, msg;
	params << "event=" << &event[i];
	msg    << "creating event: " + i;
	throw cudaException(__FILE__, __FUNCTION__, __LINE__,
			    msg.str(), error,  params.str());
      }
    }
    // Create Streams
    for (i=0; i<NUM_STREAM; i++){
      cudaError_t error;
      if ((error = cudaStreamCreate(&stream[i]))!= OK_CUDA){
	std::stringstream params,msg;
	params << "stream=" << &stream[i];
	msg << "Creating the stream: " << i;
	throw cudaException(__FILE__, __FUNCTION__, __LINE__,	       
			    msg.str(), error,  params.str());				
      }
    }

    // Create CUBLAS context 
    cublasStatus_t cublasStatus;
    if ((cublasStatus = cublasCreate(&cublasHandle)) != OK_CUBLAS){
      throw cudaException(__BASE_FILE__, __FUNCTION__, __LINE__,
			  "creating context cublas", cublasStatus, "");
    }
    
    if ((cublasStatus = cublasSetPointerMode(this->cublasHandle, 
					     CUBLAS_POINTER_MODE_DEVICE)) != OK_CUBLAS){
      throw cudaException(__BASE_FILE__, __FUNCTION__, __LINE__,
			  "configurating pointers for cublas", cublasStatus, "");
    }
    
    // Create CUSPARSE context
    cusparseStatus_t cusparseStatus;
    if ((cusparseStatus = cusparseCreate(&cusparseHandle)) != OK_CUSPARSE){
      throw cudaException(__BASE_FILE__, __FUNCTION__, __LINE__,
			  "Creating cusparse context", cusparseStatus, "");
    }
    if ((cusparseStatus = cusparseSetPointerMode(this->cusparseHandle, 
						 CUSPARSE_POINTER_MODE_DEVICE)) != OK_CUSPARSE){
      throw cudaException(__BASE_FILE__, __FUNCTION__, __LINE__,
			  "Creating pointers for cusparse", cusparseStatus, "");
    }
    if ((cusparseStatus = cusparseCreateMatDescr(&this->descA)) != OK_CUSPARSE){
      throw cudaException(__BASE_FILE__, __FUNCTION__, __LINE__,
			  "Creating A matrix for cusparse", cusparseStatus, "");
    }
    if ((cusparseStatus = cusparseSetMatIndexBase(this->descA, CUSPARSE_INDEX_BASE_ZERO)) != OK_CUSPARSE){
      throw cudaException(__BASE_FILE__, __FUNCTION__, __LINE__,
			  "Creating A Matrix for cusparse", cusparseStatus, "");
    }
    if ((cusparseStatus = cusparseSetMatType(this->descA, CUSPARSE_MATRIX_TYPE_GENERAL)) != OK_CUSPARSE){
      throw cudaException(__BASE_FILE__, __FUNCTION__, __LINE__,
			  "Creating A Matrix for cusparse", cusparseStatus, "");
    }

   
    CUDA_ALLOC(&(this->AP), num_elem * sizeof(TYPE));
    CUDA_ALLOC(&(this->AS), num_elem * sizeof(TYPE));
    CUDA_ALLOC(&(this->P), num_elem * sizeof(TYPE));
    CUDA_ALLOC(&(this->R), num_elem * sizeof(TYPE));
    CUDA_ALLOC(&(this->R0), num_elem * sizeof(TYPE));
    CUDA_SET (this->AP, 0, num_elem * sizeof(TYPE), stream[0]);
    CUDA_SET (this->AS, 0, num_elem * sizeof(TYPE), stream[0]);
    CUDA_SET (this->P, 0, num_elem * sizeof(TYPE), stream[0]);
    CUDA_SET (this->R, 0, num_elem * sizeof(TYPE), stream[0]);
    CUDA_SET (this->R0, 0, num_elem * sizeof(TYPE), stream[0]);


    CUDA_ALLOC(&(this->p0), 1 * sizeof(TYPE));
    CUDA_ALLOC(&(this->p1), 1 * sizeof(TYPE));
    CUDA_ALLOC(&(this->apr), 1 * sizeof(TYPE));
    CUDA_ALLOC(&(this->alpha), 2 * sizeof(TYPE));
    CUDA_ALLOC(&(this->w1), 1 * sizeof(TYPE));
    CUDA_ALLOC(&(this->w2), 1 * sizeof(TYPE));
    CUDA_ALLOC(&(this->w), 2 * sizeof(TYPE));
    CUDA_ALLOC(&(this->beta), 1 * sizeof(TYPE));
    CUDA_ALLOC(&(this->onePos), 1 * sizeof(TYPE));
    CUDA_ALLOC(&(this->oneNeg), 1 * sizeof(TYPE));
    CUDA_ALLOC(&(this->zero), 1 * sizeof(TYPE));
    CUDA_ALLOC(&(this->res), 1 * sizeof(TYPE));

    CUDA_CPY (this->onePos, &onePos, 1*sizeof(TYPE), stream[0]);
    CUDA_CPY (this->oneNeg, &oneNeg, 1*sizeof(TYPE), stream[0]);
    CUDA_CPY (this->zero,   &cero,   1*sizeof(TYPE), stream[0]);
    CUDA_SET (this->p0,   0, 1*sizeof(TYPE), stream[0]);
    CUDA_SET (this->p1,   0, 1*sizeof(TYPE), stream[0]);
    CUDA_SET (this->apr,  0, 1*sizeof(TYPE), stream[0]);
    CUDA_SET (this->w1,   0, 1*sizeof(TYPE), stream[0]);
    CUDA_SET (this->w2,   0, 1*sizeof(TYPE), stream[0]);
    CUDA_SET (this->beta, 0, 1*sizeof(TYPE), stream[0]);
    CUDA_SET (this->alpha,0, 2*sizeof(TYPE), stream[0]);
    CUDA_SET (this->w,    0, 2*sizeof(TYPE), stream[0]);


    // TERMINALS
    //////////////////////////////////////////////////
    this->num_term 	= num_term;


    // ALL
    //////////////////////////////////////////////////
    this->num_elem	= num_elem;
    //   POTENTIAL
    CUDA_ALLOC(&this->pot, num_elem * sizeof(TYPE));
    CUDA_CPY(this->pot, potential[0], sizeof(TYPE)*num_elem, stream[0]);

    //   ATTRIBUTES
    TYPE *CPU_dx	= new TYPE[num_elem];
    TYPE *CPU_c_m	= new TYPE[num_elem];
    TYPE *CPU_r_a	= new TYPE[num_elem];
    TYPE *CPU_K		= new TYPE[num_elem];
    TYPE *CPU_input_on	= new TYPE[num_elem];
    TYPE *CPU_input_off	= new TYPE[num_elem];
    for (i=0; i<num_elem; i++){
      CPU_dx[i]		= elements[i]->get_dX();
      CPU_c_m[i]	= elements[i]->get_c_m();
      CPU_r_a[i]	= elements[i]->get_r_a();
      CPU_K[i]		= elements[i]->get_K();
      CPU_input_off[i]	= 0;
      CPU_input_on[i]	= elements[i]->get_input_current();
    }

    DIM3(dimGrid_ALL, (this->num_elem + TH_ALL - 1)/TH_ALL,1,1);
    int num_all_cuda = dimGrid_ALL.x * dimBlock_ALL.x;

    CUDA_ALLOC(&(this->dx), num_all_cuda * sizeof(TYPE));
    CUDA_ALLOC(&(this->c_m), num_all_cuda * sizeof(TYPE));
    CUDA_ALLOC(&(this->r_a), num_all_cuda * sizeof(TYPE));
    CUDA_ALLOC(&(this->K), num_all_cuda * sizeof(TYPE));
    CUDA_ALLOC(&(this->input_on), num_all_cuda * sizeof(TYPE));
    CUDA_ALLOC(&(this->input_off), num_all_cuda * sizeof(TYPE));
    CUDA_CPY(this->dx, CPU_dx, this->num_elem * sizeof(TYPE), stream[0]);
    CUDA_CPY(this->c_m, CPU_c_m, this->num_elem * sizeof(TYPE), stream[0]);
    CUDA_CPY(this->r_a, CPU_r_a, this->num_elem * sizeof(TYPE), stream[0]);
    CUDA_CPY(this->K, CPU_K, this->num_elem * sizeof(TYPE), stream[0]);
    CUDA_CPY(this->input_on, CPU_input_on, this->num_elem * sizeof(TYPE), stream[0]);
    CUDA_CPY(this->input_off, CPU_input_off, this->num_elem * sizeof(TYPE), stream[0]);

    // HH
    //////////////////////////////////////////////////
    this->num_HH	= num_HH_elements;
    if (this->num_HH > 0){
      int *CPU_indVec_HH	= new int [num_HH_elements];
      TYPE *CPU_W		= new TYPE [num_HH_elements];
      TYPE *CPU_Shift_Na	= new TYPE [num_HH_elements];
      TYPE *CPU_Shift_K	= new TYPE [num_HH_elements];
      TYPE *CPU_n		= new TYPE [num_HH_elements];
      TYPE *CPU_m		= new TYPE [num_HH_elements];
      TYPE *CPU_h		= new TYPE [num_HH_elements];
      TYPE *CPU_E_Na	= new TYPE [num_HH_elements];
      TYPE *CPU_G_Na	= new TYPE [num_HH_elements];
      TYPE *CPU_g_Na	= new TYPE [num_HH_elements];
      TYPE *CPU_E_L	= new TYPE [num_HH_elements];
      TYPE *CPU_G_L	= new TYPE [num_HH_elements];
      TYPE *CPU_E_K	= new TYPE [num_HH_elements];
      TYPE *CPU_G_K	= new TYPE [num_HH_elements];
      TYPE *CPU_g_K	= new TYPE [num_HH_elements];
      TYPE *CPU_rest_pot	= new TYPE [num_HH_elements];
      for (i=0; i<num_HH_elements; i++){
	CPU_indVec_HH[i]	= HH_elements[i]->get_element();
	CPU_W[i]		= HH_elements[i]->get_W();
	CPU_Shift_Na[i]	= HH_elements[i]->get_Left_Shift_Na();
	CPU_Shift_K[i]	= HH_elements[i]->get_Left_Shift_K();
	CPU_n[i]		= HH_elements[i]->get_n();
	CPU_m[i]		= HH_elements[i]->get_m();
	CPU_h[i]		= HH_elements[i]->get_h();
	CPU_E_Na[i]	= HH_elements[i]->get_E_Na();
	CPU_G_Na[i]	= HH_elements[i]->get_G_Na();
	CPU_g_Na[i]	= HH_elements[i]->get_g_Na();
	CPU_E_L[i]	= HH_elements[i]->get_E_L();
	CPU_G_L[i]	= HH_elements[i]->get_G_L();
	CPU_E_K[i]	= HH_elements[i]->get_E_K();
	CPU_G_K[i]	= HH_elements[i]->get_G_K();
	CPU_g_K[i]	= HH_elements[i]->get_g_K();
	CPU_rest_pot[i]	= HH_elements[i]->get_rest_pot();
      }

      DIM3(dimGrid_HH, (this->num_HH + TH_HH - 1)/TH_HH,1,1);
      int num_HH_cuda = dimGrid_HH.x * dimBlock_HH.x;

      CUDA_ALLOC(&(this->indVec_HH), num_HH_cuda * sizeof(int));
      CUDA_ALLOC(&(this->W), num_HH_cuda * sizeof(TYPE));
      CUDA_ALLOC(&(this->leftShiftNa), num_HH_cuda * sizeof(TYPE));
      CUDA_ALLOC(&(this->leftShiftK), num_HH_cuda * sizeof(TYPE));
      CUDA_ALLOC(&(this->n), num_HH_cuda * sizeof(TYPE));
      CUDA_ALLOC(&(this->m), num_HH_cuda * sizeof(TYPE));
      CUDA_ALLOC(&(this->h), num_HH_cuda * sizeof(TYPE));
      CUDA_ALLOC(&(this->E_Na), num_HH_cuda * sizeof(TYPE));
      CUDA_ALLOC(&(this->G_Na), num_HH_cuda * sizeof(TYPE));
      CUDA_ALLOC(&(this->g_Na), num_HH_cuda * sizeof(TYPE));
      CUDA_ALLOC(&(this->E_L), num_HH_cuda * sizeof(TYPE));
      CUDA_ALLOC(&(this->G_L), num_HH_cuda * sizeof(TYPE));
      CUDA_ALLOC(&(this->E_K), num_HH_cuda * sizeof(TYPE));
      CUDA_ALLOC(&(this->G_K), num_HH_cuda * sizeof(TYPE));
      CUDA_ALLOC(&(this->g_K), num_HH_cuda * sizeof(TYPE));
      CUDA_ALLOC(&(this->rest_pot), num_HH_cuda * sizeof(TYPE));

      CUDA_CPY(this->indVec_HH, CPU_indVec_HH, this->num_HH * sizeof(int), stream[0]);
      CUDA_CPY(this->W, CPU_W, this->num_HH * sizeof(TYPE), stream[0]);
      CUDA_CPY(this->leftShiftNa, CPU_Shift_Na, this->num_HH * sizeof(TYPE), stream[0]);
      CUDA_CPY(this->leftShiftK, CPU_Shift_K, this->num_HH * sizeof(TYPE), stream[0]);
      CUDA_CPY(this->n, CPU_n, this->num_HH * sizeof(TYPE), stream[0]);
      CUDA_CPY(this->m, CPU_m, this->num_HH * sizeof(TYPE), stream[0]);
      CUDA_CPY(this->h, CPU_h, this->num_HH * sizeof(TYPE), stream[0]);
      CUDA_CPY(this->E_Na, CPU_E_Na, this->num_HH * sizeof(TYPE), stream[0]);
      CUDA_CPY(this->G_Na, CPU_G_Na, this->num_HH * sizeof(TYPE), stream[0]);
      CUDA_CPY(this->g_Na, CPU_g_Na, this->num_HH * sizeof(TYPE), stream[0]);
      CUDA_CPY(this->E_L, CPU_E_L, this->num_HH * sizeof(TYPE), stream[0]);
      CUDA_CPY(this->G_L, CPU_G_L, this->num_HH * sizeof(TYPE), stream[0]);
      CUDA_CPY(this->E_K, CPU_E_K, this->num_HH * sizeof(TYPE), stream[0]);
      CUDA_CPY(this->G_K, CPU_G_K, this->num_HH * sizeof(TYPE), stream[0]);
      CUDA_CPY(this->g_K, CPU_g_K, this->num_HH * sizeof(TYPE), stream[0]);
      CUDA_CPY(this->rest_pot, CPU_rest_pot, this->num_HH * sizeof(TYPE), stream[0]);
    }
  }catch (cudaException &e){
    e.print();
    throw e;
  }

  // Creation temporal structures that related to the matrix
  //////////////////////////////////////////////////
  TYPE *CPU_csr_Val 	= new TYPE [this->num_elem*4];
  int *CPU_csr_Col 	= new int [this->num_elem*4];
  int *CPU_csr_Row 	= new int [this->num_elem+1];
  int *CPU_indMat	= new int [this->num_HH];
  int *CPU_indVec_DL	= new int [this->num_HH];
  short *CPU_type	= new short [this->num_HH];

  this->nnz=0;
  this->num_HH=0;
  this->prevBoundaries(elements, CPU_indMat, CPU_indVec_DL,
		       CPU_type, CPU_csr_Val, CPU_csr_Col, CPU_csr_Row);
  _DEBUG("prev->coreMatrix"); 
  this->coreMatrix(elements, CPU_indMat, CPU_indVec_DL,
		   CPU_type, CPU_csr_Val, CPU_csr_Col, CPU_csr_Row);
  _DEBUG("coreMatrix->post"); 
  this->postBoundaries(elements, CPU_indMat, CPU_indVec_DL,
		       CPU_type, CPU_csr_Val, CPU_csr_Col, CPU_csr_Row);
  
  // Transferring the matrix from CPU to GPU
  CUDA_ALLOC (&(this->csr_Val), sizeof(TYPE)*this->nnz);
  CUDA_CPY(this->csr_Val, CPU_csr_Val, sizeof(TYPE)*this->nnz, stream[0]);
  CUDA_ALLOC (&(this->csr_Col_Ind), sizeof(int)*this->nnz);
  CUDA_CPY(this->csr_Col_Ind, CPU_csr_Col, sizeof(int)*this->nnz, stream[0]);
  CUDA_ALLOC (&(this->csr_Row_Ptr), sizeof(int)*(this->num_elem+1));
  CUDA_CPY(this->csr_Row_Ptr, CPU_csr_Row, sizeof(int)*(this->num_elem+1), stream[0]);

  CUDA_ALLOC (&(this->type_HH), sizeof(short)*this->num_HH);
  CUDA_CPY(this->type_HH, CPU_type, sizeof(short)*this->num_HH, stream[0]);
  CUDA_ALLOC (&(this->indMat_HH), sizeof(int)*this->num_HH);
  CUDA_CPY(this->indMat_HH, CPU_indMat, sizeof(int)*this->num_HH, stream[0]);
  CUDA_ALLOC (&(this->indVec_DL_HH), sizeof(int)*this->num_HH);
  CUDA_CPY(this->indVec_DL_HH, CPU_indVec_DL, sizeof(int)*this->num_HH, stream[0]);

  // Free allocated temporal matrix (CPU)
  free(CPU_csr_Val);
  free(CPU_csr_Col);
  free(CPU_csr_Row);
  free(CPU_indMat);
  free(CPU_indVec_DL);

  cudaDeviceSynchronize();
}


  //!\brief This function updates the electrical variables based on the previous time step information
  //  @param input_active Flag used to indicate if it has to use  the input current to update the values
void SolverImGPU::update (bool input_active) throw(){
  
  try{
    // Set GPU card to use
    cudaSetDevice (this->idGPU);
    
    if (input_active){
      input	= input_on;
    }else{
      input	= input_off;
    }

    if (num_HH > 0){  
      KERNEL(Update_HH, dimGrid_HH, dimBlock_HH, stream[0], 
     	     "%d, %p, %p, %p, %f, %p, %p, %p, %p,"
	     "%p, %p, %p, %p, %p, %p, %p, %p, %p, %p, %p",
     	     num_HH, m, h, n, dT, pot, rest_pot, indVec_HH, 
     	     G_L, E_Na, E_K, E_L, g_Na, g_K, G_Na, G_K,
	     leftShiftNa, leftShiftK, W, K);

      KERNEL(Update_Mat, dimGrid_HH, dimBlock_HH, stream[0], 
     	     "%d, %p, %p, %f, %p, %p, %p, %p, %p, %p, %p",
     	     num_HH, type_HH, csr_Val, dT, r_a, c_m, dx, W,
	     indVec_DL_HH, indVec_HH, indMat_HH);
    }

    KERNEL(Update_B, dimGrid_ALL, dimBlock_ALL, stream[0], 
	   "%d, %p, %p, %f, %p, %p, %p, %p",
	   num_elem, K, input, dT, c_m, dx, this->pot, this->pot);
  } catch (cudaException &e){
    e.print();
    throw e;
  }
}



//! \brief Calculate one iteration of explicit finite difference discretization solver
void SolverImGPU::calculate() throw(){
  /*
R0==B
csrmv (A,x)->AP;
daxpy (R0 -AP)->R0;
dot (R0, R0)->p0;
copy (R0,P)
copy (R0,R)

while (true){
=================================
|  	STREAM 0 		|
=================================
| csrmv(A,P)->AP 		|
| dot(AP,R0)->apr 		|
| alpha=p0/apr			|
| daxpy(R-alpha*AP)->R		|
| csrmv(A, R)->As  		|
| dot(As,R)->w1			|
| dot (As, As)->w2	  	|
| w=w1/w2			|
| daxpy (Xi+ alpha*P)->Xi+1	|
| daxpy (Xi+1 + w*R)->Xi+1	|
| daxpy (R-w*As)->R		| 
| dot (R, R0)->p1		|
| beta=alpha/w * p1/p0		|
| copy(P->AS)			|
| daxpy(AS-w*AP)->AS		|
| copy(R->P)			|
| daxpy(P + beta*AS)->P		|
=================================
p0=p1 (flip)
}
  */
  int i,j;
  TYPE result, *flip, limit = RANGE_ERROR*RANGE_ERROR;

  try{
    // Set GPU card to use
    cudaSetDevice (this->idGPU);

    cublasSetStream(this->cublasHandle, stream[0]);
    cusparseSetStream(this->cusparseHandle, stream[0]);

    CUDA_CPY_SYNC(this->R0, this->pot, sizeof(TYPE)*this->num_elem);
    cusparseXcsrmv(this->cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
    		   this->num_elem, this->num_elem, this->nnz, this->onePos,
    		   this->descA, this->csr_Val, this->csr_Row_Ptr, this->csr_Col_Ind,
    		   this->pot, this->zero, this->AP);
    cublasXaxpy(this->cublasHandle, this->num_elem, this->oneNeg, this->AP, 1, this->R0, 1);
    CUDA_CPY_SYNC(this->R, this->R0, sizeof(TYPE)*this->num_elem);
    CUDA_CPY_SYNC(this->P, this->R0, sizeof(TYPE)*this->num_elem);
    cublasXot(this->cublasHandle, this->num_elem, this->R0, 1, this->R0, 1, this->p0);

    for (i=0; i<MAX_ITER; i++){
      for (j=i; j<i+ITER_CHECK; j++){ 
	cusparseXcsrmv(this->cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
		       this->num_elem, this->num_elem, this->nnz, this->onePos,
		       this->descA, this->csr_Val, this->csr_Row_Ptr, this->csr_Col_Ind,
		       this->P, this->zero, this->AP);
	cublasXot(this->cublasHandle, this->num_elem, this->AP, 1, this->R0, 1, this->apr);
	KERNEL(div, dimGrid_DIV, dimBlock_DIV, stream[0], 
	       "%p, %p, %p", this->p0, this->apr, this->alpha);
	cublasXaxpy(this->cublasHandle, this->num_elem, &this->alpha[1], this->AP, 1, this->R, 1);
	cusparseXcsrmv(this->cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
		       this->num_elem, this->num_elem, this->nnz, this->onePos,
		       this->descA, this->csr_Val, this->csr_Row_Ptr, this->csr_Col_Ind,
		       this->R, this->zero, this->AS);
	cublasXot(this->cublasHandle, this->num_elem, this->AS, 1, this->R, 1, this->w1);
	cublasXot(this->cublasHandle, this->num_elem, this->AS, 1, this->AS, 1, this->w2);

	KERNEL(div, dimGrid_DIV, dimBlock_DIV, stream[0], 
	       "%p, %p, %p", this->w1, this->w2, this->w);

	cublasXaxpy(this->cublasHandle, this->num_elem, &(this->alpha[0]), this->P, 1, this->pot, 1);
	cublasXaxpy(this->cublasHandle, this->num_elem, &(this->w[0]), this->R, 1, this->pot, 1);

	cublasXaxpy(this->cublasHandle, this->num_elem, &this->w[1], this->AS, 1, this->R, 1);

	cublasXot(this->cublasHandle, this->num_elem, this->R, 1, this->R0, 1, this->p1);

	KERNEL(divMul, dimGrid_DIVMUL, dimBlock_DIVMUL, stream[0], 
	       "%p, %p, %p, %p, %p",
	       &(this->alpha[0]), &(this->w[0]), this->p1, this->p0, this->beta);
	CUDA_CPY_SYNC (this->AS, this->P, sizeof(TYPE)*this->num_elem);
	cublasXaxpy(this->cublasHandle, this->num_elem, &this->w[1], this->AP, 1, this->AS, 1);

	CUDA_CPY_SYNC (this->P, this->R, sizeof(TYPE)*this->num_elem);
	cublasXaxpy(this->cublasHandle, this->num_elem, this->beta, this->AS, 1, this->P, 1);

	flip 	= this->p0;
	this->p0= this->p1;
	this->p1= flip;
      }

      i=j-1;
      // SYNC HOST-DEVICE to check error
      cublasXot(this->cublasHandle, this->num_elem, this->R, 1, this->R,1,this->res);
      //COPY SYNC
      CUDA_CPY_SYNC(&result, this->res, sizeof(TYPE));
      if (result <= limit){
	break;
      }
    }


    if (result <= limit){
      _DEBUG("Finalize by precision at iteration %d [%.16e <= %.16e]",
	     i-1, result, RANGE_ERROR);
    } else {
      _DEBUG("Finalize by iterations [%.16e > %.16e]", result, RANGE_ERROR);
    }
    checkError(__PRETTY_FUNCTION__, __BASE_FILE__, __LINE__);

  }catch (cudaException &e){
    e.print();
    throw e;
  }
}

   //! \brief Copy the current potential to the pointer of the parameter
  //  @param potential Pointer on which the potential will be copied 
void SolverImGPU::snapshot(double *potential) throw(){
  try{
    CUDA_CPY(potential, this->pot, sizeof(TYPE) * this->num_elem, stream[0]);
  }catch (cudaException &e){
    e.print();
    throw e;
  }
}


//! \brief Free all memory used by solver
SolverImGPU::~SolverImGPU(){
  CUDA_FREE(this->csr_Val);
  CUDA_FREE(this->csr_Col_Ind);
  CUDA_FREE(this->csr_Row_Ptr);

  CUDA_FREE(this->pot);
  CUDA_FREE(this->dx);
  CUDA_FREE(this->c_m);
  CUDA_FREE(this->r_a);
  CUDA_FREE(this->K);
  CUDA_FREE(this->input_on);
  CUDA_FREE(this->input_off);

  CUDA_FREE(this->indVec_DL_HH);
  CUDA_FREE(this->indVec_HH);
  CUDA_FREE(this->indMat_HH);
  CUDA_FREE(this->type_HH);
  CUDA_FREE(this->n);
  CUDA_FREE(this->m);
  CUDA_FREE(this->h);
  CUDA_FREE(this->E_Na);
  CUDA_FREE(this->G_Na);
  CUDA_FREE(this->g_Na);
  CUDA_FREE(this->E_L);
  CUDA_FREE(this->G_L);
  CUDA_FREE(this->E_K);
  CUDA_FREE(this->G_K);
  CUDA_FREE(this->g_K);
  CUDA_FREE(this->rest_pot);
  CUDA_FREE(this->W);
}

Loading data, please wait...