SpikeGPU  1.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
spike::device Namespace Reference

spike::device contains all CUDA kernels.

Namespaces

 var
 spike::device::var contains all CUDA kernels for the variable-bandwidth preconditioner.
 

Functions

template<typename T >
__global__ void assembleReducedMat (int k, T *dWV, T *d_comp)
 
template<typename T >
__global__ void assembleReducedMat_g32 (int k, T *dWV, T *d_comp)
 
template<typename T >
__global__ void assembleReducedMat_general (int k, T *dWV, T *d_comp)
 
template<typename T >
__global__ void copydAtodA2 (int N, int k, T *dA, T *dA2, int num_of_rows, int partition_size, int partition_num, int rest_num)
 
template<typename T >
__global__ void copydAtodA2_general (int N, int k, T *dA, T *dA2, int num_of_rows, int partition_size, int partition_num, int rest_num)
 
template<typename T >
__global__ void copydWV_general (int k, T *dA, T *dWV, T *d_spike, int partition_size, int partition_num, int rest_num)
 
template<typename T >
__global__ void copydWV_g32 (int k, T *dA, T *dWV, T *d_spike, int partition_size, int partition_num, int rest_num)
 
template<typename T >
__global__ void copydWV (int k, T *dA, T *dWV, T *d_spike, int partition_size, int partition_num, int rest_num)
 
template<typename T >
__global__ void copydAtoPartialA (int N, int k, T *dA, T *dA2, T *d_partial_A, int partition_size, int partition_num, int rest_num, int num_of_rows)
 
template<typename T >
__global__ void copydAtoPartialA_general (int N, int k, T *dA, T *dA2, T *d_partial_A, int partition_size, int partition_num, int rest_num, int num_of_rows)
 
template<typename T >
__global__ void copyWVFromOrToExtendedV (int N, int k, int partition_size, int rest_num, T *dWV, T *d_eV, bool from)
 
template<typename T >
__global__ void copyWVFromOrToExtendedV_general (int N, int k, int partition_size, int rest_num, T *dWV, T *d_eV, bool from)
 
template<typename T >
__global__ void copyWVFromOrToExtendedWVTranspose_general (int row_size, int k, int rightWidth, int partition_size, int rest_num, int column_deltaW, T *dWV, T *d_eWV, bool from)
 
template<typename T >
__global__ void copyWVFromOrToExtendedW (int N, int k, int partition_size, int rest_num, T *dWV, T *d_eW, bool from)
 
template<typename T >
__global__ void copyWVFromOrToExtendedW_general (int N, int k, int partition_size, int rest_num, T *dWV, T *d_eW, bool from)
 
template<typename T >
__global__ void copyFromCOOMatrixToBandedMatrix (int nnz, int bandwidth, int *rows, int *cols, T *vals, T *dB)
 
template<typename T >
__global__ void assembleReducedMat_var_bandwidth (int *ks, int *offsets_src, int *offsets_dst, T *dWV, T *d_comp)
 
template<typename T >
__global__ void assembleReducedMat_var_bandwidth_g32 (int *ks, int *offsets_src, int *offsets_dst, T *dWV, T *d_comp)
 
template<typename T >
__global__ void assembleReducedMat_var_bandwidth_general (int *ks, int *offsets_src, int *offsets_dst, T *dWV, T *d_comp)
 
template<typename T >
__global__ void matrixVReordering (int k, T *WV, T *WV_spare, int *perms, int *widths)
 
template<typename T >
__global__ void matrixWReordering (int k, T *WV, T *WV_spare, int *perms, int *widths)
 
template<typename T >
__global__ void matrixVReordering_perPartition (int k, T *WV, T *WV_spare, int *perms)
 
template<typename T >
__global__ void matrixWReordering_perPartition (int k, T *WV, T *WV_spare, int *perms)
 
template<typename T >
__device__ T boostValue (const T in_val, T &out_val, const T threshold)
 
template<typename T >
__device__ T boostValue (const T in_val, T &out_val, const T threshold, const T new_val)
 
template<typename T >
__global__ void bandLU (T *dA, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void swBandLU (T *dA, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandLU_safe (T *dA, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandLU_g32 (T *dA, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandLU_g32_safe (T *dA, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandUL_g32 (T *dA, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandUL_g32_safe (T *dA, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandUL (T *dA, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void swBandUL (T *dA, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandUL_safe (T *dA, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandLU_critical_div (T *dA, int start_row, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandLU_critical_div_onePart (T *dA, int start_row, int k)
 
template<typename T >
__global__ void bandLU_critical_div_safe (T *dA, int start_row, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandLU_critical_div_onePart_safe (T *dA, int start_row, int k)
 
template<typename T >
__global__ void bandLU_critical_sub (T *dA, int start_row, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandLU_critical_sub_onePart (T *dA, int start_row, int k)
 
template<typename T >
__global__ void bandLU_critical_div_general (T *dA, int start_row, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandLU_critical_div_onePart_general (T *dA, int start_row, int k, int last)
 
template<typename T >
__global__ void bandLU_critical_div_safe_general (T *dA, int start_row, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandLU_critical_div_onePart_safe_general (T *dA, int start_row, int k, int last)
 
template<typename T >
__global__ void bandLU_critical_sub_general (T *dA, int start_row, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandLU_critical_sub_onePart_general (T *dA, int start_row, int k, int last)
 
template<typename T >
__global__ void bandUL_critical_div (T *dA, int start_row, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandUL_critical_div_safe (T *dA, int start_row, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandUL_critical_sub (T *dA, int start_row, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandUL_critical_div_general (T *dA, int start_row, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandUL_critical_div_safe_general (T *dA, int start_row, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandUL_critical_sub_general (T *dA, int start_row, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void bandLU_post_divide (T *dA, int k, int N)
 
template<typename T >
__global__ void bandLU_post_divide_general (T *dA, int k, int N)
 
template<typename T >
__global__ void fullLU_div (T *dA, int partition_size, int cur_row)
 
template<typename T >
__global__ void fullLU_div_safe (T *dA, int partition_size, int cur_row)
 
template<typename T >
__global__ void fullLU_div_general (T *dA, int k, int partition_size, int cur_row)
 
template<typename T >
__global__ void fullLU_div_safe_general (T *dA, int k, int partition_size, int cur_row)
 
template<typename T >
__global__ void fullLU_sub (T *dA, int partition_size, int cur_row)
 
template<typename T >
__global__ void fullLU_sub_general (T *dA, int k, int partition_size, int cur_row)
 
template<typename T >
__global__ void fullLU_sub_spec (T *dA, int partition_size, int k)
 
template<typename T >
__global__ void fullLU_sub_spec_general (T *dA, int partition_size, int k)
 
template<typename T >
__global__ void boostLastPivot (T *dA, int start_row, int k, int partition_size, int rest_num)
 
template<typename T >
__global__ void innerProductBCX_g256 (T *d_spike, T *dB, T *dB_final, int N, int k, int b_partition_size, int b_partition_num, int b_rest_num)
 
template<typename T >
__global__ void innerProductBCX_g64 (T *d_spike, T *dB, T *dB_final, int N, int k, int b_partition_size, int b_partition_num, int b_rest_num)
 
template<typename T >
__global__ void innerProductBCX_g32 (T *d_spike, T *dB, T *dB_final, int N, int k, int b_partition_size, int b_partition_num, int b_rest_num)
 
template<typename T >
__global__ void innerProductBCX (T *d_spike, T *dB, T *dB_final, int N, int k, int b_partition_size, int b_partition_num, int b_rest_num)
 
template<typename T >
__global__ void innerProductBCX_var_bandwidth_g256 (T *d_spike, T *dB, T *dB_final, int N, int *ks, int *offsets, int b_partition_size, int b_partition_num, int b_rest_num)
 
template<typename T >
__global__ void innerProductBCX_var_bandwidth_g64 (T *d_spike, T *dB, T *dB_final, int N, int *ks, int *offsets, int b_partition_size, int b_partition_num, int b_rest_num)
 
template<typename T >
__global__ void innerProductBCX_var_bandwidth_g32 (T *d_spike, T *dB, T *dB_final, int N, int *ks, int *offsets, int b_partition_size, int b_partition_num, int b_rest_num)
 
template<typename T >
__global__ void innerProductBCX_var_bandwidth (T *d_spike, T *dB, T *dB_final, int N, int *ks, int *offsets, int b_partition_size, int b_partition_num, int b_rest_num)
 
template<typename T >
__global__ void permute (int N, T *ori_array, T *final_array, int *per_array)
 
template<typename T >
__global__ void columnPermute (int N, int g_k, T *ori_array, T *final_array, int *per_array)
 
template<typename T >
__global__ void forwardElimLNormal (int N, int k, int partition_size, T *dA, T *dB, int b_partition_size, int b_rest_num)
 
template<typename T >
__global__ void forwardElimLNormal_g512 (int N, int k, int partition_size, T *dA, T *dB, int b_partition_size, int b_rest_num)
 
template<typename T >
__global__ void backwardElimUNormal (int N, int k, int partition_size, T *dA, T *dB, int b_partition_size, int b_rest_num)
 
template<typename T >
__global__ void backwardElimUNormal_g512 (int N, int k, int partition_size, T *dA, T *dB, int b_partition_size, int b_rest_num)
 
template<typename T >
__global__ void forwardElimL_general (int N, int k, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void forwardElimL_g32 (int N, int k, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void forwardElimL (int N, int k, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void preBck_sol_divide (int N, int k, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void bckElim_sol (int N, int k, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void bckElim_sol_medium (int N, int k, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void bckElim_sol_narrow (int N, int k, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void backwardElimU_general (int N, int k, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void backwardElimU_g32 (int N, int k, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void backwardElimU (int N, int k, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void backwardElimUdWV (int k, T *dA, T *dB, int partition_size, int odd, int divide)
 
template<typename T >
__global__ void forwardElimLdWV (int k, T *dA, T *dB, int partition_size, int odd, int divide)
 
template<typename T >
__global__ void forwardElimLdWV_g32 (int k, T *dA, T *dB, int partition_size, int odd, int divide)
 
template<typename T >
__global__ void backwardElimUdWV_g32 (int k, T *dA, T *dB, int partition_size, int odd, int divide)
 
template<typename T >
__global__ void forwardElimLdWV_general (int k, T *dA, T *dB, int partition_size, int odd, int divide)
 
template<typename T >
__global__ void backwardElimUdWV_general (int k, T *dA, T *dB, int partition_size, int odd, int divide)
 
template<typename T >
__global__ void forwardElimL_bottom_general (int N, int k, int delta, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void backwardElimU_bottom_general (int N, int k, int delta, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void forwardElimL_bottom_g32 (int N, int k, int delta, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void backwardElimU_bottom_g32 (int N, int k, int delta, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void forwardElimL_bottom (int N, int k, int delta, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void backwardElimU_bottom (int N, int k, int delta, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void forwardElimL_LU_UL_general (int N, int k, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void forwardElimL_LU_UL_g32 (int N, int k, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void forwardElimL_LU_UL (int N, int k, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void backwardElimU_LU_UL_general (int N, int k, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void backwardElimU_LU_UL_g32 (int N, int k, T *dA, T *dB, int partition_size, int rest_num)
 
template<typename T >
__global__ void backwardElimU_LU_UL (int N, int k, T *dA, T *dB, int partition_size, int rest_num)
 

Detailed Description

spike::device contains all CUDA kernels.

Function Documentation

template<typename T >
__global__ void spike::device::assembleReducedMat ( int  k,
T *  dWV,
T *  d_comp 
)
template<typename T >
__global__ void spike::device::assembleReducedMat_g32 ( int  k,
T *  dWV,
T *  d_comp 
)
template<typename T >
__global__ void spike::device::assembleReducedMat_general ( int  k,
T *  dWV,
T *  d_comp 
)
template<typename T >
__global__ void spike::device::assembleReducedMat_var_bandwidth ( int *  ks,
int *  offsets_src,
int *  offsets_dst,
T *  dWV,
T *  d_comp 
)
template<typename T >
__global__ void spike::device::assembleReducedMat_var_bandwidth_g32 ( int *  ks,
int *  offsets_src,
int *  offsets_dst,
T *  dWV,
T *  d_comp 
)
template<typename T >
__global__ void spike::device::assembleReducedMat_var_bandwidth_general ( int *  ks,
int *  offsets_src,
int *  offsets_dst,
T *  dWV,
T *  d_comp 
)
template<typename T >
__global__ void spike::device::backwardElimU ( int  N,
int  k,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::backwardElimU_bottom ( int  N,
int  k,
int  delta,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::backwardElimU_bottom_g32 ( int  N,
int  k,
int  delta,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::backwardElimU_bottom_general ( int  N,
int  k,
int  delta,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::backwardElimU_g32 ( int  N,
int  k,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::backwardElimU_general ( int  N,
int  k,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::backwardElimU_LU_UL ( int  N,
int  k,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::backwardElimU_LU_UL_g32 ( int  N,
int  k,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::backwardElimU_LU_UL_general ( int  N,
int  k,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::backwardElimUdWV ( int  k,
T *  dA,
T *  dB,
int  partition_size,
int  odd,
int  divide 
)
template<typename T >
__global__ void spike::device::backwardElimUdWV_g32 ( int  k,
T *  dA,
T *  dB,
int  partition_size,
int  odd,
int  divide 
)
template<typename T >
__global__ void spike::device::backwardElimUdWV_general ( int  k,
T *  dA,
T *  dB,
int  partition_size,
int  odd,
int  divide 
)
template<typename T >
__global__ void spike::device::backwardElimUNormal ( int  N,
int  k,
int  partition_size,
T *  dA,
T *  dB,
int  b_partition_size,
int  b_rest_num 
)
template<typename T >
__global__ void spike::device::backwardElimUNormal_g512 ( int  N,
int  k,
int  partition_size,
T *  dA,
T *  dB,
int  b_partition_size,
int  b_rest_num 
)
template<typename T >
__global__ void spike::device::bandLU ( T *  dA,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bandLU_critical_div ( T *  dA,
int  start_row,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bandLU_critical_div_general ( T *  dA,
int  start_row,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bandLU_critical_div_onePart ( T *  dA,
int  start_row,
int  k 
)
template<typename T >
__global__ void spike::device::bandLU_critical_div_onePart_general ( T *  dA,
int  start_row,
int  k,
int  last 
)
template<typename T >
__global__ void spike::device::bandLU_critical_div_onePart_safe ( T *  dA,
int  start_row,
int  k 
)
template<typename T >
__global__ void spike::device::bandLU_critical_div_onePart_safe_general ( T *  dA,
int  start_row,
int  k,
int  last 
)
template<typename T >
__global__ void spike::device::bandLU_critical_div_safe ( T *  dA,
int  start_row,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bandLU_critical_div_safe_general ( T *  dA,
int  start_row,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bandLU_critical_sub ( T *  dA,
int  start_row,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bandLU_critical_sub_general ( T *  dA,
int  start_row,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bandLU_critical_sub_onePart ( T *  dA,
int  start_row,
int  k 
)
template<typename T >
__global__ void spike::device::bandLU_critical_sub_onePart_general ( T *  dA,
int  start_row,
int  k,
int  last 
)
template<typename T >
__global__ void spike::device::bandLU_g32 ( T *  dA,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bandLU_g32_safe ( T *  dA,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bandLU_post_divide ( T *  dA,
int  k,
int  N 
)
template<typename T >
__global__ void spike::device::bandLU_post_divide_general ( T *  dA,
int  k,
int  N 
)
template<typename T >
__global__ void spike::device::bandLU_safe ( T *  dA,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bandUL ( T *  dA,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bandUL_critical_div ( T *  dA,
int  start_row,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bandUL_critical_div_general ( T *  dA,
int  start_row,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bandUL_critical_div_safe ( T *  dA,
int  start_row,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bandUL_critical_div_safe_general ( T *  dA,
int  start_row,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bandUL_critical_sub ( T *  dA,
int  start_row,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bandUL_critical_sub_general ( T *  dA,
int  start_row,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bandUL_g32 ( T *  dA,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bandUL_g32_safe ( T *  dA,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bandUL_safe ( T *  dA,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bckElim_sol ( int  N,
int  k,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bckElim_sol_medium ( int  N,
int  k,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::bckElim_sol_narrow ( int  N,
int  k,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::boostLastPivot ( T *  dA,
int  start_row,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__device__ T spike::device::boostValue ( const T  in_val,
T &  out_val,
const T  threshold 
)
inline
template<typename T >
__device__ T spike::device::boostValue ( const T  in_val,
T &  out_val,
const T  threshold,
const T  new_val 
)
inline
template<typename T >
__global__ void spike::device::columnPermute ( int  N,
int  g_k,
T *  ori_array,
T *  final_array,
int *  per_array 
)
template<typename T >
__global__ void spike::device::copydAtodA2 ( int  N,
int  k,
T *  dA,
T *  dA2,
int  num_of_rows,
int  partition_size,
int  partition_num,
int  rest_num 
)
template<typename T >
__global__ void spike::device::copydAtodA2_general ( int  N,
int  k,
T *  dA,
T *  dA2,
int  num_of_rows,
int  partition_size,
int  partition_num,
int  rest_num 
)
template<typename T >
__global__ void spike::device::copydAtoPartialA ( int  N,
int  k,
T *  dA,
T *  dA2,
T *  d_partial_A,
int  partition_size,
int  partition_num,
int  rest_num,
int  num_of_rows 
)
template<typename T >
__global__ void spike::device::copydAtoPartialA_general ( int  N,
int  k,
T *  dA,
T *  dA2,
T *  d_partial_A,
int  partition_size,
int  partition_num,
int  rest_num,
int  num_of_rows 
)
template<typename T >
__global__ void spike::device::copydWV ( int  k,
T *  dA,
T *  dWV,
T *  d_spike,
int  partition_size,
int  partition_num,
int  rest_num 
)
template<typename T >
__global__ void spike::device::copydWV_g32 ( int  k,
T *  dA,
T *  dWV,
T *  d_spike,
int  partition_size,
int  partition_num,
int  rest_num 
)
template<typename T >
__global__ void spike::device::copydWV_general ( int  k,
T *  dA,
T *  dWV,
T *  d_spike,
int  partition_size,
int  partition_num,
int  rest_num 
)
template<typename T >
__global__ void spike::device::copyFromCOOMatrixToBandedMatrix ( int  nnz,
int  bandwidth,
int *  rows,
int *  cols,
T *  vals,
T *  dB 
)
template<typename T >
__global__ void spike::device::copyWVFromOrToExtendedV ( int  N,
int  k,
int  partition_size,
int  rest_num,
T *  dWV,
T *  d_eV,
bool  from 
)
template<typename T >
__global__ void spike::device::copyWVFromOrToExtendedV_general ( int  N,
int  k,
int  partition_size,
int  rest_num,
T *  dWV,
T *  d_eV,
bool  from 
)
template<typename T >
__global__ void spike::device::copyWVFromOrToExtendedW ( int  N,
int  k,
int  partition_size,
int  rest_num,
T *  dWV,
T *  d_eW,
bool  from 
)
template<typename T >
__global__ void spike::device::copyWVFromOrToExtendedW_general ( int  N,
int  k,
int  partition_size,
int  rest_num,
T *  dWV,
T *  d_eW,
bool  from 
)
template<typename T >
__global__ void spike::device::copyWVFromOrToExtendedWVTranspose_general ( int  row_size,
int  k,
int  rightWidth,
int  partition_size,
int  rest_num,
int  column_deltaW,
T *  dWV,
T *  d_eWV,
bool  from 
)
template<typename T >
__global__ void spike::device::forwardElimL ( int  N,
int  k,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::forwardElimL_bottom ( int  N,
int  k,
int  delta,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::forwardElimL_bottom_g32 ( int  N,
int  k,
int  delta,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::forwardElimL_bottom_general ( int  N,
int  k,
int  delta,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::forwardElimL_g32 ( int  N,
int  k,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::forwardElimL_general ( int  N,
int  k,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::forwardElimL_LU_UL ( int  N,
int  k,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::forwardElimL_LU_UL_g32 ( int  N,
int  k,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::forwardElimL_LU_UL_general ( int  N,
int  k,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::forwardElimLdWV ( int  k,
T *  dA,
T *  dB,
int  partition_size,
int  odd,
int  divide 
)
template<typename T >
__global__ void spike::device::forwardElimLdWV_g32 ( int  k,
T *  dA,
T *  dB,
int  partition_size,
int  odd,
int  divide 
)
template<typename T >
__global__ void spike::device::forwardElimLdWV_general ( int  k,
T *  dA,
T *  dB,
int  partition_size,
int  odd,
int  divide 
)
template<typename T >
__global__ void spike::device::forwardElimLNormal ( int  N,
int  k,
int  partition_size,
T *  dA,
T *  dB,
int  b_partition_size,
int  b_rest_num 
)
template<typename T >
__global__ void spike::device::forwardElimLNormal_g512 ( int  N,
int  k,
int  partition_size,
T *  dA,
T *  dB,
int  b_partition_size,
int  b_rest_num 
)
template<typename T >
__global__ void spike::device::fullLU_div ( T *  dA,
int  partition_size,
int  cur_row 
)
template<typename T >
__global__ void spike::device::fullLU_div_general ( T *  dA,
int  k,
int  partition_size,
int  cur_row 
)
template<typename T >
__global__ void spike::device::fullLU_div_safe ( T *  dA,
int  partition_size,
int  cur_row 
)
template<typename T >
__global__ void spike::device::fullLU_div_safe_general ( T *  dA,
int  k,
int  partition_size,
int  cur_row 
)
template<typename T >
__global__ void spike::device::fullLU_sub ( T *  dA,
int  partition_size,
int  cur_row 
)
template<typename T >
__global__ void spike::device::fullLU_sub_general ( T *  dA,
int  k,
int  partition_size,
int  cur_row 
)
template<typename T >
__global__ void spike::device::fullLU_sub_spec ( T *  dA,
int  partition_size,
int  k 
)
template<typename T >
__global__ void spike::device::fullLU_sub_spec_general ( T *  dA,
int  partition_size,
int  k 
)
template<typename T >
__global__ void spike::device::innerProductBCX ( T *  d_spike,
T *  dB,
T *  dB_final,
int  N,
int  k,
int  b_partition_size,
int  b_partition_num,
int  b_rest_num 
)
template<typename T >
__global__ void spike::device::innerProductBCX_g256 ( T *  d_spike,
T *  dB,
T *  dB_final,
int  N,
int  k,
int  b_partition_size,
int  b_partition_num,
int  b_rest_num 
)
template<typename T >
__global__ void spike::device::innerProductBCX_g32 ( T *  d_spike,
T *  dB,
T *  dB_final,
int  N,
int  k,
int  b_partition_size,
int  b_partition_num,
int  b_rest_num 
)
template<typename T >
__global__ void spike::device::innerProductBCX_g64 ( T *  d_spike,
T *  dB,
T *  dB_final,
int  N,
int  k,
int  b_partition_size,
int  b_partition_num,
int  b_rest_num 
)
template<typename T >
__global__ void spike::device::innerProductBCX_var_bandwidth ( T *  d_spike,
T *  dB,
T *  dB_final,
int  N,
int *  ks,
int *  offsets,
int  b_partition_size,
int  b_partition_num,
int  b_rest_num 
)
template<typename T >
__global__ void spike::device::innerProductBCX_var_bandwidth_g256 ( T *  d_spike,
T *  dB,
T *  dB_final,
int  N,
int *  ks,
int *  offsets,
int  b_partition_size,
int  b_partition_num,
int  b_rest_num 
)
template<typename T >
__global__ void spike::device::innerProductBCX_var_bandwidth_g32 ( T *  d_spike,
T *  dB,
T *  dB_final,
int  N,
int *  ks,
int *  offsets,
int  b_partition_size,
int  b_partition_num,
int  b_rest_num 
)
template<typename T >
__global__ void spike::device::innerProductBCX_var_bandwidth_g64 ( T *  d_spike,
T *  dB,
T *  dB_final,
int  N,
int *  ks,
int *  offsets,
int  b_partition_size,
int  b_partition_num,
int  b_rest_num 
)
template<typename T >
__global__ void spike::device::matrixVReordering ( int  k,
T *  WV,
T *  WV_spare,
int *  perms,
int *  widths 
)
template<typename T >
__global__ void spike::device::matrixVReordering_perPartition ( int  k,
T *  WV,
T *  WV_spare,
int *  perms 
)
template<typename T >
__global__ void spike::device::matrixWReordering ( int  k,
T *  WV,
T *  WV_spare,
int *  perms,
int *  widths 
)
template<typename T >
__global__ void spike::device::matrixWReordering_perPartition ( int  k,
T *  WV,
T *  WV_spare,
int *  perms 
)
template<typename T >
__global__ void spike::device::permute ( int  N,
T *  ori_array,
T *  final_array,
int *  per_array 
)
template<typename T >
__global__ void spike::device::preBck_sol_divide ( int  N,
int  k,
T *  dA,
T *  dB,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::swBandLU ( T *  dA,
int  k,
int  partition_size,
int  rest_num 
)
template<typename T >
__global__ void spike::device::swBandUL ( T *  dA,
int  k,
int  partition_size,
int  rest_num 
)