SpikeGPU
1.0.0
|
spike::device contains all CUDA kernels.
Namespaces | |
var | |
spike::device::var contains all CUDA kernels for the variable bandwidth preconditioner. | |
Functions | |
template<typename T > | |
__global__ void | assembleReducedMat (int k, T *dWV, T *d_comp) |
template<typename T > | |
__global__ void | assembleReducedMat_g32 (int k, T *dWV, T *d_comp) |
template<typename T > | |
__global__ void | assembleReducedMat_general (int k, T *dWV, T *d_comp) |
template<typename T > | |
__global__ void | copydAtodA2 (int N, int k, T *dA, T *dA2, int num_of_rows, int partition_size, int partition_num, int rest_num) |
template<typename T > | |
__global__ void | copydAtodA2_general (int N, int k, T *dA, T *dA2, int num_of_rows, int partition_size, int partition_num, int rest_num) |
template<typename T > | |
__global__ void | copydWV_general (int k, T *dA, T *dWV, T *d_spike, int partition_size, int partition_num, int rest_num) |
template<typename T > | |
__global__ void | copydWV_g32 (int k, T *dA, T *dWV, T *d_spike, int partition_size, int partition_num, int rest_num) |
template<typename T > | |
__global__ void | copydWV (int k, T *dA, T *dWV, T *d_spike, int partition_size, int partition_num, int rest_num) |
template<typename T > | |
__global__ void | copydAtoPartialA (int N, int k, T *dA, T *dA2, T *d_partial_A, int partition_size, int partition_num, int rest_num, int num_of_rows) |
template<typename T > | |
__global__ void | copydAtoPartialA_general (int N, int k, T *dA, T *dA2, T *d_partial_A, int partition_size, int partition_num, int rest_num, int num_of_rows) |
template<typename T > | |
__global__ void | copyWVFromOrToExtendedV (int N, int k, int partition_size, int rest_num, T *dWV, T *d_eV, bool from) |
template<typename T > | |
__global__ void | copyWVFromOrToExtendedV_general (int N, int k, int partition_size, int rest_num, T *dWV, T *d_eV, bool from) |
template<typename T > | |
__global__ void | copyWVFromOrToExtendedWVTranspose_general (int row_size, int k, int rightWidth, int partition_size, int rest_num, int column_deltaW, T *dWV, T *d_eWV, bool from) |
template<typename T > | |
__global__ void | copyWVFromOrToExtendedW (int N, int k, int partition_size, int rest_num, T *dWV, T *d_eW, bool from) |
template<typename T > | |
__global__ void | copyWVFromOrToExtendedW_general (int N, int k, int partition_size, int rest_num, T *dWV, T *d_eW, bool from) |
template<typename T > | |
__global__ void | copyFromCOOMatrixToBandedMatrix (int nnz, int bandwidth, int *rows, int *cols, T *vals, T *dB) |
template<typename T > | |
__global__ void | assembleReducedMat_var_bandwidth (int *ks, int *offsets_src, int *offsets_dst, T *dWV, T *d_comp) |
template<typename T > | |
__global__ void | assembleReducedMat_var_bandwidth_g32 (int *ks, int *offsets_src, int *offsets_dst, T *dWV, T *d_comp) |
template<typename T > | |
__global__ void | assembleReducedMat_var_bandwidth_general (int *ks, int *offsets_src, int *offsets_dst, T *dWV, T *d_comp) |
template<typename T > | |
__global__ void | matrixVReordering (int k, T *WV, T *WV_spare, int *perms, int *widths) |
template<typename T > | |
__global__ void | matrixWReordering (int k, T *WV, T *WV_spare, int *perms, int *widths) |
template<typename T > | |
__global__ void | matrixVReordering_perPartition (int k, T *WV, T *WV_spare, int *perms) |
template<typename T > | |
__global__ void | matrixWReordering_perPartition (int k, T *WV, T *WV_spare, int *perms) |
template<typename T > | |
__device__ T | boostValue (const T in_val, T &out_val, const T threshold) |
template<typename T > | |
__device__ T | boostValue (const T in_val, T &out_val, const T threshold, const T new_val) |
template<typename T > | |
__global__ void | bandLU (T *dA, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | swBandLU (T *dA, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandLU_safe (T *dA, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandLU_g32 (T *dA, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandLU_g32_safe (T *dA, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandUL_g32 (T *dA, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandUL_g32_safe (T *dA, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandUL (T *dA, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | swBandUL (T *dA, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandUL_safe (T *dA, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandLU_critical_div (T *dA, int start_row, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandLU_critical_div_onePart (T *dA, int start_row, int k) |
template<typename T > | |
__global__ void | bandLU_critical_div_safe (T *dA, int start_row, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandLU_critical_div_onePart_safe (T *dA, int start_row, int k) |
template<typename T > | |
__global__ void | bandLU_critical_sub (T *dA, int start_row, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandLU_critical_sub_onePart (T *dA, int start_row, int k) |
template<typename T > | |
__global__ void | bandLU_critical_div_general (T *dA, int start_row, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandLU_critical_div_onePart_general (T *dA, int start_row, int k, int last) |
template<typename T > | |
__global__ void | bandLU_critical_div_safe_general (T *dA, int start_row, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandLU_critical_div_onePart_safe_general (T *dA, int start_row, int k, int last) |
template<typename T > | |
__global__ void | bandLU_critical_sub_general (T *dA, int start_row, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandLU_critical_sub_onePart_general (T *dA, int start_row, int k, int last) |
template<typename T > | |
__global__ void | bandUL_critical_div (T *dA, int start_row, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandUL_critical_div_safe (T *dA, int start_row, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandUL_critical_sub (T *dA, int start_row, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandUL_critical_div_general (T *dA, int start_row, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandUL_critical_div_safe_general (T *dA, int start_row, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandUL_critical_sub_general (T *dA, int start_row, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandLU_post_divide (T *dA, int k, int N) |
template<typename T > | |
__global__ void | bandLU_post_divide_general (T *dA, int k, int N) |
template<typename T > | |
__global__ void | fullLU_div (T *dA, int partition_size, int cur_row) |
template<typename T > | |
__global__ void | fullLU_div_safe (T *dA, int partition_size, int cur_row) |
template<typename T > | |
__global__ void | fullLU_div_general (T *dA, int k, int partition_size, int cur_row) |
template<typename T > | |
__global__ void | fullLU_div_safe_general (T *dA, int k, int partition_size, int cur_row) |
template<typename T > | |
__global__ void | fullLU_sub (T *dA, int partition_size, int cur_row) |
template<typename T > | |
__global__ void | fullLU_sub_general (T *dA, int k, int partition_size, int cur_row) |
template<typename T > | |
__global__ void | fullLU_sub_spec (T *dA, int partition_size, int k) |
template<typename T > | |
__global__ void | fullLU_sub_spec_general (T *dA, int partition_size, int k) |
template<typename T > | |
__global__ void | boostLastPivot (T *dA, int start_row, int k, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | innerProductBCX_g256 (T *d_spike, T *dB, T *dB_final, int N, int k, int b_partition_size, int b_partition_num, int b_rest_num) |
template<typename T > | |
__global__ void | innerProductBCX_g64 (T *d_spike, T *dB, T *dB_final, int N, int k, int b_partition_size, int b_partition_num, int b_rest_num) |
template<typename T > | |
__global__ void | innerProductBCX_g32 (T *d_spike, T *dB, T *dB_final, int N, int k, int b_partition_size, int b_partition_num, int b_rest_num) |
template<typename T > | |
__global__ void | innerProductBCX (T *d_spike, T *dB, T *dB_final, int N, int k, int b_partition_size, int b_partition_num, int b_rest_num) |
template<typename T > | |
__global__ void | innerProductBCX_var_bandwidth_g256 (T *d_spike, T *dB, T *dB_final, int N, int *ks, int *offsets, int b_partition_size, int b_partition_num, int b_rest_num) |
template<typename T > | |
__global__ void | innerProductBCX_var_bandwidth_g64 (T *d_spike, T *dB, T *dB_final, int N, int *ks, int *offsets, int b_partition_size, int b_partition_num, int b_rest_num) |
template<typename T > | |
__global__ void | innerProductBCX_var_bandwidth_g32 (T *d_spike, T *dB, T *dB_final, int N, int *ks, int *offsets, int b_partition_size, int b_partition_num, int b_rest_num) |
template<typename T > | |
__global__ void | innerProductBCX_var_bandwidth (T *d_spike, T *dB, T *dB_final, int N, int *ks, int *offsets, int b_partition_size, int b_partition_num, int b_rest_num) |
template<typename T > | |
__global__ void | permute (int N, T *ori_array, T *final_array, int *per_array) |
template<typename T > | |
__global__ void | columnPermute (int N, int g_k, T *ori_array, T *final_array, int *per_array) |
template<typename T > | |
__global__ void | forwardElimLNormal (int N, int k, int partition_size, T *dA, T *dB, int b_partition_size, int b_rest_num) |
template<typename T > | |
__global__ void | forwardElimLNormal_g512 (int N, int k, int partition_size, T *dA, T *dB, int b_partition_size, int b_rest_num) |
template<typename T > | |
__global__ void | backwardElimUNormal (int N, int k, int partition_size, T *dA, T *dB, int b_partition_size, int b_rest_num) |
template<typename T > | |
__global__ void | backwardElimUNormal_g512 (int N, int k, int partition_size, T *dA, T *dB, int b_partition_size, int b_rest_num) |
template<typename T > | |
__global__ void | forwardElimL_general (int N, int k, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | forwardElimL_g32 (int N, int k, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | forwardElimL (int N, int k, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | preBck_sol_divide (int N, int k, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bckElim_sol (int N, int k, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bckElim_sol_medium (int N, int k, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bckElim_sol_narrow (int N, int k, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | backwardElimU_general (int N, int k, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | backwardElimU_g32 (int N, int k, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | backwardElimU (int N, int k, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | backwardElimUdWV (int k, T *dA, T *dB, int partition_size, int odd, int divide) |
template<typename T > | |
__global__ void | forwardElimLdWV (int k, T *dA, T *dB, int partition_size, int odd, int divide) |
template<typename T > | |
__global__ void | forwardElimLdWV_g32 (int k, T *dA, T *dB, int partition_size, int odd, int divide) |
template<typename T > | |
__global__ void | backwardElimUdWV_g32 (int k, T *dA, T *dB, int partition_size, int odd, int divide) |
template<typename T > | |
__global__ void | forwardElimLdWV_general (int k, T *dA, T *dB, int partition_size, int odd, int divide) |
template<typename T > | |
__global__ void | backwardElimUdWV_general (int k, T *dA, T *dB, int partition_size, int odd, int divide) |
template<typename T > | |
__global__ void | forwardElimL_bottom_general (int N, int k, int delta, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | backwardElimU_bottom_general (int N, int k, int delta, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | forwardElimL_bottom_g32 (int N, int k, int delta, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | backwardElimU_bottom_g32 (int N, int k, int delta, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | forwardElimL_bottom (int N, int k, int delta, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | backwardElimU_bottom (int N, int k, int delta, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | forwardElimL_LU_UL_general (int N, int k, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | forwardElimL_LU_UL_g32 (int N, int k, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | forwardElimL_LU_UL (int N, int k, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | backwardElimU_LU_UL_general (int N, int k, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | backwardElimU_LU_UL_g32 (int N, int k, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | backwardElimU_LU_UL (int N, int k, T *dA, T *dB, int partition_size, int rest_num) |
spike::device contains all CUDA kernels.
__global__ void spike::device::assembleReducedMat | ( | int | k, |
T * | dWV, | ||
T * | d_comp | ||
) |
__global__ void spike::device::assembleReducedMat_g32 | ( | int | k, |
T * | dWV, | ||
T * | d_comp | ||
) |
__global__ void spike::device::assembleReducedMat_general | ( | int | k, |
T * | dWV, | ||
T * | d_comp | ||
) |
__global__ void spike::device::assembleReducedMat_var_bandwidth | ( | int * | ks, |
int * | offsets_src, | ||
int * | offsets_dst, | ||
T * | dWV, | ||
T * | d_comp | ||
) |
__global__ void spike::device::assembleReducedMat_var_bandwidth_g32 | ( | int * | ks, |
int * | offsets_src, | ||
int * | offsets_dst, | ||
T * | dWV, | ||
T * | d_comp | ||
) |
__global__ void spike::device::assembleReducedMat_var_bandwidth_general | ( | int * | ks, |
int * | offsets_src, | ||
int * | offsets_dst, | ||
T * | dWV, | ||
T * | d_comp | ||
) |
__global__ void spike::device::backwardElimU | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::backwardElimU_bottom | ( | int | N, |
int | k, | ||
int | delta, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::backwardElimU_bottom_g32 | ( | int | N, |
int | k, | ||
int | delta, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::backwardElimU_bottom_general | ( | int | N, |
int | k, | ||
int | delta, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::backwardElimU_g32 | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::backwardElimU_general | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::backwardElimU_LU_UL | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::backwardElimU_LU_UL_g32 | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::backwardElimU_LU_UL_general | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::backwardElimUdWV | ( | int | k, |
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | odd, | ||
int | divide | ||
) |
__global__ void spike::device::backwardElimUdWV_g32 | ( | int | k, |
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | odd, | ||
int | divide | ||
) |
__global__ void spike::device::backwardElimUdWV_general | ( | int | k, |
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | odd, | ||
int | divide | ||
) |
__global__ void spike::device::backwardElimUNormal | ( | int | N, |
int | k, | ||
int | partition_size, | ||
T * | dA, | ||
T * | dB, | ||
int | b_partition_size, | ||
int | b_rest_num | ||
) |
__global__ void spike::device::backwardElimUNormal_g512 | ( | int | N, |
int | k, | ||
int | partition_size, | ||
T * | dA, | ||
T * | dB, | ||
int | b_partition_size, | ||
int | b_rest_num | ||
) |
__global__ void spike::device::bandLU | ( | T * | dA, |
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bandLU_critical_div | ( | T * | dA, |
int | start_row, | ||
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bandLU_critical_div_general | ( | T * | dA, |
int | start_row, | ||
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bandLU_critical_div_onePart | ( | T * | dA, |
int | start_row, | ||
int | k | ||
) |
__global__ void spike::device::bandLU_critical_div_onePart_general | ( | T * | dA, |
int | start_row, | ||
int | k, | ||
int | last | ||
) |
__global__ void spike::device::bandLU_critical_div_onePart_safe | ( | T * | dA, |
int | start_row, | ||
int | k | ||
) |
__global__ void spike::device::bandLU_critical_div_onePart_safe_general | ( | T * | dA, |
int | start_row, | ||
int | k, | ||
int | last | ||
) |
__global__ void spike::device::bandLU_critical_div_safe | ( | T * | dA, |
int | start_row, | ||
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bandLU_critical_div_safe_general | ( | T * | dA, |
int | start_row, | ||
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bandLU_critical_sub | ( | T * | dA, |
int | start_row, | ||
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bandLU_critical_sub_general | ( | T * | dA, |
int | start_row, | ||
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bandLU_critical_sub_onePart | ( | T * | dA, |
int | start_row, | ||
int | k | ||
) |
__global__ void spike::device::bandLU_critical_sub_onePart_general | ( | T * | dA, |
int | start_row, | ||
int | k, | ||
int | last | ||
) |
__global__ void spike::device::bandLU_g32 | ( | T * | dA, |
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bandLU_g32_safe | ( | T * | dA, |
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bandLU_post_divide | ( | T * | dA, |
int | k, | ||
int | N | ||
) |
__global__ void spike::device::bandLU_post_divide_general | ( | T * | dA, |
int | k, | ||
int | N | ||
) |
__global__ void spike::device::bandLU_safe | ( | T * | dA, |
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bandUL | ( | T * | dA, |
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bandUL_critical_div | ( | T * | dA, |
int | start_row, | ||
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bandUL_critical_div_general | ( | T * | dA, |
int | start_row, | ||
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bandUL_critical_div_safe | ( | T * | dA, |
int | start_row, | ||
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bandUL_critical_div_safe_general | ( | T * | dA, |
int | start_row, | ||
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bandUL_critical_sub | ( | T * | dA, |
int | start_row, | ||
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bandUL_critical_sub_general | ( | T * | dA, |
int | start_row, | ||
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bandUL_g32 | ( | T * | dA, |
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bandUL_g32_safe | ( | T * | dA, |
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bandUL_safe | ( | T * | dA, |
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bckElim_sol | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bckElim_sol_medium | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::bckElim_sol_narrow | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::boostLastPivot | ( | T * | dA, |
int | start_row, | ||
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
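__device__ T spike::device::boostValue | ( | const T | in_val, |
T & | out_val, | ||
const T | threshold | ||
) |
inline
__device__ T spike::device::boostValue | ( | const T | in_val, |
T & | out_val, | ||
const T | threshold, | ||
const T | new_val | ||
) |
inline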
__global__ void spike::device::columnPermute | ( | int | N, |
int | g_k, | ||
T * | ori_array, | ||
T * | final_array, | ||
int * | per_array | ||
) |
__global__ void spike::device::copydAtodA2 | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dA2, | ||
int | num_of_rows, | ||
int | partition_size, | ||
int | partition_num, | ||
int | rest_num | ||
) |
__global__ void spike::device::copydAtodA2_general | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dA2, | ||
int | num_of_rows, | ||
int | partition_size, | ||
int | partition_num, | ||
int | rest_num | ||
) |
__global__ void spike::device::copydAtoPartialA | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dA2, | ||
T * | d_partial_A, | ||
int | partition_size, | ||
int | partition_num, | ||
int | rest_num, | ||
int | num_of_rows | ||
) |
__global__ void spike::device::copydAtoPartialA_general | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dA2, | ||
T * | d_partial_A, | ||
int | partition_size, | ||
int | partition_num, | ||
int | rest_num, | ||
int | num_of_rows | ||
) |
__global__ void spike::device::copydWV | ( | int | k, |
T * | dA, | ||
T * | dWV, | ||
T * | d_spike, | ||
int | partition_size, | ||
int | partition_num, | ||
int | rest_num | ||
) |
__global__ void spike::device::copydWV_g32 | ( | int | k, |
T * | dA, | ||
T * | dWV, | ||
T * | d_spike, | ||
int | partition_size, | ||
int | partition_num, | ||
int | rest_num | ||
) |
__global__ void spike::device::copydWV_general | ( | int | k, |
T * | dA, | ||
T * | dWV, | ||
T * | d_spike, | ||
int | partition_size, | ||
int | partition_num, | ||
int | rest_num | ||
) |
__global__ void spike::device::copyFromCOOMatrixToBandedMatrix | ( | int | nnz, |
int | bandwidth, | ||
int * | rows, | ||
int * | cols, | ||
T * | vals, | ||
T * | dB | ||
) |
__global__ void spike::device::copyWVFromOrToExtendedV | ( | int | N, |
int | k, | ||
int | partition_size, | ||
int | rest_num, | ||
T * | dWV, | ||
T * | d_eV, | ||
bool | from | ||
) |
__global__ void spike::device::copyWVFromOrToExtendedV_general | ( | int | N, |
int | k, | ||
int | partition_size, | ||
int | rest_num, | ||
T * | dWV, | ||
T * | d_eV, | ||
bool | from | ||
) |
__global__ void spike::device::copyWVFromOrToExtendedW | ( | int | N, |
int | k, | ||
int | partition_size, | ||
int | rest_num, | ||
T * | dWV, | ||
T * | d_eW, | ||
bool | from | ||
) |
__global__ void spike::device::copyWVFromOrToExtendedW_general | ( | int | N, |
int | k, | ||
int | partition_size, | ||
int | rest_num, | ||
T * | dWV, | ||
T * | d_eW, | ||
bool | from | ||
) |
__global__ void spike::device::copyWVFromOrToExtendedWVTranspose_general | ( | int | row_size, |
int | k, | ||
int | rightWidth, | ||
int | partition_size, | ||
int | rest_num, | ||
int | column_deltaW, | ||
T * | dWV, | ||
T * | d_eWV, | ||
bool | from | ||
) |
__global__ void spike::device::forwardElimL | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::forwardElimL_bottom | ( | int | N, |
int | k, | ||
int | delta, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::forwardElimL_bottom_g32 | ( | int | N, |
int | k, | ||
int | delta, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::forwardElimL_bottom_general | ( | int | N, |
int | k, | ||
int | delta, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::forwardElimL_g32 | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::forwardElimL_general | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::forwardElimL_LU_UL | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::forwardElimL_LU_UL_g32 | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::forwardElimL_LU_UL_general | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::forwardElimLdWV | ( | int | k, |
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | odd, | ||
int | divide | ||
) |
__global__ void spike::device::forwardElimLdWV_g32 | ( | int | k, |
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | odd, | ||
int | divide | ||
) |
__global__ void spike::device::forwardElimLdWV_general | ( | int | k, |
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | odd, | ||
int | divide | ||
) |
__global__ void spike::device::forwardElimLNormal | ( | int | N, |
int | k, | ||
int | partition_size, | ||
T * | dA, | ||
T * | dB, | ||
int | b_partition_size, | ||
int | b_rest_num | ||
) |
__global__ void spike::device::forwardElimLNormal_g512 | ( | int | N, |
int | k, | ||
int | partition_size, | ||
T * | dA, | ||
T * | dB, | ||
int | b_partition_size, | ||
int | b_rest_num | ||
) |
__global__ void spike::device::fullLU_div | ( | T * | dA, |
int | partition_size, | ||
int | cur_row | ||
) |
__global__ void spike::device::fullLU_div_general | ( | T * | dA, |
int | k, | ||
int | partition_size, | ||
int | cur_row | ||
) |
__global__ void spike::device::fullLU_div_safe | ( | T * | dA, |
int | partition_size, | ||
int | cur_row | ||
) |
__global__ void spike::device::fullLU_div_safe_general | ( | T * | dA, |
int | k, | ||
int | partition_size, | ||
int | cur_row | ||
) |
__global__ void spike::device::fullLU_sub | ( | T * | dA, |
int | partition_size, | ||
int | cur_row | ||
) |
__global__ void spike::device::fullLU_sub_general | ( | T * | dA, |
int | k, | ||
int | partition_size, | ||
int | cur_row | ||
) |
__global__ void spike::device::fullLU_sub_spec | ( | T * | dA, |
int | partition_size, | ||
int | k | ||
) |
__global__ void spike::device::fullLU_sub_spec_general | ( | T * | dA, |
int | partition_size, | ||
int | k | ||
) |
__global__ void spike::device::innerProductBCX | ( | T * | d_spike, |
T * | dB, | ||
T * | dB_final, | ||
int | N, | ||
int | k, | ||
int | b_partition_size, | ||
int | b_partition_num, | ||
int | b_rest_num | ||
) |
__global__ void spike::device::innerProductBCX_g256 | ( | T * | d_spike, |
T * | dB, | ||
T * | dB_final, | ||
int | N, | ||
int | k, | ||
int | b_partition_size, | ||
int | b_partition_num, | ||
int | b_rest_num | ||
) |
__global__ void spike::device::innerProductBCX_g32 | ( | T * | d_spike, |
T * | dB, | ||
T * | dB_final, | ||
int | N, | ||
int | k, | ||
int | b_partition_size, | ||
int | b_partition_num, | ||
int | b_rest_num | ||
) |
__global__ void spike::device::innerProductBCX_g64 | ( | T * | d_spike, |
T * | dB, | ||
T * | dB_final, | ||
int | N, | ||
int | k, | ||
int | b_partition_size, | ||
int | b_partition_num, | ||
int | b_rest_num | ||
) |
__global__ void spike::device::innerProductBCX_var_bandwidth | ( | T * | d_spike, |
T * | dB, | ||
T * | dB_final, | ||
int | N, | ||
int * | ks, | ||
int * | offsets, | ||
int | b_partition_size, | ||
int | b_partition_num, | ||
int | b_rest_num | ||
) |
__global__ void spike::device::innerProductBCX_var_bandwidth_g256 | ( | T * | d_spike, |
T * | dB, | ||
T * | dB_final, | ||
int | N, | ||
int * | ks, | ||
int * | offsets, | ||
int | b_partition_size, | ||
int | b_partition_num, | ||
int | b_rest_num | ||
) |
__global__ void spike::device::innerProductBCX_var_bandwidth_g32 | ( | T * | d_spike, |
T * | dB, | ||
T * | dB_final, | ||
int | N, | ||
int * | ks, | ||
int * | offsets, | ||
int | b_partition_size, | ||
int | b_partition_num, | ||
int | b_rest_num | ||
) |
__global__ void spike::device::innerProductBCX_var_bandwidth_g64 | ( | T * | d_spike, |
T * | dB, | ||
T * | dB_final, | ||
int | N, | ||
int * | ks, | ||
int * | offsets, | ||
int | b_partition_size, | ||
int | b_partition_num, | ||
int | b_rest_num | ||
) |
__global__ void spike::device::matrixVReordering | ( | int | k, |
T * | WV, | ||
T * | WV_spare, | ||
int * | perms, | ||
int * | widths | ||
) |
__global__ void spike::device::matrixVReordering_perPartition | ( | int | k, |
T * | WV, | ||
T * | WV_spare, | ||
int * | perms | ||
) |
__global__ void spike::device::matrixWReordering | ( | int | k, |
T * | WV, | ||
T * | WV_spare, | ||
int * | perms, | ||
int * | widths | ||
) |
__global__ void spike::device::matrixWReordering_perPartition | ( | int | k, |
T * | WV, | ||
T * | WV_spare, | ||
int * | perms | ||
) |
__global__ void spike::device::permute | ( | int | N, |
T * | ori_array, | ||
T * | final_array, | ||
int * | per_array | ||
) |
__global__ void spike::device::preBck_sol_divide | ( | int | N, |
int | k, | ||
T * | dA, | ||
T * | dB, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::swBandLU | ( | T * | dA, |
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |
__global__ void spike::device::swBandUL | ( | T * | dA, |
int | k, | ||
int | partition_size, | ||
int | rest_num | ||
) |