SpikeGPU
1.0.0
The spike::device::var namespace contains all CUDA kernels for the variable-bandwidth preconditioner.
Functions
template<typename T > | |
__global__ void | bandLU (T *dA, int *ks, int *offsets, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandLU_safe (T *dA, int *ks, int *offsets, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandLU_g32 (T *dA, int *ks, int *offsets, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandLU_g32_safe (T *dA, int *ks, int *offsets, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandLU_critical_div_general (T *dA, int start_row, int *ks, int *offsets, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandLU_critical_div_safe_general (T *dA, int start_row, int *ks, int *offsets, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bandLU_critical_sub_general (T *dA, int start_row, int *ks, int *offsets, int partition_size, int rest_num, int last) |
template<typename T > | |
__global__ void | bandLU_post_divide_per_partition (T *dA, int k, int offset, int partSize) |
template<typename T > | |
__global__ void | bandLU_post_divide_per_partition_general (T *dA, int k, int offset, int partSize) |
template<typename T > | |
__global__ void | fullLU_div (T *dA, int *ks, int *offsets, int cur_row) |
template<typename T > | |
__global__ void | fullLU_div_safe (T *dA, int *ks, int *offsets, int cur_row) |
template<typename T > | |
__global__ void | fullLU_div_general (T *dA, int *ks, int *offsets, int cur_row) |
template<typename T > | |
__global__ void | fullLU_div_safe_general (T *dA, int *ks, int *offsets, int cur_row) |
template<typename T > | |
__global__ void | fullLU_sub (T *dA, int *ks, int *offsets, int cur_row) |
template<typename T > | |
__global__ void | fullLU_sub_general (T *dA, int *ks, int *offsets, int cur_row) |
template<typename T > | |
__global__ void | fullLU_sub_spec (T *dA, int *ks, int *offsets) |
template<typename T > | |
__global__ void | fullLU_sub_spec_general (T *dA, int *ks, int *offsets) |
template<typename T > | |
__global__ void | fullLU_post_divide (T *dA, int *ks, int *offsets) |
template<typename T > | |
__global__ void | fullLU_post_divide_general (T *dA, int *ks, int *offsets) |
template<typename T > | |
__global__ void | boostLastPivot (T *dA, int start_row, int *ks, int *offsets, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | fwdElim_full_narrow (int N, int *ks, int *offsets, T *dA, T *dB, int b_partition_size, int b_rest_num) |
template<typename T > | |
__global__ void | fwdElim_full (int N, int *ks, int *offsets, T *dA, T *dB, int b_partition_size, int b_rest_num) |
template<typename T > | |
__global__ void | preBck_full_divide_narrow (int N, int *ks, int *offsets, T *dA, T *dB, int b_partition_size, int b_rest_num) |
template<typename T > | |
__global__ void | preBck_full_divide (int N, int *ks, int *offsets, T *dA, T *dB, int b_partition_size, int b_rest_num) |
template<typename T > | |
__global__ void | bckElim_full_narrow (int N, int *ks, int *offsets, T *dA, T *dB, int b_partition_size, int b_rest_num) |
template<typename T > | |
__global__ void | bckElim_full (int N, int *ks, int *offsets, T *dA, T *dB, int b_partition_size, int b_rest_num) |
template<typename T > | |
__global__ void | fwdElim_sol (int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | fwdElim_sol_medium (int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | fwdElim_sol_narrow (int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bckElim_sol (int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bckElim_sol_medium (int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | bckElim_sol_narrow (int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__global__ void | preBck_sol_divide (int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num) |
template<typename T > | |
__device__ void | fwdElim_offDiag_large_tiled (T *dA, T *dB, int idx, int k, int g_k, int r, int first_row, int last_row, int offset, T *a_elements) |
template<typename T > | |
__device__ void | bckElim_offDiag_large_tiled (T *dA, T *dB, int idx, int k, int g_k, int r, int first_row, int last_row, int offset, T *a_elements) |
template<typename T > | |
__global__ void | fwdElim_spike (int N, int *ks, int g_k, int rightWidth, int *offsets, T *dA, T *dB, int partition_size, int rest_num, int *left_spike_widths, int *right_spike_widths, int *first_rows) |
template<typename T > | |
__global__ void | bckElim_spike (int N, int *ks, int g_k, int rightWidth, int *offsets, T *dA, T *dB, int partition_size, int rest_num, int *left_spike_widths, int *right_spike_widths, int *first_rows) |
template<typename T > | |
__global__ void | fwdElim_rightSpike_per_partition (int N, int k, int pivotIdx, T *dA, T *dB, int first_row, int last_row) |
template<typename T > | |
__global__ void | preBck_rightSpike_divide_per_partition (int N, int k, int pivotIdx, T *dA, T *dB, int first_row, int last_row) |
template<typename T > | |
__global__ void | preBck_offDiag_divide_per_partition (int g_k, int k, int pivotIdx, T *dA, T *dB, int first_row, int last_row) |
template<typename T > | |
__global__ void | preBck_offDiag_divide (int N, int g_k, int *ks, int *offsets, T *dA, T *dB, int partSize, int remainder) |
template<typename T > | |
__global__ void | bckElim_rightSpike_per_partition (int N, int k, int pivotIdx, T *dA, T *dB, int first_row, int last_row) |
template<typename T > | |
__global__ void | fwdElim_leftSpike_per_partition (int N, int k, int bid_delta, int pivotIdx, T *dA, T *dB, int first_row, int last_row) |
template<typename T > | |
__global__ void | preBck_leftSpike_divide_per_partition (int N, int k, int bid_delta, int pivotIdx, T *dA, T *dB, int first_row, int last_row) |
template<typename T > | |
__global__ void | bckElim_leftSpike_per_partition (int N, int k, int bid_delta, int pivotIdx, T *dA, T *dB, int first_row, int last_row) |
The spike::device::var namespace contains all CUDA kernels for the variable-bandwidth preconditioner.
template<typename T>
__global__ void spike::device::var::bandLU(T *dA, int *ks, int *offsets, int partition_size, int rest_num)
template<typename T>
__global__ void spike::device::var::bandLU_critical_div_general(T *dA, int start_row, int *ks, int *offsets, int partition_size, int rest_num)
template<typename T>
__global__ void spike::device::var::bandLU_critical_div_safe_general(T *dA, int start_row, int *ks, int *offsets, int partition_size, int rest_num)
template<typename T>
__global__ void spike::device::var::bandLU_critical_sub_general(T *dA, int start_row, int *ks, int *offsets, int partition_size, int rest_num, int last)
template<typename T>
__global__ void spike::device::var::bandLU_g32(T *dA, int *ks, int *offsets, int partition_size, int rest_num)
template<typename T>
__global__ void spike::device::var::bandLU_g32_safe(T *dA, int *ks, int *offsets, int partition_size, int rest_num)
template<typename T>
__global__ void spike::device::var::bandLU_post_divide_per_partition(T *dA, int k, int offset, int partSize)
template<typename T>
__global__ void spike::device::var::bandLU_post_divide_per_partition_general(T *dA, int k, int offset, int partSize)
template<typename T>
__global__ void spike::device::var::bandLU_safe(T *dA, int *ks, int *offsets, int partition_size, int rest_num)
template<typename T>
__global__ void spike::device::var::bckElim_full(int N, int *ks, int *offsets, T *dA, T *dB, int b_partition_size, int b_rest_num)
template<typename T>
__global__ void spike::device::var::bckElim_full_narrow(int N, int *ks, int *offsets, T *dA, T *dB, int b_partition_size, int b_rest_num)
template<typename T>
__global__ void spike::device::var::bckElim_leftSpike_per_partition(int N, int k, int bid_delta, int pivotIdx, T *dA, T *dB, int first_row, int last_row)
template<typename T>
__device__ void spike::device::var::bckElim_offDiag_large_tiled(T *dA, T *dB, int idx, int k, int g_k, int r, int first_row, int last_row, int offset, T *a_elements)
template<typename T>
__global__ void spike::device::var::bckElim_rightSpike_per_partition(int N, int k, int pivotIdx, T *dA, T *dB, int first_row, int last_row)
template<typename T>
__global__ void spike::device::var::bckElim_sol(int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num)
template<typename T>
__global__ void spike::device::var::bckElim_sol_medium(int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num)
template<typename T>
__global__ void spike::device::var::bckElim_sol_narrow(int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num)
template<typename T>
__global__ void spike::device::var::bckElim_spike(int N, int *ks, int g_k, int rightWidth, int *offsets, T *dA, T *dB, int partition_size, int rest_num, int *left_spike_widths, int *right_spike_widths, int *first_rows)
template<typename T>
__global__ void spike::device::var::boostLastPivot(T *dA, int start_row, int *ks, int *offsets, int partition_size, int rest_num)
template<typename T>
__global__ void spike::device::var::fullLU_div(T *dA, int *ks, int *offsets, int cur_row)
template<typename T>
__global__ void spike::device::var::fullLU_div_general(T *dA, int *ks, int *offsets, int cur_row)
template<typename T>
__global__ void spike::device::var::fullLU_div_safe(T *dA, int *ks, int *offsets, int cur_row)
template<typename T>
__global__ void spike::device::var::fullLU_div_safe_general(T *dA, int *ks, int *offsets, int cur_row)
template<typename T>
__global__ void spike::device::var::fullLU_post_divide(T *dA, int *ks, int *offsets)
template<typename T>
__global__ void spike::device::var::fullLU_post_divide_general(T *dA, int *ks, int *offsets)
template<typename T>
__global__ void spike::device::var::fullLU_sub(T *dA, int *ks, int *offsets, int cur_row)
template<typename T>
__global__ void spike::device::var::fullLU_sub_general(T *dA, int *ks, int *offsets, int cur_row)
template<typename T>
__global__ void spike::device::var::fullLU_sub_spec(T *dA, int *ks, int *offsets)
template<typename T>
__global__ void spike::device::var::fullLU_sub_spec_general(T *dA, int *ks, int *offsets)
template<typename T>
__global__ void spike::device::var::fwdElim_full(int N, int *ks, int *offsets, T *dA, T *dB, int b_partition_size, int b_rest_num)
template<typename T>
__global__ void spike::device::var::fwdElim_full_narrow(int N, int *ks, int *offsets, T *dA, T *dB, int b_partition_size, int b_rest_num)
template<typename T>
__global__ void spike::device::var::fwdElim_leftSpike_per_partition(int N, int k, int bid_delta, int pivotIdx, T *dA, T *dB, int first_row, int last_row)
template<typename T>
__device__ void spike::device::var::fwdElim_offDiag_large_tiled(T *dA, T *dB, int idx, int k, int g_k, int r, int first_row, int last_row, int offset, T *a_elements)
template<typename T>
__global__ void spike::device::var::fwdElim_rightSpike_per_partition(int N, int k, int pivotIdx, T *dA, T *dB, int first_row, int last_row)
template<typename T>
__global__ void spike::device::var::fwdElim_sol(int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num)
template<typename T>
__global__ void spike::device::var::fwdElim_sol_medium(int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num)
template<typename T>
__global__ void spike::device::var::fwdElim_sol_narrow(int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num)
template<typename T>
__global__ void spike::device::var::fwdElim_spike(int N, int *ks, int g_k, int rightWidth, int *offsets, T *dA, T *dB, int partition_size, int rest_num, int *left_spike_widths, int *right_spike_widths, int *first_rows)
template<typename T>
__global__ void spike::device::var::preBck_full_divide(int N, int *ks, int *offsets, T *dA, T *dB, int b_partition_size, int b_rest_num)
template<typename T>
__global__ void spike::device::var::preBck_full_divide_narrow(int N, int *ks, int *offsets, T *dA, T *dB, int b_partition_size, int b_rest_num)
template<typename T>
__global__ void spike::device::var::preBck_leftSpike_divide_per_partition(int N, int k, int bid_delta, int pivotIdx, T *dA, T *dB, int first_row, int last_row)
template<typename T>
__global__ void spike::device::var::preBck_offDiag_divide(int N, int g_k, int *ks, int *offsets, T *dA, T *dB, int partSize, int remainder)
template<typename T>
__global__ void spike::device::var::preBck_offDiag_divide_per_partition(int g_k, int k, int pivotIdx, T *dA, T *dB, int first_row, int last_row)
template<typename T>
__global__ void spike::device::var::preBck_rightSpike_divide_per_partition(int N, int k, int pivotIdx, T *dA, T *dB, int first_row, int last_row)
template<typename T>
__global__ void spike::device::var::preBck_sol_divide(int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num)