|
template<typename T > |
__global__ void | spike::device::var::fwdElim_full_narrow (int N, int *ks, int *offsets, T *dA, T *dB, int b_partition_size, int b_rest_num) |
|
template<typename T > |
__global__ void | spike::device::var::fwdElim_full (int N, int *ks, int *offsets, T *dA, T *dB, int b_partition_size, int b_rest_num) |
|
template<typename T > |
__global__ void | spike::device::var::preBck_full_divide_narrow (int N, int *ks, int *offsets, T *dA, T *dB, int b_partition_size, int b_rest_num) |
|
template<typename T > |
__global__ void | spike::device::var::preBck_full_divide (int N, int *ks, int *offsets, T *dA, T *dB, int b_partition_size, int b_rest_num) |
|
template<typename T > |
__global__ void | spike::device::var::bckElim_full_narrow (int N, int *ks, int *offsets, T *dA, T *dB, int b_partition_size, int b_rest_num) |
|
template<typename T > |
__global__ void | spike::device::var::bckElim_full (int N, int *ks, int *offsets, T *dA, T *dB, int b_partition_size, int b_rest_num) |
|
template<typename T > |
__global__ void | spike::device::var::fwdElim_sol (int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num) |
|
template<typename T > |
__global__ void | spike::device::var::fwdElim_sol_medium (int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num) |
|
template<typename T > |
__global__ void | spike::device::var::fwdElim_sol_narrow (int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num) |
|
template<typename T > |
__global__ void | spike::device::var::bckElim_sol (int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num) |
|
template<typename T > |
__global__ void | spike::device::var::bckElim_sol_medium (int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num) |
|
template<typename T > |
__global__ void | spike::device::var::bckElim_sol_narrow (int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num) |
|
template<typename T > |
__global__ void | spike::device::var::preBck_sol_divide (int N, int *ks, int *offsets, T *dA, T *dB, int partition_size, int rest_num) |
|
template<typename T > |
__device__ void | spike::device::var::fwdElim_offDiag_large_tiled (T *dA, T *dB, int idx, int k, int g_k, int r, int first_row, int last_row, int offset, T *a_elements) |
|
template<typename T > |
__device__ void | spike::device::var::bckElim_offDiag_large_tiled (T *dA, T *dB, int idx, int k, int g_k, int r, int first_row, int last_row, int offset, T *a_elements) |
|
template<typename T > |
__global__ void | spike::device::var::fwdElim_spike (int N, int *ks, int g_k, int rightWidth, int *offsets, T *dA, T *dB, int partition_size, int rest_num, int *left_spike_widths, int *right_spike_widths, int *first_rows) |
|
template<typename T > |
__global__ void | spike::device::var::bckElim_spike (int N, int *ks, int g_k, int rightWidth, int *offsets, T *dA, T *dB, int partition_size, int rest_num, int *left_spike_widths, int *right_spike_widths, int *first_rows) |
|
template<typename T > |
__global__ void | spike::device::var::fwdElim_rightSpike_per_partition (int N, int k, int pivotIdx, T *dA, T *dB, int first_row, int last_row) |
|
template<typename T > |
__global__ void | spike::device::var::preBck_rightSpike_divide_per_partition (int N, int k, int pivotIdx, T *dA, T *dB, int first_row, int last_row) |
|
template<typename T > |
__global__ void | spike::device::var::preBck_offDiag_divide_per_partition (int g_k, int k, int pivotIdx, T *dA, T *dB, int first_row, int last_row) |
|
template<typename T > |
__global__ void | spike::device::var::preBck_offDiag_divide (int N, int g_k, int *ks, int *offsets, T *dA, T *dB, int partSize, int remainder) |
|
template<typename T > |
__global__ void | spike::device::var::bckElim_rightSpike_per_partition (int N, int k, int pivotIdx, T *dA, T *dB, int first_row, int last_row) |
|
template<typename T > |
__global__ void | spike::device::var::fwdElim_leftSpike_per_partition (int N, int k, int bid_delta, int pivotIdx, T *dA, T *dB, int first_row, int last_row) |
|
template<typename T > |
__global__ void | spike::device::var::preBck_leftSpike_divide_per_partition (int N, int k, int bid_delta, int pivotIdx, T *dA, T *dB, int first_row, int last_row) |
|
template<typename T > |
__global__ void | spike::device::var::bckElim_leftSpike_per_partition (int N, int k, int bid_delta, int pivotIdx, T *dA, T *dB, int first_row, int last_row) |
|
Various forward/backward sweep CUDA kernels in the `spike::device::var` namespace, used for the case of partitions with varying (per-partition) bandwidths.