11 #include <cusp/csr_matrix.h>
12 #include <cusp/array1d.h>
13 #include <cusp/blas.h>
14 #include <cusp/print.h>
15 #include <cusp/io/matrix_market.h>
17 #include <thrust/sequence.h>
18 #include <thrust/scan.h>
19 #include <thrust/functional.h>
20 #include <thrust/logical.h>
114 template <
typename Array,
typename PrecValueType>
122 template <
typename Matrix>
123 bool setup(
const Matrix& A);
125 template <
typename Array1>
126 bool update(
const Array1& entries);
128 template <
typename SpmvOperator>
129 bool solve(SpmvOperator& spmv,
137 typedef typename Array::value_type SolverValueType;
138 typedef typename Array::memory_space MemorySpace;
140 typedef typename cusp::array1d<SolverValueType, MemorySpace> SolverVector;
141 typedef typename cusp::array1d<PrecValueType, MemorySpace> PrecVector;
143 typedef typename cusp::array1d<PrecValueType, cusp::host_memory> PrecVectorH;
144 typedef typename cusp::array1d<int, cusp::host_memory>
IntVectorH;
146 typedef typename cusp::coo_matrix<int, PrecValueType, cusp::host_memory> PrecMatrixCooH;
152 std::vector<Precond<PrecVector>*> m_precond_pointers;
153 std::vector<IntVectorH> m_comp_reorderings;
159 bool m_singleComponent;
160 bool m_trackReordering;
174 maxNumIterations(100),
176 performReorder(true),
179 maxBandwidth(std::numeric_limits<int>::max()),
183 safeFactorization(false),
184 variableBandwidth(true),
185 singleComponent(false),
186 trackReordering(false)
199 time_cpu_assemble(0),
214 residualNorm(std::numeric_limits<double>::max()),
215 relResidualNorm(std::numeric_limits<double>::max())
225 template <
typename Array,
typename PrecValueType>
228 : m_monitor(opts.maxNumIterations, opts.tolerance),
229 m_precond(numPartitions, opts.performReorder, opts.performMC64, opts.applyScaling,
230 opts.dropOffFraction, opts.maxBandwidth, opts.factMethod, opts.precondType,
231 opts.safeFactorization, opts.variableBandwidth, opts.trackReordering),
232 m_solver(opts.solverType),
233 m_singleComponent(opts.singleComponent),
234 m_trackReordering(opts.trackReordering),
244 template <
typename Array,
typename PrecValueType>
247 for (
size_t i = 0; i < m_precond_pointers.size(); i++)
248 delete m_precond_pointers[i];
249 m_precond_pointers.clear();
261 template <
typename Array,
typename PrecValueType>
262 template <
typename Matrix>
267 size_t nnz = A.num_entries;
276 size_t numComponents;
278 if (m_singleComponent)
282 for (
size_t i = 0; i < nnz; i++)
288 if (m_trackReordering)
289 m_compMap.resize(nnz);
291 if (numComponents > 1) {
294 std::vector<PrecMatrixCooH> coo_matrices(numComponents);
296 m_comp_reorderings.resize(numComponents);
297 if (m_comp_perms.size() != m_n)
298 m_comp_perms.resize(m_n, -1);
300 cusp::blas::fill(m_comp_perms, -1);
302 for (
size_t i=0; i < numComponents; i++)
303 m_comp_reorderings[i].clear();
305 IntVectorH cur_indices(numComponents, 0);
307 IntVectorH visited(m_n, 0);
309 for (
size_t i = 0; i < nnz; i++) {
310 int from = Acoo.row_indices[i];
311 int to = Acoo.column_indices[i];
314 visited[from] = visited[to] = 1;
316 if (m_comp_perms[from] < 0) {
317 m_comp_perms[from] = cur_indices[compIndex];
318 m_comp_reorderings[compIndex].push_back(from);
319 cur_indices[compIndex] ++;
322 if (m_comp_perms[to] < 0) {
323 m_comp_perms[to] = cur_indices[compIndex];
324 m_comp_reorderings[compIndex].push_back(to);
325 cur_indices[compIndex] ++;
328 PrecMatrixCooH& cur_matrix = coo_matrices[compIndex];
330 cur_matrix.row_indices.push_back(m_comp_perms[from]);
331 cur_matrix.column_indices.push_back(m_comp_perms[to]);
332 cur_matrix.values.push_back(Acoo.values[i]);
334 if (m_trackReordering)
335 m_compMap[i] = compIndex;
338 if (thrust::any_of(visited.begin(), visited.end(), thrust::logical_not<int>() ))
341 for (
size_t i = 0; i < numComponents; i++) {
342 PrecMatrixCooH& cur_matrix = coo_matrices[i];
344 cur_matrix.num_entries = cur_matrix.values.size();
345 cur_matrix.num_rows = cur_matrix.num_cols = cur_indices[i];
347 if (m_precond_pointers.size() <= i)
350 m_precond_pointers[i]->setup(cur_matrix);
353 m_compIndices.resize(m_n, 0);
354 m_comp_reorderings.resize(1);
355 m_comp_reorderings[0].resize(m_n);
356 m_comp_perms.resize(m_n);
358 if (m_trackReordering)
359 cusp::blas::fill(m_compMap, 0);
361 thrust::sequence(m_comp_perms.begin(), m_comp_perms.end());
362 thrust::sequence(m_comp_reorderings[0].begin(), m_comp_reorderings[0].end());
364 if (m_precond_pointers.size() == 0)
367 m_precond_pointers[0]->setup(A);
374 m_stats.bandwidthReorder = m_precond_pointers[0]->getBandwidthReordering();
375 m_stats.bandwidth = m_precond_pointers[0]->getBandwidth();
376 m_stats.bandwidthMC64 = m_precond_pointers[0]->getBandwidthMC64();
377 m_stats.numPartitions = m_precond_pointers[0]->getNumPartitions();
378 m_stats.actualDropOff = m_precond_pointers[0]->getActualDropOff();
379 m_stats.time_reorder = m_precond_pointers[0]->getTimeReorder();
380 m_stats.time_cpu_assemble = m_precond_pointers[0]->getTimeCPUAssemble();
381 m_stats.time_transfer = m_precond_pointers[0]->getTimeTransfer();
382 m_stats.time_toBanded = m_precond_pointers[0]->getTimeToBanded();
383 m_stats.time_offDiags = m_precond_pointers[0]->getTimeCopyOffDiags();
384 m_stats.time_bandLU = m_precond_pointers[0]->getTimeBandLU();
385 m_stats.time_bandUL = m_precond_pointers[0]->getTimeBandUL();
386 m_stats.time_assembly = m_precond_pointers[0]->gettimeAssembly();
387 m_stats.time_fullLU = m_precond_pointers[0]->getTimeFullLU();
389 for (
size_t i=1; i < numComponents; i++) {
390 if (m_stats.bandwidthReorder < m_precond_pointers[i]->getBandwidthReordering())
391 m_stats.bandwidthReorder = m_precond_pointers[i]->getBandwidthReordering();
392 if (m_stats.bandwidth < m_precond_pointers[i]->getBandwidth())
393 m_stats.bandwidth = m_precond_pointers[i]->getBandwidth();
394 if (m_stats.bandwidthMC64 < m_precond_pointers[i]->getBandwidthMC64())
395 m_stats.bandwidthMC64 = m_precond_pointers[i]->getBandwidthMC64();
396 if (m_stats.numPartitions > m_precond_pointers[i]->getNumPartitions())
397 m_stats.numPartitions = m_precond_pointers[i]->getNumPartitions();
398 m_stats.time_reorder += m_precond_pointers[i]->getTimeReorder();
399 m_stats.time_cpu_assemble += m_precond_pointers[i]->getTimeCPUAssemble();
400 m_stats.time_transfer += m_precond_pointers[i]->getTimeTransfer();
401 m_stats.time_toBanded += m_precond_pointers[i]->getTimeToBanded();
402 m_stats.time_offDiags += m_precond_pointers[i]->getTimeCopyOffDiags();
403 m_stats.time_bandLU += m_precond_pointers[i]->getTimeBandLU();
404 m_stats.time_bandUL += m_precond_pointers[i]->getTimeBandUL();
405 m_stats.time_assembly += m_precond_pointers[i]->gettimeAssembly();
406 m_stats.time_fullLU += m_precond_pointers[i]->getTimeFullLU();
429 template <
typename Array,
typename PrecValueType>
430 template <
typename Array1>
438 if (!m_trackReordering)
446 int numComponents = m_precond_pointers.size();
448 if (numComponents <= 1) {
449 PrecVector tmp_entries = entries;
451 m_precond_pointers[0]->update(tmp_entries);
454 PrecVectorH h_entries = entries;
456 int nnz = h_entries.size();
458 std::vector<PrecVectorH> new_entries(numComponents);
460 for (
int i=0; i < nnz; i++)
461 new_entries[m_compMap[i]].push_back(h_entries[i]);
463 for (
int i=0; i < numComponents; i++) {
464 PrecVector tmp_entries = new_entries[i];
466 m_precond_pointers[i]->update(tmp_entries);
474 m_stats.time_reorder = 0;
475 m_stats.time_cpu_assemble = m_precond_pointers[0]->getTimeCPUAssemble();
476 m_stats.time_transfer = m_precond_pointers[0]->getTimeTransfer();
477 m_stats.time_toBanded = m_precond_pointers[0]->getTimeToBanded();
478 m_stats.time_offDiags = m_precond_pointers[0]->getTimeCopyOffDiags();
479 m_stats.time_bandLU = m_precond_pointers[0]->getTimeBandLU();
480 m_stats.time_bandUL = m_precond_pointers[0]->getTimeBandUL();
481 m_stats.time_assembly = m_precond_pointers[0]->gettimeAssembly();
482 m_stats.time_fullLU = m_precond_pointers[0]->getTimeFullLU();
484 for (
int i=1; i < numComponents; i++) {
485 m_stats.time_cpu_assemble += m_precond_pointers[i]->getTimeCPUAssemble();
486 m_stats.time_transfer += m_precond_pointers[i]->getTimeTransfer();
487 m_stats.time_toBanded += m_precond_pointers[i]->getTimeToBanded();
488 m_stats.time_offDiags += m_precond_pointers[i]->getTimeCopyOffDiags();
489 m_stats.time_bandLU += m_precond_pointers[i]->getTimeBandLU();
490 m_stats.time_bandUL += m_precond_pointers[i]->getTimeBandUL();
491 m_stats.time_assembly += m_precond_pointers[i]->gettimeAssembly();
492 m_stats.time_fullLU += m_precond_pointers[i]->getTimeFullLU();
511 template <
typename Array,
typename PrecValueType>
512 template <
typename SpmvOperator>
522 SolverVector b_vector = b;
523 SolverVector x_vector = x;
527 m_monitor.init(b_vector);
533 spike::bicgstab2(spmv, b_vector, x_vector, m_monitor, m_precond_pointers, m_compIndices, m_comp_perms, m_comp_reorderings);
535 thrust::copy(x_vector.begin(), x_vector.end(), x.begin());
539 m_stats.residualNorm = m_monitor.getResidualNorm();
540 m_stats.relResidualNorm = m_monitor.getRelResidualNorm();
541 m_stats.numIterations = m_monitor.getNumIterations();
543 int numComponents = m_precond_pointers.size();
544 m_stats.time_shuffle = m_precond_pointers[0]->getTimeShuffle();
545 for (
int i=1; i < numComponents; i++)
546 m_stats.time_shuffle += m_precond_pointers[i]->getTimeShuffle();
548 return m_monitor.converged();
Input solver options.
Definition: solver.h:43
bool variableBandwidth
Definition: solver.h:60
Solver(int numPartitions, const Options &opts)
Spike solver constructor.
Definition: solver.h:226
double timeSetup
Definition: solver.h:75
KrylovSolverType solverType
Definition: solver.h:47
double relResidualNorm
Definition: solver.h:100
Options()
Definition: solver.h:172
void adjustComponentIndices()
Definition: components.h:45
Definition: exception.h:20
Definition: exception.h:13
bool solve(SpmvOperator &spmv, const Array &b, Array &x)
Linear system solve.
Definition: solver.h:514
KrylovSolverType
Definition: common.h:55
bool performMC64
Definition: solver.h:52
virtual void Start()
Definition: timer.h:108
double time_reorder
Definition: solver.h:79
const Stats & getStats() const
Extract solver statistics.
Definition: solver.h:134
CPU and GPU timer classes.
IntVectorH m_compIndices
Definition: components.h:97
FactorizationMethod
Definition: common.h:60
virtual void Stop()
Definition: timer.h:112
float numIterations
Definition: solver.h:98
PreconditionerType
Definition: common.h:65
bool setup(const Matrix &A)
Preconditioner setup.
Definition: solver.h:264
double timeSolve
Definition: solver.h:77
double time_offDiags
Definition: solver.h:83
Stats()
Definition: solver.h:195
bool trackReordering
Definition: solver.h:62
FactorizationMethod factMethod
Definition: solver.h:57
double residualNorm
Definition: solver.h:99
double time_toBanded
Definition: solver.h:82
double time_bandUL
Definition: solver.h:85
BiCGStab(L) preconditioned iterative Krylov solver.
~Solver()
Spike solver destructor.
Definition: solver.h:245
int maxBandwidth
Definition: solver.h:54
double time_assembly
Definition: solver.h:87
double dropOffFraction
Definition: solver.h:55
Spike preconditioner.
Definition: precond.h:37
bool safeFactorization
Definition: solver.h:59
Definition: exception.h:21
int bandwidthReorder
Definition: solver.h:91
int maxNumIterations
Definition: solver.h:48
int bandwidth
Definition: solver.h:93
int numPartitions
Definition: solver.h:95
cusp::array1d< int, cusp::host_memory > IntVectorH
Definition: bicgstab2.h:20
double time_bandLU
Definition: solver.h:84
Definition of the Spike preconditioner class.
bool applyScaling
Definition: solver.h:53
bool update(const Array1 &entries)
Preconditioner update.
Definition: solver.h:432
double tolerance
Definition: solver.h:49
double actualDropOff
Definition: solver.h:96
Definition: components.h:19
double timeUpdate
Definition: solver.h:76
Output solver statistics.
Definition: solver.h:71
double time_cpu_assemble
Definition: solver.h:80
int m_numComponents
Definition: components.h:99
double time_transfer
Definition: solver.h:81
int bandwidthMC64
Definition: solver.h:92
void combineComponents(int node1, int node2)
Definition: components.h:36
PreconditionerType precondType
Definition: solver.h:58
Main SPIKE::GPU solver.
Definition: solver.h:115
bool performReorder
Definition: solver.h:51
double time_fullLU
Definition: solver.h:86
Definition: exception.h:22
virtual double getElapsed()
Definition: timer.h:116
bool singleComponent
Definition: solver.h:61
void bicgstab2(SpmvOperator &spmv, const SolverVector &b, SolverVector &x, Monitor< SolverVector > &monitor, std::vector< Precond< PrecVector > * > &precond_pointers, IntVectorH &compIndices, IntVectorH &comp_perms, std::vector< IntVectorH > &comp_reorderings)
Specializations of the generic spike::bicgstabl function for L=2.
Definition: bicgstab2.h:260
CPU timer.
Definition: timer.h:101
double time_shuffle
Definition: solver.h:89