SpikeGPU  1.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
solver.h
Go to the documentation of this file.
1 
5 #ifndef SPIKE_SOLVER_H
6 #define SPIKE_SOLVER_H
7 
8 #include <limits>
9 #include <vector>
10 
11 #include <cusp/csr_matrix.h>
12 #include <cusp/array1d.h>
13 #include <cusp/blas.h>
14 #include <cusp/print.h>
15 #include <cusp/io/matrix_market.h>
16 
17 #include <thrust/sequence.h>
18 #include <thrust/scan.h>
19 #include <thrust/functional.h>
20 #include <thrust/logical.h>
21 
22 
23 #include <spike/common.h>
24 #include <spike/components.h>
25 #include <spike/monitor.h>
26 #include <spike/precond.h>
27 #include <spike/bicgstab2.h>
28 #include <spike/timer.h>
29 
30 
36 namespace spike {
37 
39 
43 struct Options
44 {
45  Options();
46 
49  double tolerance;
52  bool performMC64;
53  bool applyScaling;
55  double dropOffFraction;
63 };
64 
65 
67 
71 struct Stats
72 {
73  Stats();
74 
75  double timeSetup;
76  double timeUpdate;
77  double timeSolve;
79  double time_reorder;
81  double time_transfer;
82  double time_toBanded; /*TODO: combine this with time_cpu_assemble*/
83  double time_offDiags;
84  double time_bandLU;
85  double time_bandUL;
86  double time_fullLU;
87  double time_assembly;
89  double time_shuffle;
93  int bandwidth;
96  double actualDropOff;
98  float numIterations;
99  double residualNorm;
101 };
102 
103 
105 
114 template <typename Array, typename PrecValueType>
115 class Solver
116 {
117 public:
118  Solver(int numPartitions,
119  const Options& opts);
120  ~Solver();
121 
122  template <typename Matrix>
123  bool setup(const Matrix& A);
124 
125  template <typename Array1>
126  bool update(const Array1& entries);
127 
128  template <typename SpmvOperator>
129  bool solve(SpmvOperator& spmv,
130  const Array& b,
131  Array& x);
132 
134  const Stats& getStats() const {return m_stats;}
135 
136 private:
137  typedef typename Array::value_type SolverValueType;
138  typedef typename Array::memory_space MemorySpace;
139 
140  typedef typename cusp::array1d<SolverValueType, MemorySpace> SolverVector;
141  typedef typename cusp::array1d<PrecValueType, MemorySpace> PrecVector;
142 
143  typedef typename cusp::array1d<PrecValueType, cusp::host_memory> PrecVectorH;
144  typedef typename cusp::array1d<int, cusp::host_memory> IntVectorH;
145 
146  typedef typename cusp::coo_matrix<int, PrecValueType, cusp::host_memory> PrecMatrixCooH;
147 
148 
149  KrylovSolverType m_solver;
150  Monitor<SolverVector> m_monitor;
151  Precond<PrecVector> m_precond;
152  std::vector<Precond<PrecVector>*> m_precond_pointers;
153  std::vector<IntVectorH> m_comp_reorderings;
154  IntVectorH m_comp_perms;
155  IntVectorH m_compIndices;
156  IntVectorH m_compMap;
157 
158  int m_n;
159  bool m_singleComponent;
160  bool m_trackReordering;
161  bool m_setupDone;
162 
163  Stats m_stats;
164 };
165 
166 
171 inline
173 : solverType(BiCGStab2),
174  maxNumIterations(100),
175  tolerance(1e-6),
176  performReorder(true),
177  performMC64(true),
178  applyScaling(true),
179  maxBandwidth(std::numeric_limits<int>::max()),
180  dropOffFraction(0),
181  factMethod(LU_only),
182  precondType(Spike),
183  safeFactorization(false),
184  variableBandwidth(true),
185  singleComponent(false),
186  trackReordering(false)
187 {
188 }
189 
194 inline
196 : timeSetup(0),
197  timeSolve(0),
198  time_reorder(0),
199  time_cpu_assemble(0),
200  time_transfer(0),
201  time_toBanded(0),
202  time_offDiags(0),
203  time_bandLU(0),
204  time_bandUL(0),
205  time_assembly(0),
206  time_fullLU(0),
207  time_shuffle(0),
208  bandwidthReorder(0),
209  bandwidthMC64(0),
210  bandwidth(0),
211  numPartitions(0),
212  actualDropOff(0),
213  numIterations(0),
214  residualNorm(std::numeric_limits<double>::max()),
215  relResidualNorm(std::numeric_limits<double>::max())
216 {
217 }
218 
219 
221 
225 template <typename Array, typename PrecValueType>
227  const Options& opts)
228 : m_monitor(opts.maxNumIterations, opts.tolerance),
229  m_precond(numPartitions, opts.performReorder, opts.performMC64, opts.applyScaling,
230  opts.dropOffFraction, opts.maxBandwidth, opts.factMethod, opts.precondType,
231  opts.safeFactorization, opts.variableBandwidth, opts.trackReordering),
232  m_solver(opts.solverType),
233  m_singleComponent(opts.singleComponent),
234  m_trackReordering(opts.trackReordering),
235  m_setupDone(false)
236 {
237 }
238 
239 
241 
244 template <typename Array, typename PrecValueType>
246 {
247  for (size_t i = 0; i < m_precond_pointers.size(); i++)
248  delete m_precond_pointers[i];
249  m_precond_pointers.clear();
250 }
251 
252 
254 
261 template <typename Array, typename PrecValueType>
262 template <typename Matrix>
263 bool
265 {
266  m_n = A.num_rows;
267  size_t nnz = A.num_entries;
268 
269  CPUTimer timer;
270 
271  timer.Start();
272 
273  PrecMatrixCooH Acoo;
274 
275  Components sc(m_n);
276  size_t numComponents;
277 
278  if (m_singleComponent)
279  numComponents = 1;
280  else {
281  Acoo = A;
282  for (size_t i = 0; i < nnz; i++)
283  sc.combineComponents(Acoo.row_indices[i], Acoo.column_indices[i]);
285  numComponents = sc.m_numComponents;
286  }
287 
288  if (m_trackReordering)
289  m_compMap.resize(nnz);
290 
291  if (numComponents > 1) {
292  m_compIndices = sc.m_compIndices;
293 
294  std::vector<PrecMatrixCooH> coo_matrices(numComponents);
295 
296  m_comp_reorderings.resize(numComponents);
297  if (m_comp_perms.size() != m_n)
298  m_comp_perms.resize(m_n, -1);
299  else
300  cusp::blas::fill(m_comp_perms, -1);
301 
302  for (size_t i=0; i < numComponents; i++)
303  m_comp_reorderings[i].clear();
304 
305  IntVectorH cur_indices(numComponents, 0);
306 
307  IntVectorH visited(m_n, 0);
308 
309  for (size_t i = 0; i < nnz; i++) {
310  int from = Acoo.row_indices[i];
311  int to = Acoo.column_indices[i];
312  int compIndex = sc.m_compIndices[from];
313 
314  visited[from] = visited[to] = 1;
315 
316  if (m_comp_perms[from] < 0) {
317  m_comp_perms[from] = cur_indices[compIndex];
318  m_comp_reorderings[compIndex].push_back(from);
319  cur_indices[compIndex] ++;
320  }
321 
322  if (m_comp_perms[to] < 0) {
323  m_comp_perms[to] = cur_indices[compIndex];
324  m_comp_reorderings[compIndex].push_back(to);
325  cur_indices[compIndex] ++;
326  }
327 
328  PrecMatrixCooH& cur_matrix = coo_matrices[compIndex];
329 
330  cur_matrix.row_indices.push_back(m_comp_perms[from]);
331  cur_matrix.column_indices.push_back(m_comp_perms[to]);
332  cur_matrix.values.push_back(Acoo.values[i]);
333 
334  if (m_trackReordering)
335  m_compMap[i] = compIndex;
336  }
337 
338  if (thrust::any_of(visited.begin(), visited.end(), thrust::logical_not<int>() ))
339  throw system_error(system_error::Matrix_singular, "Singular matrix found");
340 
341  for (size_t i = 0; i < numComponents; i++) {
342  PrecMatrixCooH& cur_matrix = coo_matrices[i];
343 
344  cur_matrix.num_entries = cur_matrix.values.size();
345  cur_matrix.num_rows = cur_matrix.num_cols = cur_indices[i];
346 
347  if (m_precond_pointers.size() <= i)
348  m_precond_pointers.push_back(new Precond<PrecVector>(m_precond));
349 
350  m_precond_pointers[i]->setup(cur_matrix);
351  }
352  } else {
353  m_compIndices.resize(m_n, 0);
354  m_comp_reorderings.resize(1);
355  m_comp_reorderings[0].resize(m_n);
356  m_comp_perms.resize(m_n);
357 
358  if (m_trackReordering)
359  cusp::blas::fill(m_compMap, 0);
360 
361  thrust::sequence(m_comp_perms.begin(), m_comp_perms.end());
362  thrust::sequence(m_comp_reorderings[0].begin(), m_comp_reorderings[0].end());
363 
364  if (m_precond_pointers.size() == 0)
365  m_precond_pointers.push_back(new Precond<PrecVector>(m_precond));
366 
367  m_precond_pointers[0]->setup(A);
368  }
369 
370  timer.Stop();
371 
372  m_stats.timeSetup = timer.getElapsed();
373 
374  m_stats.bandwidthReorder = m_precond_pointers[0]->getBandwidthReordering();
375  m_stats.bandwidth = m_precond_pointers[0]->getBandwidth();
376  m_stats.bandwidthMC64 = m_precond_pointers[0]->getBandwidthMC64();
377  m_stats.numPartitions = m_precond_pointers[0]->getNumPartitions();
378  m_stats.actualDropOff = m_precond_pointers[0]->getActualDropOff();
379  m_stats.time_reorder = m_precond_pointers[0]->getTimeReorder();
380  m_stats.time_cpu_assemble = m_precond_pointers[0]->getTimeCPUAssemble();
381  m_stats.time_transfer = m_precond_pointers[0]->getTimeTransfer();
382  m_stats.time_toBanded = m_precond_pointers[0]->getTimeToBanded();
383  m_stats.time_offDiags = m_precond_pointers[0]->getTimeCopyOffDiags();
384  m_stats.time_bandLU = m_precond_pointers[0]->getTimeBandLU();
385  m_stats.time_bandUL = m_precond_pointers[0]->getTimeBandUL();
386  m_stats.time_assembly = m_precond_pointers[0]->gettimeAssembly();
387  m_stats.time_fullLU = m_precond_pointers[0]->getTimeFullLU();
388 
389  for (size_t i=1; i < numComponents; i++) {
390  if (m_stats.bandwidthReorder < m_precond_pointers[i]->getBandwidthReordering())
391  m_stats.bandwidthReorder = m_precond_pointers[i]->getBandwidthReordering();
392  if (m_stats.bandwidth < m_precond_pointers[i]->getBandwidth())
393  m_stats.bandwidth = m_precond_pointers[i]->getBandwidth();
394  if (m_stats.bandwidthMC64 < m_precond_pointers[i]->getBandwidthMC64())
395  m_stats.bandwidthMC64 = m_precond_pointers[i]->getBandwidthMC64();
396  if (m_stats.numPartitions > m_precond_pointers[i]->getNumPartitions())
397  m_stats.numPartitions = m_precond_pointers[i]->getNumPartitions();
398  m_stats.time_reorder += m_precond_pointers[i]->getTimeReorder();
399  m_stats.time_cpu_assemble += m_precond_pointers[i]->getTimeCPUAssemble();
400  m_stats.time_transfer += m_precond_pointers[i]->getTimeTransfer();
401  m_stats.time_toBanded += m_precond_pointers[i]->getTimeToBanded();
402  m_stats.time_offDiags += m_precond_pointers[i]->getTimeCopyOffDiags();
403  m_stats.time_bandLU += m_precond_pointers[i]->getTimeBandLU();
404  m_stats.time_bandUL += m_precond_pointers[i]->getTimeBandUL();
405  m_stats.time_assembly += m_precond_pointers[i]->gettimeAssembly();
406  m_stats.time_fullLU += m_precond_pointers[i]->getTimeFullLU();
407  }
408 
409  m_setupDone = true;
410 
411  return true;
412 }
413 
414 
416 
429 template <typename Array, typename PrecValueType>
430 template <typename Array1>
431 bool
433 {
434  // Check if this call to update() is legal.
435  if (!m_setupDone)
436  throw system_error(system_error::Illegal_update, "Illegal call to update() before setup().");
437 
438  if (!m_trackReordering)
439  throw system_error(system_error::Illegal_update, "Illegal call to update() with reordering tracking disabled.");
440 
441 
442  // Update the preconditioner.
443  CPUTimer timer;
444  timer.Start();
445 
446  int numComponents = m_precond_pointers.size();
447 
448  if (numComponents <= 1) {
449  PrecVector tmp_entries = entries;
450 
451  m_precond_pointers[0]->update(tmp_entries);
452  }
453  else {
454  PrecVectorH h_entries = entries;
455 
456  int nnz = h_entries.size();
457 
458  std::vector<PrecVectorH> new_entries(numComponents);
459 
460  for (int i=0; i < nnz; i++)
461  new_entries[m_compMap[i]].push_back(h_entries[i]);
462 
463  for (int i=0; i < numComponents; i++) {
464  PrecVector tmp_entries = new_entries[i];
465 
466  m_precond_pointers[i]->update(tmp_entries);
467  }
468  }
469 
470  timer.Stop();
471 
472  m_stats.timeUpdate = timer.getElapsed();
473 
474  m_stats.time_reorder = 0;
475  m_stats.time_cpu_assemble = m_precond_pointers[0]->getTimeCPUAssemble();
476  m_stats.time_transfer = m_precond_pointers[0]->getTimeTransfer();
477  m_stats.time_toBanded = m_precond_pointers[0]->getTimeToBanded();
478  m_stats.time_offDiags = m_precond_pointers[0]->getTimeCopyOffDiags();
479  m_stats.time_bandLU = m_precond_pointers[0]->getTimeBandLU();
480  m_stats.time_bandUL = m_precond_pointers[0]->getTimeBandUL();
481  m_stats.time_assembly = m_precond_pointers[0]->gettimeAssembly();
482  m_stats.time_fullLU = m_precond_pointers[0]->getTimeFullLU();
483 
484  for (int i=1; i < numComponents; i++) {
485  m_stats.time_cpu_assemble += m_precond_pointers[i]->getTimeCPUAssemble();
486  m_stats.time_transfer += m_precond_pointers[i]->getTimeTransfer();
487  m_stats.time_toBanded += m_precond_pointers[i]->getTimeToBanded();
488  m_stats.time_offDiags += m_precond_pointers[i]->getTimeCopyOffDiags();
489  m_stats.time_bandLU += m_precond_pointers[i]->getTimeBandLU();
490  m_stats.time_bandUL += m_precond_pointers[i]->getTimeBandUL();
491  m_stats.time_assembly += m_precond_pointers[i]->gettimeAssembly();
492  m_stats.time_fullLU += m_precond_pointers[i]->getTimeFullLU();
493  }
494 
495  return true;
496 }
497 
498 
500 
511 template <typename Array, typename PrecValueType>
512 template <typename SpmvOperator>
513 bool
515  const Array& b,
516  Array& x)
517 {
518  // Check if this call to solve() is legal.
519  if (!m_setupDone)
520  throw system_error(system_error::Illegal_solve, "Illegal call to solve() before setup().");
521 
522  SolverVector b_vector = b;
523  SolverVector x_vector = x;
524 
525 
526  // Solve the linear system.
527  m_monitor.init(b_vector);
528 
529  CPUTimer timer;
530 
531  timer.Start();
532 
533  spike::bicgstab2(spmv, b_vector, x_vector, m_monitor, m_precond_pointers, m_compIndices, m_comp_perms, m_comp_reorderings);
534 
535  thrust::copy(x_vector.begin(), x_vector.end(), x.begin());
536  timer.Stop();
537 
538  m_stats.timeSolve = timer.getElapsed();
539  m_stats.residualNorm = m_monitor.getResidualNorm();
540  m_stats.relResidualNorm = m_monitor.getRelResidualNorm();
541  m_stats.numIterations = m_monitor.getNumIterations();
542 
543  int numComponents = m_precond_pointers.size();
544  m_stats.time_shuffle = m_precond_pointers[0]->getTimeShuffle();
545  for (int i=1; i < numComponents; i++)
546  m_stats.time_shuffle += m_precond_pointers[i]->getTimeShuffle();
547 
548  return m_monitor.converged();
549 }
550 
551 
552 } // namespace spike
553 
554 
555 #endif
Input solver options.
Definition: solver.h:43
bool variableBandwidth
Definition: solver.h:60
Solver(int numPartitions, const Options &opts)
Spike solver constructor.
Definition: solver.h:226
double timeSetup
Definition: solver.h:75
KrylovSolverType solverType
Definition: solver.h:47
double relResidualNorm
Definition: solver.h:100
Options()
Definition: solver.h:172
void adjustComponentIndices()
Definition: components.h:45
Definition: exception.h:20
Definition: exception.h:13
bool solve(SpmvOperator &spmv, const Array &b, Array &x)
Linear system solve.
Definition: solver.h:514
Definition: common.h:62
KrylovSolverType
Definition: common.h:55
bool performMC64
Definition: solver.h:52
virtual void Start()
Definition: timer.h:108
double time_reorder
Definition: solver.h:79
const Stats & getStats() const
Extract solver statistics.
Definition: solver.h:134
CPU and GPU timer classes.
IntVectorH m_compIndices
Definition: components.h:97
FactorizationMethod
Definition: common.h:60
virtual void Stop()
Definition: timer.h:112
float numIterations
Definition: solver.h:98
PreconditionerType
Definition: common.h:65
bool setup(const Matrix &A)
Preconditioner setup.
Definition: solver.h:264
double timeSolve
Definition: solver.h:77
double time_offDiags
Definition: solver.h:83
Stats()
Definition: solver.h:195
bool trackReordering
Definition: solver.h:62
FactorizationMethod factMethod
Definition: solver.h:57
double residualNorm
Definition: solver.h:99
double time_toBanded
Definition: solver.h:82
double time_bandUL
Definition: solver.h:85
BiCGStab(L) preconditioned iterative Krylov solver.
~Solver()
Spike solver destructor.
Definition: solver.h:245
int maxBandwidth
Definition: solver.h:54
double time_assembly
Definition: solver.h:87
double dropOffFraction
Definition: solver.h:55
Spike preconditioner.
Definition: precond.h:37
bool safeFactorization
Definition: solver.h:59
Definition: exception.h:21
int bandwidthReorder
Definition: solver.h:91
int maxNumIterations
Definition: solver.h:48
int bandwidth
Definition: solver.h:93
int numPartitions
Definition: solver.h:95
cusp::array1d< int, cusp::host_memory > IntVectorH
Definition: bicgstab2.h:20
double time_bandLU
Definition: solver.h:84
Definition: monitor.h:19
Definition: common.h:57
Definition of the Spike preconditioner class.
bool applyScaling
Definition: solver.h:53
bool update(const Array1 &entries)
Preconditioner update.
Definition: solver.h:432
Definition: common.h:66
double tolerance
Definition: solver.h:49
double actualDropOff
Definition: solver.h:96
Definition: components.h:19
double timeUpdate
Definition: solver.h:76
Output solver statistics.
Definition: solver.h:71
double time_cpu_assemble
Definition: solver.h:80
int m_numComponents
Definition: components.h:99
double time_transfer
Definition: solver.h:81
int bandwidthMC64
Definition: solver.h:92
void combineComponents(int node1, int node2)
Definition: components.h:36
PreconditionerType precondType
Definition: solver.h:58
Main SPIKE::GPU solver.
Definition: solver.h:115
bool performReorder
Definition: solver.h:51
double time_fullLU
Definition: solver.h:86
Definition: exception.h:22
virtual double getElapsed()
Definition: timer.h:116
bool singleComponent
Definition: solver.h:61
void bicgstab2(SpmvOperator &spmv, const SolverVector &b, SolverVector &x, Monitor< SolverVector > &monitor, std::vector< Precond< PrecVector > * > &precond_pointers, IntVectorH &compIndices, IntVectorH &comp_perms, std::vector< IntVectorH > &comp_reorderings)
Specializations of the generic spike::bicgstabl function for L=2.
Definition: bicgstab2.h:260
CPU timer.
Definition: timer.h:101
double time_shuffle
Definition: solver.h:89