preconditioner_array.cc
Go to the documentation of this file.
1 //LIC// ====================================================================
2 //LIC// This file forms part of oomph-lib, the object-oriented,
3 //LIC// multi-physics finite-element library, available
4 //LIC// at http://www.oomph-lib.org.
5 //LIC//
6 //LIC// Version 1.0; svn revision $LastChangedRevision$
7 //LIC//
8 //LIC// $LastChangedDate$
9 //LIC//
10 //LIC// Copyright (C) 2006-2016 Matthias Heil and Andrew Hazel
11 //LIC//
12 //LIC// This library is free software; you can redistribute it and/or
13 //LIC// modify it under the terms of the GNU Lesser General Public
14 //LIC// License as published by the Free Software Foundation; either
15 //LIC// version 2.1 of the License, or (at your option) any later version.
16 //LIC//
17 //LIC// This library is distributed in the hope that it will be useful,
18 //LIC// but WITHOUT ANY WARRANTY; without even the implied warranty of
19 //LIC// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 //LIC// Lesser General Public License for more details.
21 //LIC//
22 //LIC// You should have received a copy of the GNU Lesser General Public
23 //LIC// License along with this library; if not, write to the Free Software
24 //LIC// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25 //LIC// 02110-1301 USA.
26 //LIC//
27 //LIC// The authors may be contacted at oomph-lib@maths.man.ac.uk.
28 //LIC//
29 //LIC//====================================================================
30 
31 // Config header generated by autoconfig
32 #ifdef HAVE_CONFIG_H
33 #include <oomph-lib-config.h>
34 #endif
35 
36 // Preconditioner array is only useful if we have mpi, otherwise a dummy
37 // implmentation is used and this file doesn't need to implement anything
38 // (see the header file).
39 #ifdef OOMPH_HAS_MPI
40 
41 //oomph-lib includes
42 #include "preconditioner_array.h"
43 
44 namespace oomph
45 {
46 
47 
48  //============================================================================
49  /// Setup the preconditioners. Sets up each preconditioner in the
50  /// array for the corresponding matrix in the vector matrix_pt.
51  /// The number of preconditioners in the array is taken to be the length of
52  /// prec_pt.
53  //============================================================================
55  (Vector<CRDoubleMatrix*> matrix_pt,
56  Vector<Preconditioner*> prec_pt,
57  const OomphCommunicator* comm_pt)
58  {
59  // clean memory
60  this->clean_up_memory();
61 
62  // get the number of preconditioners in the array
63  Nprec = prec_pt.size();
64 
65 #ifdef PARANOID
66  // check that the preconditioners have been set
67  if (Nprec < 2)
68  {
69  std::ostringstream error_message;
70  error_message << "The PreconditionerArray requires at least 2 "
71  << "preconditioners";
72  throw OomphLibError(error_message.str(),
73  OOMPH_CURRENT_FUNCTION,
74  OOMPH_EXCEPTION_LOCATION);
75  }
76  // first check that the vector matrix_pt is the correct length
77  if (matrix_pt.size() != Nprec)
78  {
79  std::ostringstream error_message;
80  error_message << "The same number of preconditioners and matrices must "
81  << "be passed to the setup_preconditioners(...).";
82  throw OomphLibError(error_message.str(),
83  OOMPH_CURRENT_FUNCTION,
84  OOMPH_EXCEPTION_LOCATION);
85  }
86 
87  // Resize the storage of the PARANOID check distributions
88  // Already cleared by clean_up_memory call at top of function
89  Distribution_pt.resize(Nprec);
90 #endif
91 
92  // for each matrix... PARANOID and store copy of global communicator
93  for (unsigned i = 0; i < Nprec; i++)
94  {
95 
96 #ifdef PARANOID
97  // paranoid check that each matrix is a CRDoubleMatrix and that
98  // it is built
99  if (matrix_pt[i] == 0)
100  {
101  std::ostringstream error_message;
102  error_message << "matrix_pt[" << i << "] = NULL.";
103  throw OomphLibError(error_message.str(),
104  OOMPH_CURRENT_FUNCTION,
105  OOMPH_EXCEPTION_LOCATION);
106  }
107 
108  // check the matrix has been built
109  if (!matrix_pt[i]->built())
110  {
111  std::ostringstream error_message;
112  error_message << "Matrix " << i << " has not been built.";
113  throw OomphLibError(error_message.str(),
114  OOMPH_CURRENT_FUNCTION,
115  OOMPH_EXCEPTION_LOCATION);
116 
117  }
118 #endif
119 
120  // check that all the matrices have the same communicator
121  // and store a copy of the communicator
122  if (i == 0)
123  {
125  new OomphCommunicator
126  (matrix_pt[i]->distribution_pt()->communicator_pt());
127  }
128 
129 #ifdef PARANOID
130  else
131  {
132  if (*Global_communicator_pt !=
133  *matrix_pt[i]->distribution_pt()->communicator_pt())
134  {
135  std::ostringstream error_message;
136  error_message << "All matrices must have the same communicator.";
137  throw OomphLibError(error_message.str(),
138  OOMPH_CURRENT_FUNCTION,
139  OOMPH_EXCEPTION_LOCATION);
140  }
141  }
142 
143  // store a copy of the Distribution of each preconditioner for future
144  // PARANOID checks
145  Distribution_pt[i] = new LinearAlgebraDistribution
146  (matrix_pt[i]->distribution_pt());
147 #endif
148  }
149 
150  // number of processors
151  unsigned nproc = Global_communicator_pt->nproc();
152 
153  // next compute the distribution of the preconditioner over the processors
154  // such that each preconditioner has an (as to near to) equal number of
155  // processors
156  First_proc_for_prec.resize(Nprec);
157  Nproc_for_prec.resize(Nprec);
158 
159  // compute first row
160  for (unsigned p=0;p<Nprec;p++)
161  {
162  First_proc_for_prec[p] = unsigned(double(p*nproc)/
163  double(Nprec));
164  }
165 
166  // compute local nrow
167  for (unsigned p=0; p<Nprec-1; p++)
168  {
170  }
171  Nproc_for_prec[Nprec-1] = nproc - First_proc_for_prec[Nprec-1];
172 
173  #ifdef PARANOID
174  // paranoid check that every preconditioner has more than one processor
175  for (unsigned p=0;p<Nprec;p++)
176  {
177  if (Nproc_for_prec[p] == 0)
178  {
179  std::ostringstream error_message;
180  error_message << "We only have " << nproc << " processor[s]!\n"
181  << "This is not enough to perform the " << Nprec
182  << " block solves in parallel! Sorry! \n"
183  << "Please run this with more processors or disable the\n"
184  << "request for two-level paralellism.\n";
185  throw OomphLibError(error_message.str(),
186  OOMPH_CURRENT_FUNCTION,
187  OOMPH_EXCEPTION_LOCATION);
188  }
189  }
190 #endif
191 
192  // compute the color of this processor
193  Color = 0;
194  unsigned my_rank = Global_communicator_pt->my_rank();
195  while (!(First_proc_for_prec[Color] <= my_rank &&
196  First_proc_for_prec[Color] + Nproc_for_prec[Color] > my_rank))
197  {
198  Color++;
199  }
200 
201  // create the local preconditioner
202  Local_communicator_pt = Global_communicator_pt->split(Color,my_rank);
203 
204  // pointer for the local matrix on this processor
205  CRDoubleMatrix* local_matrix_pt = 0;
206 
207  // resize storage for details of the data to be sent and received
208  First_row_for_proc.resize(Nprec);
209  Nrow_local_for_proc.resize(Nprec);
210  First_row_from_proc.resize(Nprec);
211  Nrow_local_from_proc.resize(Nprec);
212 
213  // Vector of MPI_Requests - used for distributed matrices
214  Vector<MPI_Request> req;
215 
216  // Counter for the number of requests used
217  unsigned c = 0;
218 
219  // storage for the target distribution
220  Vector< Vector<unsigned> > target_first_row(Nprec);
221  Vector< Vector<unsigned> > target_nrow_local(Nprec);
222 
223  // create storage for the nnz to be sent and received for each
224  // preconditioner
225  Vector< Vector<unsigned> > nnz_send(Nprec);
226  Vector< Vector<unsigned> > nnz_recv(Nprec);
227 
228 
229  /////////////////////////////////////////////////////////////////////////////
230  /////////////////////////////////////////////////////////////////////////////
231  /////////////////////////////////////////////////////////////////////////////
232  /////////////////////////////////////////////////////////////////////////////
233 
234 
235 
236  // METHOD 0
237  if (Method == 0)
238  {
239 
240  // for every matrix we assemble the duplicate of the matrix on fewer
241  // processors and setup the preconditioner
242  for (unsigned i = 0; i < Nprec; i++)
243  {
244 
245  // if the matrix is global (!distributed) then just construct a copy
246  // on the subset of processors
247  if (matrix_pt[i]->distributed())
248  {
249 
250  // first compute the distribution of this preconditioner on its subset
251  // of processors
252 
253  // number of rows for this preconditioner
254  unsigned nrow = matrix_pt[i]->nrow();
255 
256  // setup First_row_for_local_prec and Nrow_local_for_local_prec
257  target_first_row[i].resize(nproc);
258  target_nrow_local[i].resize(nproc);
259  unsigned nproc_local = Nproc_for_prec[i];
260  for (unsigned p = 0; p < nproc_local; p++)
261  {
262  int pp = First_proc_for_prec[i] + p;
263  target_first_row[i][pp] = unsigned(double(p*nrow)/
264  double(nproc_local));
265  }
266  for (unsigned p = 0; p < nproc_local-1; p++)
267  {
268  int pp = First_proc_for_prec[i] + p;
269  target_nrow_local[i][pp] = target_first_row[i][pp+1]
270  - target_first_row[i][pp];
271  }
272  unsigned last_local_proc = First_proc_for_prec[i] + nproc_local - 1;
273  target_nrow_local[i][last_local_proc] = nrow -
274  target_first_row[i][last_local_proc];
275 
276  // get the details of the current distribution
277  Vector<unsigned> current_first_row(nproc);
278  Vector<unsigned> current_nrow_local(nproc);
279  for (unsigned p = 0; p < nproc; p++)
280  {
281  current_first_row[p] = matrix_pt[i]->first_row(p);
282  current_nrow_local[p] = matrix_pt[i]->nrow_local(p);
283  }
284 
285  // resize storage for details of the data to be sent and received
286  First_row_for_proc[i].resize(nproc,0);
287  Nrow_local_for_proc[i].resize(nproc,0);
288  First_row_from_proc[i].resize(nproc,0);
289  Nrow_local_from_proc[i].resize(nproc,0);
290 
291  // for every processor compute first_row and nrow_local that will
292  // will sent and received by this processor
293  for (unsigned p = 0; p < nproc; p++)
294  {
295  // start with data to be sent
296  if ((target_first_row[i][p] < (current_first_row[my_rank] +
297  current_nrow_local[my_rank])) &&
298  (current_first_row[my_rank] < (target_first_row[i][p] +
299  target_nrow_local[i][p])))
300  {
301  First_row_for_proc[i][p] =
302  std::max(current_first_row[my_rank],
303  target_first_row[i][p]);
304  Nrow_local_for_proc[i][p] =
305  std::min((current_first_row[my_rank] +
306  current_nrow_local[my_rank]),
307  (target_first_row[i][p] +
308  target_nrow_local[i][p])) - First_row_for_proc[i][p];
309  }
310 
311  // and data to be received
312  if ((target_first_row[i][my_rank] < (current_first_row[p] +
313  current_nrow_local[p]))
314  && (current_first_row[p] < (target_first_row[i][my_rank] +
315  target_nrow_local[i][my_rank])))
316  {
317  First_row_from_proc[i][p] =
318  std::max(current_first_row[p],
319  target_first_row[i][my_rank]);
320  Nrow_local_from_proc[i][p] =
321  std::min((current_first_row[p] +
322  current_nrow_local[p]),
323  (target_first_row[i][my_rank] +
324  target_nrow_local[i][my_rank]))-
326  }
327  }
328 
329  // resize nnz_send
330  nnz_send[i].resize(nproc);
331 
332  // compute the number of nnzs to be sent
333  // and the number of send and receive requests to be made (nreq)
334  for (unsigned p = 0; p < nproc; p++)
335  {
336  if (Nrow_local_for_proc[i][p] != 0)
337  {
338  int* row_start = matrix_pt[i]->row_start();
339  unsigned k = First_row_for_proc[i][p]-current_first_row[my_rank];
340  nnz_send[i][p] = row_start[k + Nrow_local_for_proc[i][p]] -
341  row_start[k];
342  }
343  }
344 
345  // send nnz to be sent to each processor
346  for (unsigned p = 0; p < nproc; p++)
347  {
348 
349  // dont mpi send to send
350  if (p != my_rank)
351  {
352 
353  // non block send
354  if (Nrow_local_for_proc[i][p] != 0)
355  {
356 
357  // send to other processors
358  int tag = this->compute_tag(nproc,my_rank,p,0);
359  MPI_Request tr;
360  req.push_back(tr);
361  MPI_Isend(&nnz_send[i][p],1,MPI_UNSIGNED,p,tag,
362  Global_communicator_pt->mpi_comm(),&req[c]);
363  c++;
364  }
365  }
366  }
367 
368  // resize nnz_recv
369  nnz_recv[i].resize(nproc);
370 
371  // receive nnz from other processors
372  for (unsigned pp = 0; pp < nproc; pp++)
373  {
374 
375  // next processor to receive from
376  unsigned p = (nproc + my_rank - pp)%nproc;
377 
378  // dont mpi send to send
379  if (p != my_rank)
380  {
381 
382  // blocking recv
383  if (Nrow_local_from_proc[i][p] != 0)
384  {
385  int tag = this->compute_tag(nproc,p,my_rank,0);
386  MPI_Status stat;
387  unsigned nnz_temp;
388  MPI_Recv(&nnz_temp,1,MPI_UNSIGNED,p,tag,
389  Global_communicator_pt->mpi_comm(),&stat);
390  nnz_recv[i][p] = nnz_temp;
391  }
392  }
393 
394  // receive from self
395  else
396  {
397  nnz_recv[i][p] = nnz_send[i][p];
398  }
399  }
400 
401  // get pointers to the underlying data in the current matrix
402  double* values_send = matrix_pt[i]->value();
403  int* row_start_send = matrix_pt[i]->row_start();
404  int* column_index_send = matrix_pt[i]->column_index();
405 
406  // send and receive the contents of the vector
407  for (unsigned p = 0; p < nproc; p++)
408  {
409 
410  // use mpi methods to send to and receive from all but my rank
411  if (p != my_rank)
412  {
413 
414  // send
415  if (nnz_send[i][p] != 0)
416  {
417 
418  // compute the offset for row_start
419  int offset_n =
420  First_row_for_proc[i][p]-current_first_row[my_rank];
421 
422  // compute the offset for the values and column_index
423  int offset_nnz = row_start_send[offset_n];
424 
425  // values
426  int tag = this->compute_tag(nproc,my_rank,p,1);
427  MPI_Request tr1;
428  req.push_back(tr1);
429  MPI_Isend(values_send + offset_nnz,int(nnz_send[i][p]),
430  MPI_DOUBLE,
431  p,tag,Global_communicator_pt->mpi_comm(),&req[c]);
432  c++;
433 
434  // column_index
435  tag = this->compute_tag(nproc,my_rank,p,2);
436  MPI_Request tr2;
437  req.push_back(tr2);
438  MPI_Isend(column_index_send + offset_nnz,int(nnz_send[i][p]),
439  MPI_INT,p,tag,Global_communicator_pt->mpi_comm(),
440  &req[c]);
441  c++;
442 
443  // row_start
444  tag = this->compute_tag(nproc,my_rank,p,3);
445  MPI_Request tr3;
446  req.push_back(tr3);
447  MPI_Isend(row_start_send + offset_n,
448  int(Nrow_local_for_proc[i][p]),MPI_INT,p,tag,
449  Global_communicator_pt->mpi_comm(),&req[c]);
450  c++;
451  }
452  }
453  }
454  }
455  }
456 
457  // for every matrix we assemble the duplicate of the matrix on fewer
458  // processors and setup the preconditioner
459  for (unsigned i = 0; i < Nprec; i++)
460  {
461 
462  // if the matrix is global (!distributed) then just construct a copy
463  // on the subset of processors
464  if (!matrix_pt[i]->distributed())
465  {
466  oomph_info << "matrix not distributed" << std::endl;
467  // if this matrix is to be preconditioned my this processor
468  if (i == Color)
469  {
470 
471  // create the local distribution for this matrix
472  LinearAlgebraDistribution* temp_dist_pt =
473  new LinearAlgebraDistribution(Local_communicator_pt,
474  matrix_pt[i]->nrow(),
475  false);
476 
477  // create the corresponding matrix
478  local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
479  delete temp_dist_pt; // (dist has now been copied)
480 
481  // get pointers to the underlying data
482  double* values_pt = matrix_pt[i]->value();
483  int* column_index_pt = matrix_pt[i]->column_index();
484  int* row_start_pt = matrix_pt[i]->row_start();
485 
486  // build the matrix without a copy of the data
487  local_matrix_pt->build_without_copy(matrix_pt[i]->ncol(),
488  matrix_pt[i]->nnz(),
489  values_pt,
490  column_index_pt,
491  row_start_pt);
492  }
493  }
494 
495  // else we assemble a copy of the matrix distributed over a subset of
496  // processors
497  else
498  {
499 
500  // number of rows for this preconditioner
501 
502  // if we are assembling the matrix on this processor
503  if (i == Color)
504  {
505 
506 
507  // create the local distribution for this matrix
508  LinearAlgebraDistribution* temp_dist_pt =
509  new LinearAlgebraDistribution
510  (Local_communicator_pt,target_first_row[i][my_rank],
511  target_nrow_local[i][my_rank]);
512 
513  // create the corresponding matrix
514  local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
515  delete temp_dist_pt; // (dist has now been copied)
516 
517  // get the number of nnzs to be received from each processor
518 
519  // total number of nnz to be reveived
520  unsigned nnz_total = 0;
521  for (unsigned p = 0; p < nproc; p++)
522  {
523  nnz_total += nnz_recv[i][p];
524  }
525 
526  // compute nnz block start
527  Vector<unsigned> nnz_start_proc;
528  Vector<unsigned> nnz_start_index;
529  unsigned row_ptr = target_first_row[i][my_rank];
530  int p = 0;
531  unsigned nnz_ptr = 0;
532  for (p = 0; p < int(nproc); p++)
533  {
534 
535  if (First_row_from_proc[i][p] == row_ptr &&
536  Nrow_local_from_proc[i][p] != 0 &&
537  nnz_ptr != nnz_total)
538  {
539  nnz_start_proc.push_back(p);
540  nnz_start_index.push_back(nnz_ptr);
541  nnz_ptr += nnz_recv[i][p];
542  row_ptr += Nrow_local_from_proc[i][p];
543  p = -1;
544  }
545  }
546 
547  // storage for received data
548  double* values_recv = new double[nnz_total];
549  int* column_index_recv = new int[nnz_total];
550  int* row_start_recv = new int[target_nrow_local[i][my_rank]+1];
551 
552  // send and receive the contents of the vector
553  for (unsigned pp = 0; pp < nproc; pp++)
554  {
555 
556  // next processor to receive from
557  unsigned p = (nproc + my_rank - pp)%nproc;
558 
559  // use mpi methods to send to and receive from all but my rank
560  if (p != my_rank)
561  {
562 
563  // just receive
564  if (nnz_recv[i][p] != 0)
565  {
566 
567  // compute the offset for row_start
568  int offset_n =
569  First_row_from_proc[i][p]-target_first_row[i][my_rank];
570 
571  // compute the offset for the values and column_index
572  unsigned k = 0;
573  while (nnz_start_proc[k] != p)
574  {
575  k++;
576  }
577  int offset_nnz = nnz_start_index[k];
578 
579  // values
580  int tag = this->compute_tag(nproc,p,my_rank,1);
581  MPI_Status stat1;
582  MPI_Recv(values_recv + offset_nnz,int(nnz_recv[i][p]),
583  MPI_DOUBLE,p,tag,Global_communicator_pt->mpi_comm(),
584  &stat1);
585 
586  // column_index
587  tag = this->compute_tag(nproc,p,my_rank,2);
588  MPI_Status stat2;
589  MPI_Recv(column_index_recv + offset_nnz,int(nnz_recv[i][p]),
590  MPI_INT,p,tag,Global_communicator_pt->mpi_comm(),
591  &stat2);
592 
593  // row_start
594  tag = this->compute_tag(nproc,p,my_rank,3);
595  MPI_Status stat3;
596  MPI_Recv(row_start_recv + offset_n,
597  int(Nrow_local_from_proc[i][p]),MPI_INT,p,tag,
598  Global_communicator_pt->mpi_comm(),&stat3);
599  }
600  }
601  // otehrwise just send to self (or copy)
602  else
603  {
604  if (nnz_recv[i][p] != 0)
605  {
606 
607  // get pointers to the underlying data in the current matrix
608  double* values_send = matrix_pt[i]->value();
609  int* row_start_send = matrix_pt[i]->row_start();
610  int* column_index_send = matrix_pt[i]->column_index();
611 
612  // offset for row_start send to self
613  unsigned offset_n_send =
614  First_row_for_proc[i][my_rank]-matrix_pt[i]->first_row(p);
615  // offset for values and column+_index send to self
616  unsigned offset_nnz_send = row_start_send[offset_n_send];
617 
618  // offset for row_start receive from self
619  unsigned offset_n_recv =
620  First_row_from_proc[i][my_rank]-target_first_row[i][my_rank];
621 
622  // offset for values and columm_index receive from self
623  unsigned k = 0;
624  while (nnz_start_proc[k] != p)
625  {
626  k++;
627  }
628  unsigned offset_nnz_recv = nnz_start_index[k];
629 
630  // and send
631 
632  // values and column_index
633  unsigned n_nnz = nnz_send[i][my_rank];
634  for (unsigned j = 0; j < n_nnz; j++)
635  {
636  values_recv[offset_nnz_recv + j] =
637  values_send[offset_nnz_send + j];
638  column_index_recv[offset_nnz_recv + j] =
639  column_index_send[offset_nnz_send + j];
640  }
641 
642  // row start
643  unsigned n_n = Nrow_local_from_proc[i][my_rank];
644  for (unsigned j = 0; j < n_n; j++)
645  {
646  row_start_recv[offset_n_recv + j] =
647  row_start_send[offset_n_send + j];
648  }
649  }
650  }
651  }
652 
653 
654  // number of processors contributing to the local vector on this
655  // processor
656 
657  // update the row start
658  unsigned nproc_contrib = nnz_start_index.size();
659  for (unsigned j = 0; j < nproc_contrib; j++)
660  {
661  unsigned first = First_row_from_proc[i][nnz_start_proc[j]] -
662  target_first_row[i][my_rank];
663  unsigned last = first + Nrow_local_from_proc[i][nnz_start_proc[j]];
664  unsigned nnz_inc = nnz_start_index[j]-row_start_recv[first];
665  for (unsigned k = first; k < last; k++)
666  {
667  row_start_recv[k]+=nnz_inc;
668  }
669  }
670  row_start_recv[target_nrow_local[i][my_rank]] = int(nnz_total);
671 
672  // build the matrix without a copy of the data
673  local_matrix_pt->build_without_copy(matrix_pt[i]->ncol(),
674  nnz_total,
675  values_recv,
676  column_index_recv,
677  row_start_recv);
678  }
679  }
680  }
681 
682  // wait for all sends to complete
683  if (c!=0)
684  {
685  Vector<MPI_Status> stat(c);
686  MPI_Waitall(c,&req[0],&stat[0]);
687  }
688  }
689 
690 
691  /////////////////////////////////////////////////////////////////////////////
692  /////////////////////////////////////////////////////////////////////////////
693  /////////////////////////////////////////////////////////////////////////////
694  /////////////////////////////////////////////////////////////////////////////
695 
696 
697  // METHOD 1
698  else if (Method == 1)
699  {
700 
701  // temporary storgage for nnz recv
702  unsigned* nnz_recv_temp = new unsigned[nproc*Nprec];
703  for (unsigned j = 0; j < nproc*Nprec; j++)
704  {
705  nnz_recv_temp[j] = 0;
706  }
707 
708  // for every matrix we assemble the duplicate of the matrix on fewer
709  // processors and setup the preconditioner
710  for (unsigned i = 0; i < Nprec; i++)
711  {
712 
713  // if the matrix is global (!distributed) then just construct a copy
714  // on the subset of processors
715  if (!matrix_pt[i]->distributed())
716  {
717 
718  // if this matrix is to be preconditioned my this processor
719  if (i == Color)
720  {
721 
722  // create the local distribution for this matrix
723  LinearAlgebraDistribution* temp_dist_pt =
724  new LinearAlgebraDistribution(Local_communicator_pt,
725  matrix_pt[i]->nrow(),
726  false);
727 
728  // create the corresponding matrix
729  local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
730  delete temp_dist_pt; // (dist has now been copied)
731 
732  // get pointers to the underlying data
733  double* values_pt = matrix_pt[i]->value();
734  int* column_index_pt = matrix_pt[i]->column_index();
735  int* row_start_pt = matrix_pt[i]->row_start();
736 
737  // build the matrix without a copy of the data
738  local_matrix_pt->build_without_copy(matrix_pt[i]->ncol(),
739  matrix_pt[i]->nnz(),
740  values_pt,
741  column_index_pt,
742  row_start_pt);
743  }
744  }
745 
746  // if the matrix is global (!distributed) then just construct a copy
747  // on the subset of processors
748  else
749  {
750 
751  // first compute the distribution of this preconditioner on its subset
752  // of processors
753 
754  // number of rows for this preconditioner
755  unsigned nrow = matrix_pt[i]->nrow();
756 
757  // setup First_row_for_local_prec and Nrow_local_for_local_prec
758  target_first_row[i].resize(nproc);
759  target_nrow_local[i].resize(nproc);
760  unsigned nproc_local = Nproc_for_prec[i];
761  for (unsigned p = 0; p < nproc_local; p++)
762  {
763  int pp = First_proc_for_prec[i] + p;
764  target_first_row[i][pp] = unsigned(double(p*nrow)/
765  double(nproc_local));
766  }
767  for (unsigned p = 0; p < nproc_local-1; p++)
768  {
769  int pp = First_proc_for_prec[i] + p;
770  target_nrow_local[i][pp] = target_first_row[i][pp+1]
771  - target_first_row[i][pp];
772  }
773  unsigned last_local_proc = First_proc_for_prec[i] + nproc_local - 1;
774  target_nrow_local[i][last_local_proc] = nrow -
775  target_first_row[i][last_local_proc];
776 
777  // get the details of the current distribution
778  Vector<unsigned> current_first_row(nproc);
779  Vector<unsigned> current_nrow_local(nproc);
780  for (unsigned p = 0; p < nproc; p++)
781  {
782  current_first_row[p] = matrix_pt[i]->first_row(p);
783  current_nrow_local[p] = matrix_pt[i]->nrow_local(p);
784  }
785 
786  // resize storage for details of the data to be sent and received
787  First_row_for_proc[i].resize(nproc,0);
788  Nrow_local_for_proc[i].resize(nproc,0);
789  First_row_from_proc[i].resize(nproc,0);
790  Nrow_local_from_proc[i].resize(nproc,0);
791 
792  // for every processor compute first_row and nrow_local that will
793  // will sent and received by this processor
794  for (unsigned p = 0; p < nproc; p++)
795  {
796  // start with data to be sent
797  if ((target_first_row[i][p] < (current_first_row[my_rank] +
798  current_nrow_local[my_rank])) &&
799  (current_first_row[my_rank] < (target_first_row[i][p] +
800  target_nrow_local[i][p])))
801  {
802  First_row_for_proc[i][p] =
803  std::max(current_first_row[my_rank],
804  target_first_row[i][p]);
805  Nrow_local_for_proc[i][p] =
806  std::min((current_first_row[my_rank] +
807  current_nrow_local[my_rank]),
808  (target_first_row[i][p] +
809  target_nrow_local[i][p])) - First_row_for_proc[i][p];
810  }
811 
812  // and data to be received
813  if ((target_first_row[i][my_rank] < (current_first_row[p] +
814  current_nrow_local[p]))
815  && (current_first_row[p] < (target_first_row[i][my_rank] +
816  target_nrow_local[i][my_rank])))
817  {
818  First_row_from_proc[i][p] =
819  std::max(current_first_row[p],
820  target_first_row[i][my_rank]);
821  Nrow_local_from_proc[i][p] =
822  std::min((current_first_row[p] +
823  current_nrow_local[p]),
824  (target_first_row[i][my_rank] +
825  target_nrow_local[i][my_rank]))-
827  }
828  }
829 
830  // resize nnz_send
831  nnz_send[i].resize(nproc);
832 
833  // compute the number of nnzs to be sent
834  // and the number of send and receive requests to be made (nreq)
835  for (unsigned p = 0; p < nproc; p++)
836  {
837  if (Nrow_local_for_proc[i][p] != 0)
838  {
839  int* row_start = matrix_pt[i]->row_start();
840  unsigned k = First_row_for_proc[i][p]-current_first_row[my_rank];
841  nnz_send[i][p] = row_start[k + Nrow_local_for_proc[i][p]] -
842  row_start[k];
843  }
844  }
845 
846  // resize nnz_recv
847  nnz_recv[i].resize(nproc);
848 
849  // send nnz to be sent to each processor
850  for (unsigned p = 0; p < nproc; p++)
851  {
852 
853  // send and recv
854 
855  // dont mpi send to self
856  if (p != my_rank)
857  {
858 
859  // non block send
860  if (Nrow_local_for_proc[i][p] != 0)
861  {
862 
863  // send to other processors
864  int tag = this->compute_tag(nproc,my_rank,p,0);
865  MPI_Request tr;
866  req.push_back(tr);
867  MPI_Isend(&nnz_send[i][p],1,MPI_UNSIGNED,p,tag,
868  Global_communicator_pt->mpi_comm(),&req[c]);
869  c++;
870  }
871 
872  // non blocking recv
873  if (Nrow_local_from_proc[i][p] != 0)
874  {
875  int tag = this->compute_tag(nproc,p,my_rank,0);
876  MPI_Request tr;
877  req.push_back(tr);
878  MPI_Irecv(nnz_recv_temp + (i*nproc) + p,1,MPI_UNSIGNED,p,tag,
879  Global_communicator_pt->mpi_comm(),&req[c]);
880  c++;
881  }
882  }
883  // receive from self
884  else
885  {
886  if (Nrow_local_for_proc[i][p] != 0)
887  {
888  nnz_recv_temp[(i*nproc)+p] = nnz_send[i][p];
889  }
890  }
891  }
892  }
893  }
894  if (c!=0)
895  {
896  Vector<MPI_Status> stat(c);
897  MPI_Waitall(c,&req[0],&stat[0]);
898  req.clear();
899  stat.clear();
900  }
901  c=0;
902  for (unsigned i = 0; i < Nprec; i++)
903  {
904  for (unsigned p = 0; p < nproc; p++)
905  {
906  nnz_recv[i][p] = nnz_recv_temp[(i*nproc)+p];
907  }
908  }
909  delete nnz_recv_temp;
910 
911  // get the number of nnzs to be received from each processor
912 
913  // total number of nnz to be reveived
914  unsigned nnz_total = 0;
915  for (unsigned p = 0; p < nproc; p++)
916  {
917  nnz_total += nnz_recv[Color][p];
918  }
919 
920  // compute nnz block start
921  Vector<unsigned> nnz_start_proc;
922  Vector<unsigned> nnz_start_index;
923  unsigned row_ptr = target_first_row[Color][my_rank];
924  int p = 0;
925  unsigned nnz_ptr = 0;
926  for (p = 0; p < int(nproc); p++)
927  {
928  if (First_row_from_proc[Color][p] == row_ptr &&
929  Nrow_local_from_proc[Color][p] != 0 &&
930  nnz_ptr != nnz_total)
931  {
932  nnz_start_proc.push_back(p);
933  nnz_start_index.push_back(nnz_ptr);
934  nnz_ptr += nnz_recv[Color][p];
935  row_ptr += Nrow_local_from_proc[Color][p];
936  p = -1;
937  }
938  }
939 
940  // storage for derived datatypes
941  Vector<MPI_Datatype> datatypes;
942 
943  // storage for received data
944  double* values_recv = new double[nnz_total];
945  int* column_index_recv = new int[nnz_total];
946  int* row_start_recv = new int[target_nrow_local[Color][my_rank]+1];
947 
948  ///////////////////////////////////////////////////////////////////////////
949  // SEND
950  ///////////////////////////////////////////////////////////////////////////
951  unsigned c_send = 0;
952  Vector<MPI_Request> send_req;
953 
954  // for every matrix we assemble the duplicate of the matrix on fewer
955  // processors and setup the preconditioner
956  for (unsigned i = 0; i < Nprec; i++)
957  {
958 
959  // get pointers to the underlying data in the current matrix
960  double* values_send = matrix_pt[i]->value();
961  int* row_start_send = matrix_pt[i]->row_start();
962  int* column_index_send = matrix_pt[i]->column_index();
963 
964  // send and receive the contents of the vector
965  for (unsigned p = 0; p < nproc; p++)
966  {
967 
968  // use mpi methods to send to and receive from all but my rank
969  if (p != my_rank)
970  {
971 
972  // send
973  if (nnz_send[i][p] != 0)
974  {
975 
976  // create 3 MPI contiguous datatypes
977  // + values
978  // + column_index
979  // + row_start
980 
981  // values
982  MPI_Datatype datatype_values;
983  MPI_Type_contiguous(int(nnz_send[i][p]),MPI_DOUBLE,
984  &datatype_values);
985  MPI_Type_commit(&datatype_values);
986  datatypes.push_back(datatype_values);
987 
988  // column index
989  MPI_Datatype datatype_column_index;
990  MPI_Type_contiguous(int(nnz_send[i][p]),MPI_INT,
991  &datatype_column_index);
992  MPI_Type_commit(&datatype_column_index);
993  datatypes.push_back(datatype_column_index);
994 
995  // row start
996  MPI_Datatype datatype_row_start;
997  MPI_Type_contiguous(int(Nrow_local_for_proc[i][p]),MPI_INT,
998  &datatype_row_start);
999  MPI_Type_commit(&datatype_row_start);
1000  datatypes.push_back(datatype_row_start);
1001 
1002  // assemble the typelist
1003  MPI_Datatype typelist[3];
1004  typelist[0] = datatype_values;
1005  typelist[1] = datatype_column_index;
1006  typelist[2] = datatype_row_start;
1007 
1008  // compute the offset for row_start
1009  int offset_n =
1010  First_row_for_proc[i][p]-matrix_pt[i]->first_row(my_rank);
1011 
1012  // compute the offset for the values and column_index
1013  int offset_nnz = row_start_send[offset_n];
1014 
1015  // next compute the displacements
1016  MPI_Aint displacements[3];
1017  MPI_Get_address(values_send + offset_nnz,&displacements[0]);
1018  MPI_Get_address(column_index_send + offset_nnz,&displacements[1]);
1019  MPI_Get_address(row_start_send + offset_n,&displacements[2]);
1020  for (int j = 2; j >= 0; j--)
1021  {
1022  displacements[j] -= displacements[0];
1023  }
1024 
1025  // set the block lengths
1026  int block_length[3];
1027  block_length[0] = block_length[1] = block_length[2] = 1;
1028 
1029  // now build the final datatype
1030  MPI_Datatype send_type;
1031  MPI_Type_create_struct(3,block_length,displacements,typelist,
1032  &send_type);
1033  MPI_Type_commit(&send_type);
1034  datatypes.push_back(send_type);
1035 
1036  // send
1037  int tag = this->compute_tag(nproc,my_rank,p,1);
1038  MPI_Request tr1;
1039  send_req.push_back(tr1);
1040  MPI_Isend(values_send + offset_nnz,1,send_type,
1041  p,tag,Global_communicator_pt->mpi_comm(),
1042  &send_req[c_send]);
1043  c_send++;
1044  }
1045  }
1046  }
1047  }
1048 
1049  ///////////////////////////////////////////////////////////////////////////
1050  // RECV
1051  ///////////////////////////////////////////////////////////////////////////
1052  unsigned c_recv = 0;
1053  Vector<MPI_Request> recv_req;
1054 
1055  // receive the contents of the vector
1056  for (unsigned p = 0; p < nproc; p++)
1057  {
1058 
1059  // use mpi methods to send to and receive from all but my rank
1060  if (p != my_rank)
1061  {
1062 
1063  // just receive
1064  if (nnz_recv[Color][p] != 0)
1065  {
1066 
1067  // create 3 MPI contiguous datatypes
1068  // + values
1069  // + column_index
1070  // + row_start
1071 
1072  // values
1073  MPI_Datatype datatype_values;
1074  MPI_Type_contiguous(int(nnz_recv[Color][p]),MPI_DOUBLE,
1075  &datatype_values);
1076  MPI_Type_commit(&datatype_values);
1077  datatypes.push_back(datatype_values);
1078 
1079  // column index
1080  MPI_Datatype datatype_column_index;
1081  MPI_Type_contiguous(int(nnz_recv[Color][p]),MPI_INT,
1082  &datatype_column_index);
1083  MPI_Type_commit(&datatype_column_index);
1084  datatypes.push_back(datatype_column_index);
1085 
1086  // row start
1087  MPI_Datatype datatype_row_start;
1088  MPI_Type_contiguous(int(Nrow_local_from_proc[Color][p]),MPI_INT,
1089  &datatype_row_start);
1090  MPI_Type_commit(&datatype_row_start);
1091  datatypes.push_back(datatype_row_start);
1092 
1093  // assemble the typelist
1094  MPI_Datatype typelist[3];
1095  typelist[0] = datatype_values;
1096  typelist[1] = datatype_column_index;
1097  typelist[2] = datatype_row_start;
1098 
1099  // compute the offset for row_start
1100  int offset_n =
1101  First_row_from_proc[Color][p]-target_first_row[Color][my_rank];
1102 
1103  // compute the offset for the values and column_index
1104  unsigned k = 0;
1105  while (nnz_start_proc[k] != p)
1106  {
1107  k++;
1108  }
1109  int offset_nnz = nnz_start_index[k];
1110 
1111  // next compute the displacements
1112  MPI_Aint displacements[3];
1113  MPI_Get_address(values_recv + offset_nnz,&displacements[0]);
1114  MPI_Get_address(column_index_recv + offset_nnz,&displacements[1]);
1115  MPI_Get_address(row_start_recv + offset_n,&displacements[2]);
1116  for (int j = 2; j >= 0; j--)
1117  {
1118  displacements[j] -= displacements[0];
1119  }
1120 
1121  // set the block lengths
1122  int block_length[3];
1123  block_length[0] = block_length[1] = block_length[2] = 1;
1124 
1125  // now build the final datatype
1126  MPI_Datatype recv_type;
1127  MPI_Type_create_struct(3,block_length,displacements,typelist,
1128  &recv_type);
1129  MPI_Type_commit(&recv_type);
1130  datatypes.push_back(recv_type);
1131 
1132  // recv
1133  int tag = this->compute_tag(nproc,p,my_rank,1);
1134  MPI_Request tr1;
1135  recv_req.push_back(tr1);
1136  MPI_Irecv(values_recv + offset_nnz,1,recv_type,
1137  p,tag,Global_communicator_pt->mpi_comm(),
1138  &recv_req[c_recv]);
1139  c_recv++;
1140  }
1141  }
1142  }
1143 
1144  // otherwise send to self (copy)
1145  if (nnz_recv[Color][my_rank] != 0)
1146  {
1147 
1148  // get pointers to the underlying data in the current matrix
1149  double* values_send = matrix_pt[Color]->value();
1150  int* row_start_send = matrix_pt[Color]->row_start();
1151  int* column_index_send = matrix_pt[Color]->column_index();
1152 
1153  // offset for row_start send to self
1154  unsigned offset_n_send =
1155  First_row_for_proc[Color][my_rank]-matrix_pt[Color]->first_row(my_rank);
1156 
1157  // offset for values and column+_index send to self
1158  unsigned offset_nnz_send = row_start_send[offset_n_send];
1159 
1160  // offset for row_start receive from self
1161  unsigned offset_n_recv =
1162  First_row_from_proc[Color][my_rank]-target_first_row[Color][my_rank];
1163 
1164  // offset for values and columm_index receive from self
1165  unsigned k = 0;
1166  while (nnz_start_proc[k] != my_rank)
1167  {
1168  k++;
1169  }
1170  unsigned offset_nnz_recv = nnz_start_index[k];
1171 
1172  // and send
1173 
1174  // values and column_index
1175  unsigned n_nnz = nnz_send[Color][my_rank];
1176  for (unsigned j = 0; j < n_nnz; j++)
1177  {
1178  values_recv[offset_nnz_recv + j] =
1179  values_send[offset_nnz_send + j];
1180  column_index_recv[offset_nnz_recv + j] =
1181  column_index_send[offset_nnz_send + j];
1182  }
1183 
1184  // row start
1185  unsigned n_n = Nrow_local_from_proc[Color][my_rank];
1186  for (unsigned j = 0; j < n_n; j++)
1187  {
1188  row_start_recv[offset_n_recv + j] =
1189  row_start_send[offset_n_send + j];
1190  }
1191  }
1192 
1193  // create the local distribution for this matrix
1194  LinearAlgebraDistribution* temp_dist_pt =
1195  new LinearAlgebraDistribution
1196  (Local_communicator_pt,target_first_row[Color][my_rank],
1197  target_nrow_local[Color][my_rank]);
1198 
1199  // create the corresponding matrix
1200  local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
1201  delete temp_dist_pt; // (dist has now been copied)
1202 
1203  ///////////////////////////////////////////////////////////////////////////
1204  // and WAIT...
1205  ///////////////////////////////////////////////////////////////////////////
1206  if (c_recv!=0)
1207  {
1208  Vector<MPI_Status> recv_stat(c_recv);
1209  MPI_Waitall(c_recv,&recv_req[0],&recv_stat[0]);
1210  recv_req.clear();
1211  recv_stat.clear();
1212  }
1213 
1214  // build the matrix
1215 
1216  // update the row start
1217  unsigned nproc_contrib = nnz_start_index.size();
1218  for (unsigned j = 0; j < nproc_contrib; j++)
1219  {
1220  unsigned first = First_row_from_proc[Color][nnz_start_proc[j]] -
1221  target_first_row[Color][my_rank];
1222  unsigned last = first + Nrow_local_from_proc[Color][nnz_start_proc[j]];
1223  unsigned nnz_inc = nnz_start_index[j]-row_start_recv[first];
1224  for (unsigned k = first; k < last; k++)
1225  {
1226  row_start_recv[k]+=nnz_inc;
1227  }
1228  }
1229  row_start_recv[target_nrow_local[Color][my_rank]] = int(nnz_total);
1230 
1231  // build the matrix without a copy of the data
1232  local_matrix_pt->build_without_copy(matrix_pt[Color]->ncol(),
1233  nnz_total,
1234  values_recv,
1235  column_index_recv,
1236  row_start_recv);
1237 
1238  // and finally wait for the sends
1239  if (c_recv!=0)
1240  {
1241  Vector<MPI_Status> send_stat(c_recv);
1242  MPI_Waitall(c_send,&send_req[0],&send_stat[0]);
1243  send_req.clear();
1244  send_stat.clear();
1245  }
1246 
1247  // and clear the datatype
1248  unsigned ndatatypes = datatypes.size();
1249  for (unsigned i = 0; i < ndatatypes; i++)
1250  {
1251  MPI_Type_free(&datatypes[i]);
1252  }
1253  }
1254 
1255 
1256 
1257  /////////////////////////////////////////////////////////////////////////////
1258  /////////////////////////////////////////////////////////////////////////////
1259  /////////////////////////////////////////////////////////////////////////////
1260  /////////////////////////////////////////////////////////////////////////////
1261 
1262 
1263  // METHOD 2
1264  else if (Method == 2)
1265  {
1266 
1267  // temporary storgage for nnz recv
1268  unsigned* nnz_recv_temp = new unsigned[nproc*Nprec];
1269  for (unsigned j = 0; j < nproc*Nprec; j++)
1270  {
1271  nnz_recv_temp[j] = 0;
1272  }
1273 
1274  // for every matrix we assemble the duplicate of the matrix on fewer
1275  // processors and setup the preconditioner
1276  for (unsigned i = 0; i < Nprec; i++)
1277  {
1278 
1279  // if the matrix is global (!distributed) then just construct a copy
1280  // on the subset of processors
1281  if (!matrix_pt[i]->distributed())
1282  {
1283 
1284  // if this matrix is to be preconditioned my this processor
1285  if (i == Color)
1286  {
1287 
1288  // create the local distribution for this matrix
1289  LinearAlgebraDistribution* temp_dist_pt =
1290  new LinearAlgebraDistribution(Local_communicator_pt,
1291  matrix_pt[i]->nrow(),
1292  false);
1293 
1294  // create the corresponding matrix
1295  local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
1296  delete temp_dist_pt; // (dist has now been copied)
1297 
1298  // get pointers to the underlying data
1299  double* values_pt = matrix_pt[i]->value();
1300  int* column_index_pt = matrix_pt[i]->column_index();
1301  int* row_start_pt = matrix_pt[i]->row_start();
1302 
1303  // build the matrix without a copy of the data
1304  local_matrix_pt->build_without_copy(matrix_pt[i]->ncol(),
1305  matrix_pt[i]->nnz(),
1306  values_pt,
1307  column_index_pt,
1308  row_start_pt);
1309  }
1310  }
1311 
1312  // if the matrix is global (!distributed) then just construct a copy
1313  // on the subset of processors
1314  else
1315  {
1316 
1317  // first compute the distribution of this preconditioner on its subset
1318  // of processors
1319 
1320  // number of rows for this preconditioner
1321  unsigned nrow = matrix_pt[i]->nrow();
1322 
1323  // setup First_row_for_local_prec and Nrow_local_for_local_prec
1324  target_first_row[i].resize(nproc);
1325  target_nrow_local[i].resize(nproc);
1326  unsigned nproc_local = Nproc_for_prec[i];
1327  for (unsigned p = 0; p < nproc_local; p++)
1328  {
1329  int pp = First_proc_for_prec[i] + p;
1330  target_first_row[i][pp] = unsigned(double(p*nrow)/
1331  double(nproc_local));
1332  }
1333  for (unsigned p = 0; p < nproc_local-1; p++)
1334  {
1335  int pp = First_proc_for_prec[i] + p;
1336  target_nrow_local[i][pp] = target_first_row[i][pp+1]
1337  - target_first_row[i][pp];
1338  }
1339  unsigned last_local_proc = First_proc_for_prec[i] + nproc_local - 1;
1340  target_nrow_local[i][last_local_proc] = nrow -
1341  target_first_row[i][last_local_proc];
1342 
1343  // get the details of the current distribution
1344  Vector<unsigned> current_first_row(nproc);
1345  Vector<unsigned> current_nrow_local(nproc);
1346  for (unsigned p = 0; p < nproc; p++)
1347  {
1348  current_first_row[p] = matrix_pt[i]->first_row(p);
1349  current_nrow_local[p] = matrix_pt[i]->nrow_local(p);
1350  }
1351 
1352  // resize storage for details of the data to be sent and received
1353  First_row_for_proc[i].resize(nproc,0);
1354  Nrow_local_for_proc[i].resize(nproc,0);
1355  First_row_from_proc[i].resize(nproc,0);
1356  Nrow_local_from_proc[i].resize(nproc,0);
1357 
1358  // for every processor compute first_row and nrow_local that will
1359  // will sent and received by this processor
1360  for (unsigned p = 0; p < nproc; p++)
1361  {
1362  // start with data to be sent
1363  if ((target_first_row[i][p] < (current_first_row[my_rank] +
1364  current_nrow_local[my_rank])) &&
1365  (current_first_row[my_rank] < (target_first_row[i][p] +
1366  target_nrow_local[i][p])))
1367  {
1368  First_row_for_proc[i][p] =
1369  std::max(current_first_row[my_rank],
1370  target_first_row[i][p]);
1371  Nrow_local_for_proc[i][p] =
1372  std::min((current_first_row[my_rank] +
1373  current_nrow_local[my_rank]),
1374  (target_first_row[i][p] +
1375  target_nrow_local[i][p])) - First_row_for_proc[i][p];
1376  }
1377 
1378  // and data to be received
1379  if ((target_first_row[i][my_rank] < (current_first_row[p] +
1380  current_nrow_local[p]))
1381  && (current_first_row[p] < (target_first_row[i][my_rank] +
1382  target_nrow_local[i][my_rank])))
1383  {
1384  First_row_from_proc[i][p] =
1385  std::max(current_first_row[p],
1386  target_first_row[i][my_rank]);
1387  Nrow_local_from_proc[i][p] =
1388  std::min((current_first_row[p] +
1389  current_nrow_local[p]),
1390  (target_first_row[i][my_rank] +
1391  target_nrow_local[i][my_rank]))-
1392  First_row_from_proc[i][p];
1393  }
1394  }
1395 
1396  // resize nnz_send
1397  nnz_send[i].resize(nproc);
1398 
1399  // compute the number of nnzs to be sent
1400  // and the number of send and receive requests to be made (nreq)
1401  for (unsigned p = 0; p < nproc; p++)
1402  {
1403  if (Nrow_local_for_proc[i][p] != 0)
1404  {
1405  int* row_start = matrix_pt[i]->row_start();
1406  unsigned k = First_row_for_proc[i][p]-current_first_row[my_rank];
1407  nnz_send[i][p] = row_start[k + Nrow_local_for_proc[i][p]] -
1408  row_start[k];
1409  }
1410  }
1411 
1412  // resize nnz_recv
1413  nnz_recv[i].resize(nproc);
1414 
1415  // send nnz to be sent to each processor
1416  for (unsigned p = 0; p < nproc; p++)
1417  {
1418 
1419  // send and recv
1420 
1421  // dont mpi send to self
1422  if (p != my_rank)
1423  {
1424 
1425  // non block send
1426  if (Nrow_local_for_proc[i][p] != 0)
1427  {
1428 
1429  // send to other processors
1430  int tag = this->compute_tag(nproc,my_rank,p,0);
1431  MPI_Request tr;
1432  req.push_back(tr);
1433  MPI_Isend(&nnz_send[i][p],1,MPI_UNSIGNED,p,tag,
1434  Global_communicator_pt->mpi_comm(),&req[c]);
1435  c++;
1436  }
1437 
1438  // non blocking recv
1439  if (Nrow_local_from_proc[i][p] != 0)
1440  {
1441  int tag = this->compute_tag(nproc,p,my_rank,0);
1442  MPI_Request tr;
1443  req.push_back(tr);
1444  MPI_Irecv(nnz_recv_temp + (i*nproc) + p,1,MPI_UNSIGNED,p,tag,
1445  Global_communicator_pt->mpi_comm(),&req[c]);
1446  c++;
1447  }
1448  }
1449  // receive from self
1450  else
1451  {
1452  if (Nrow_local_for_proc[i][p] != 0)
1453  {
1454  nnz_recv_temp[(i*nproc)+p] = nnz_send[i][p];
1455  }
1456  }
1457  }
1458  }
1459  }
1460  if (c!=0)
1461  {
1462  Vector<MPI_Status> stat(c);
1463  MPI_Waitall(c,&req[0],&stat[0]);
1464  req.clear();
1465  stat.clear();
1466  c=0;
1467  }
1468  for (unsigned i = 0; i < Nprec; i++)
1469  {
1470  for (unsigned p = 0; p < nproc; p++)
1471  {
1472  nnz_recv[i][p] = nnz_recv_temp[(i*nproc)+p];
1473  }
1474  }
1475  delete nnz_recv_temp;
1476 
1477  // get the number of nnzs to be received from each processor
1478 
1479  // total number of nnz to be reveived
1480  unsigned nnz_total = 0;
1481  for (unsigned p = 0; p < nproc; p++)
1482  {
1483  nnz_total += nnz_recv[Color][p];
1484  }
1485 
1486  // compute nnz block start
1487  Vector<unsigned> nnz_start_proc;
1488  Vector<unsigned> nnz_start_index;
1489  unsigned row_ptr = target_first_row[Color][my_rank];
1490  int p = 0;
1491  unsigned nnz_ptr = 0;
1492  for (p = 0; p < int(nproc); p++)
1493  {
1494  if (First_row_from_proc[Color][p] == row_ptr &&
1495  Nrow_local_from_proc[Color][p] != 0 &&
1496  nnz_ptr != nnz_total)
1497  {
1498  nnz_start_proc.push_back(p);
1499  nnz_start_index.push_back(nnz_ptr);
1500  nnz_ptr += nnz_recv[Color][p];
1501  row_ptr += Nrow_local_from_proc[Color][p];
1502  p = -1;
1503  }
1504  }
1505 
1506  // storage for derived datatypes
1507  Vector<MPI_Datatype> datatypes;
1508 
1509  // storage for received data
1510  double* values_recv = new double[nnz_total];
1511  int* column_index_recv = new int[nnz_total];
1512  int* row_start_recv = new int[target_nrow_local[Color][my_rank]+1];
1513 
1514  ///////////////////////////////////////////////////////////////////////////
1515  // RECV
1516  ///////////////////////////////////////////////////////////////////////////
1517  unsigned c_recv = 0;
1518  Vector<MPI_Request> recv_req;
1519 
1520  // receive the contents of the vector
1521  for (unsigned p = 0; p < nproc; p++)
1522  {
1523 
1524  // use mpi methods to send to and receive from all but my rank
1525  if (p != my_rank)
1526  {
1527 
1528  // just receive
1529  if (nnz_recv[Color][p] != 0)
1530  {
1531 
1532  // create 3 MPI contiguous datatypes
1533  // + values
1534  // + column_index
1535  // + row_start
1536 
1537  // values
1538  MPI_Datatype datatype_values;
1539  MPI_Type_contiguous(int(nnz_recv[Color][p]),MPI_DOUBLE,
1540  &datatype_values);
1541  MPI_Type_commit(&datatype_values);
1542  datatypes.push_back(datatype_values);
1543 
1544  // column index
1545  MPI_Datatype datatype_column_index;
1546  MPI_Type_contiguous(int(nnz_recv[Color][p]),MPI_INT,
1547  &datatype_column_index);
1548  MPI_Type_commit(&datatype_column_index);
1549  datatypes.push_back(datatype_column_index);
1550 
1551  // row start
1552  MPI_Datatype datatype_row_start;
1553  MPI_Type_contiguous(int(Nrow_local_from_proc[Color][p]),MPI_INT,
1554  &datatype_row_start);
1555  MPI_Type_commit(&datatype_row_start);
1556  datatypes.push_back(datatype_row_start);
1557 
1558  // assemble the typelist
1559  MPI_Datatype typelist[3];
1560  typelist[0] = datatype_values;
1561  typelist[1] = datatype_column_index;
1562  typelist[2] = datatype_row_start;
1563 
1564  // compute the offset for row_start
1565  int offset_n =
1566  First_row_from_proc[Color][p]-target_first_row[Color][my_rank];
1567 
1568  // compute the offset for the values and column_index
1569  unsigned k = 0;
1570  while (nnz_start_proc[k] != p)
1571  {
1572  k++;
1573  }
1574  int offset_nnz = nnz_start_index[k];
1575 
1576  // next compute the displacements
1577  MPI_Aint displacements[3];
1578  MPI_Get_address(values_recv + offset_nnz,&displacements[0]);
1579  MPI_Get_address(column_index_recv + offset_nnz,&displacements[1]);
1580  MPI_Get_address(row_start_recv + offset_n,&displacements[2]);
1581  for (int j = 2; j >= 0; j--)
1582  {
1583  displacements[j] -= displacements[0];
1584  }
1585 
1586  // set the block lengths
1587  int block_length[3];
1588  block_length[0] = block_length[1] = block_length[2] = 1;
1589 
1590  // now build the final datatype
1591  MPI_Datatype recv_type;
1592  MPI_Type_create_struct(3,block_length,displacements,typelist,
1593  &recv_type);
1594  MPI_Type_commit(&recv_type);
1595  datatypes.push_back(recv_type);
1596 
1597  // recv
1598  int tag = this->compute_tag(nproc,p,my_rank,1);
1599  MPI_Request tr1;
1600  recv_req.push_back(tr1);
1601  MPI_Irecv(values_recv + offset_nnz,1,recv_type,
1602  p,tag,Global_communicator_pt->mpi_comm(),
1603  &recv_req[c_recv]);
1604  c_recv++;
1605  }
1606  }
1607  }
1608 
1609  ///////////////////////////////////////////////////////////////////////////
1610  // SEND
1611  ///////////////////////////////////////////////////////////////////////////
1612  unsigned c_send = 0;
1613  Vector<MPI_Request> send_req;
1614 
1615  // for every matrix we assemble the duplicate of the matrix on fewer
1616  // processors and setup the preconditioner
1617  for (unsigned i = 0; i < Nprec; i++)
1618  {
1619 
1620  // get pointers to the underlying data in the current matrix
1621  double* values_send = matrix_pt[i]->value();
1622  int* row_start_send = matrix_pt[i]->row_start();
1623  int* column_index_send = matrix_pt[i]->column_index();
1624 
1625  // send and receive the contents of the vector
1626  for (unsigned p = 0; p < nproc; p++)
1627  {
1628 
1629  // use mpi methods to send to and receive from all but my rank
1630  if (p != my_rank)
1631  {
1632 
1633  // send
1634  if (nnz_send[i][p] != 0)
1635  {
1636 
1637  // create 3 MPI contiguous datatypes
1638  // + values
1639  // + column_index
1640  // + row_start
1641 
1642  // values
1643  MPI_Datatype datatype_values;
1644  MPI_Type_contiguous(int(nnz_send[i][p]),MPI_DOUBLE,
1645  &datatype_values);
1646  MPI_Type_commit(&datatype_values);
1647  datatypes.push_back(datatype_values);
1648 
1649  // column index
1650  MPI_Datatype datatype_column_index;
1651  MPI_Type_contiguous(int(nnz_send[i][p]),MPI_INT,
1652  &datatype_column_index);
1653  MPI_Type_commit(&datatype_column_index);
1654  datatypes.push_back(datatype_column_index);
1655 
1656  // row start
1657  MPI_Datatype datatype_row_start;
1658  MPI_Type_contiguous(int(Nrow_local_for_proc[i][p]),MPI_INT,
1659  &datatype_row_start);
1660  MPI_Type_commit(&datatype_row_start);
1661  datatypes.push_back(datatype_row_start);
1662 
1663  // assemble the typelist
1664  MPI_Datatype typelist[3];
1665  typelist[0] = datatype_values;
1666  typelist[1] = datatype_column_index;
1667  typelist[2] = datatype_row_start;
1668 
1669  // compute the offset for row_start
1670  int offset_n =
1671  First_row_for_proc[i][p]-matrix_pt[i]->first_row(my_rank);
1672 
1673  // compute the offset for the values and column_index
1674  int offset_nnz = row_start_send[offset_n];
1675 
1676  // next compute the displacements
1677  MPI_Aint displacements[3];
1678  MPI_Get_address(values_send + offset_nnz,&displacements[0]);
1679  MPI_Get_address(column_index_send + offset_nnz,&displacements[1]);
1680  MPI_Get_address(row_start_send + offset_n,&displacements[2]);
1681  for (int j = 2; j >= 0; j--)
1682  {
1683  displacements[j] -= displacements[0];
1684  }
1685 
1686  // set the block lengths
1687  int block_length[3];
1688  block_length[0] = block_length[1] = block_length[2] = 1;
1689 
1690  // now build the final datatype
1691  MPI_Datatype send_type;
1692  MPI_Type_create_struct(3,block_length,displacements,typelist,
1693  &send_type);
1694  MPI_Type_commit(&send_type);
1695  datatypes.push_back(send_type);
1696 
1697  // send
1698  int tag = this->compute_tag(nproc,my_rank,p,1);
1699  MPI_Request tr1;
1700  send_req.push_back(tr1);
1701  MPI_Isend(values_send + offset_nnz,1,send_type,
1702  p,tag,Global_communicator_pt->mpi_comm(),
1703  &send_req[c_send]);
1704  c_send++;
1705  }
1706  }
1707  }
1708  }
1709 
1710  // otherwise send to self (copy)
1711  if (nnz_recv[Color][my_rank] != 0)
1712  {
1713 
1714  // get pointers to the underlying data in the current matrix
1715  double* values_send = matrix_pt[Color]->value();
1716  int* row_start_send = matrix_pt[Color]->row_start();
1717  int* column_index_send = matrix_pt[Color]->column_index();
1718 
1719  // offset for row_start send to self
1720  unsigned offset_n_send =
1721  First_row_for_proc[Color][my_rank]-matrix_pt[Color]->first_row(my_rank);
1722 
1723  // offset for values and column+_index send to self
1724  unsigned offset_nnz_send = row_start_send[offset_n_send];
1725 
1726  // offset for row_start receive from self
1727  unsigned offset_n_recv =
1728  First_row_from_proc[Color][my_rank]-target_first_row[Color][my_rank];
1729 
1730  // offset for values and columm_index receive from self
1731  unsigned k = 0;
1732  while (nnz_start_proc[k] != my_rank)
1733  {
1734  k++;
1735  }
1736  unsigned offset_nnz_recv = nnz_start_index[k];
1737 
1738  // and send
1739 
1740  // values and column_index
1741  unsigned n_nnz = nnz_send[Color][my_rank];
1742  for (unsigned j = 0; j < n_nnz; j++)
1743  {
1744  values_recv[offset_nnz_recv + j] =
1745  values_send[offset_nnz_send + j];
1746  column_index_recv[offset_nnz_recv + j] =
1747  column_index_send[offset_nnz_send + j];
1748  }
1749 
1750  // row start
1751  unsigned n_n = Nrow_local_from_proc[Color][my_rank];
1752  for (unsigned j = 0; j < n_n; j++)
1753  {
1754  row_start_recv[offset_n_recv + j] =
1755  row_start_send[offset_n_send + j];
1756  }
1757  }
1758 
1759  // create the local distribution for this matrix
1760  LinearAlgebraDistribution* temp_dist_pt =
1761  new LinearAlgebraDistribution
1762  (Local_communicator_pt,target_first_row[Color][my_rank],
1763  target_nrow_local[Color][my_rank]);
1764 
1765  // create the corresponding matrix
1766  local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
1767  delete temp_dist_pt; // (dist has now been copied)
1768 
1769  ///////////////////////////////////////////////////////////////////////////
1770  // and WAIT...
1771  ///////////////////////////////////////////////////////////////////////////
1772  if (c_recv!=0)
1773  {
1774  Vector<MPI_Status> recv_stat(c_recv);
1775  MPI_Waitall(c_recv,&recv_req[0],&recv_stat[0]);
1776  recv_req.clear();
1777  recv_stat.clear();
1778  }
1779 
1780  // build the matrix
1781 
1782  // update the row start
1783  unsigned nproc_contrib = nnz_start_index.size();
1784  for (unsigned j = 0; j < nproc_contrib; j++)
1785  {
1786  unsigned first = First_row_from_proc[Color][nnz_start_proc[j]] -
1787  target_first_row[Color][my_rank];
1788  unsigned last = first + Nrow_local_from_proc[Color][nnz_start_proc[j]];
1789  unsigned nnz_inc = nnz_start_index[j]-row_start_recv[first];
1790  for (unsigned k = first; k < last; k++)
1791  {
1792  row_start_recv[k]+=nnz_inc;
1793  }
1794  }
1795  row_start_recv[target_nrow_local[Color][my_rank]] = int(nnz_total);
1796 
1797  // build the matrix without a copy of the data
1798  local_matrix_pt->build_without_copy(matrix_pt[Color]->ncol(),
1799  nnz_total,
1800  values_recv,
1801  column_index_recv,
1802  row_start_recv);
1803 
1804  // and finally wait for the sends
1805  if (c_send!=0)
1806  {
1807  Vector<MPI_Status> send_stat(c_send);
1808  MPI_Waitall(c_send,&send_req[0],&send_stat[0]);
1809  send_req.clear();
1810  send_stat.clear();
1811  }
1812 
1813  // and clear the datatype
1814  unsigned ndatatypes = datatypes.size();
1815  for (unsigned i = 0; i < ndatatypes; i++)
1816  {
1817  MPI_Type_free(&datatypes[i]);
1818  }
1819  }
1820 
1821 
1822 
1823 
1824  /////////////////////////////////////////////////////////////////////////////
1825  /////////////////////////////////////////////////////////////////////////////
1826  /////////////////////////////////////////////////////////////////////////////
1827  /////////////////////////////////////////////////////////////////////////////
1828 
1829 
1830  // METHOD 3
1831  else if (Method == 3)
1832  {
1833 
1834  // temporary storgage for nnz recv
1835  unsigned* nnz_recv_temp = new unsigned[nproc*Nprec];
1836  for (unsigned j = 0; j < nproc*Nprec; j++)
1837  {
1838  nnz_recv_temp[j] = 0;
1839  }
1840 
1841  // for every matrix we assemble the duplicate of the matrix on fewer
1842  // processors and setup the preconditioner
1843  for (unsigned i = 0; i < Nprec; i++)
1844  {
1845 
1846  // if the matrix is global (!distributed) then just construct a copy
1847  // on the subset of processors
1848  if (!matrix_pt[i]->distributed())
1849  {
1850 
1851  // if this matrix is to be preconditioned my this processor
1852  if (i == Color)
1853  {
1854 
1855  // create the local distribution for this matrix
1856  LinearAlgebraDistribution* temp_dist_pt =
1857  new LinearAlgebraDistribution(Local_communicator_pt,
1858  matrix_pt[i]->nrow(),
1859  false);
1860 
1861  // create the corresponding matrix
1862  local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
1863  delete temp_dist_pt; // (dist has now been copied)
1864 
1865  // get pointers to the underlying data
1866  double* values_pt = matrix_pt[i]->value();
1867  int* column_index_pt = matrix_pt[i]->column_index();
1868  int* row_start_pt = matrix_pt[i]->row_start();
1869 
1870  // build the matrix without a copy of the data
1871  local_matrix_pt->build_without_copy(matrix_pt[i]->ncol(),
1872  matrix_pt[i]->nnz(),
1873  values_pt,
1874  column_index_pt,
1875  row_start_pt);
1876  }
1877  }
1878 
1879  // if the matrix is global (!distributed) then just construct a copy
1880  // on the subset of processors
1881  else
1882  {
1883 
1884  // first compute the distribution of this preconditioner on its subset
1885  // of processors
1886 
1887  // number of rows for this preconditioner
1888  unsigned nrow = matrix_pt[i]->nrow();
1889 
1890  // setup First_row_for_local_prec and Nrow_local_for_local_prec
1891  target_first_row[i].resize(nproc);
1892  target_nrow_local[i].resize(nproc);
1893  unsigned nproc_local = Nproc_for_prec[i];
1894  for (unsigned p = 0; p < nproc_local; p++)
1895  {
1896  int pp = First_proc_for_prec[i] + p;
1897  target_first_row[i][pp] = unsigned(double(p*nrow)/
1898  double(nproc_local));
1899  }
1900  for (unsigned p = 0; p < nproc_local-1; p++)
1901  {
1902  int pp = First_proc_for_prec[i] + p;
1903  target_nrow_local[i][pp] = target_first_row[i][pp+1]
1904  - target_first_row[i][pp];
1905  }
1906  unsigned last_local_proc = First_proc_for_prec[i] + nproc_local - 1;
1907  target_nrow_local[i][last_local_proc] = nrow -
1908  target_first_row[i][last_local_proc];
1909 
1910  // get the details of the current distribution
1911  Vector<unsigned> current_first_row(nproc);
1912  Vector<unsigned> current_nrow_local(nproc);
1913  for (unsigned p = 0; p < nproc; p++)
1914  {
1915  current_first_row[p] = matrix_pt[i]->first_row(p);
1916  current_nrow_local[p] = matrix_pt[i]->nrow_local(p);
1917  }
1918 
1919  // resize storage for details of the data to be sent and received
1920  First_row_for_proc[i].resize(nproc,0);
1921  Nrow_local_for_proc[i].resize(nproc,0);
1922  First_row_from_proc[i].resize(nproc,0);
1923  Nrow_local_from_proc[i].resize(nproc,0);
1924 
1925  // for every processor compute first_row and nrow_local that will
1926  // will sent and received by this processor
1927  for (unsigned p = 0; p < nproc; p++)
1928  {
1929  // start with data to be sent
1930  if ((target_first_row[i][p] < (current_first_row[my_rank] +
1931  current_nrow_local[my_rank])) &&
1932  (current_first_row[my_rank] < (target_first_row[i][p] +
1933  target_nrow_local[i][p])))
1934  {
1935  First_row_for_proc[i][p] =
1936  std::max(current_first_row[my_rank],
1937  target_first_row[i][p]);
1938  Nrow_local_for_proc[i][p] =
1939  std::min((current_first_row[my_rank] +
1940  current_nrow_local[my_rank]),
1941  (target_first_row[i][p] +
1942  target_nrow_local[i][p])) - First_row_for_proc[i][p];
1943  }
1944 
1945  // and data to be received
1946  if ((target_first_row[i][my_rank] < (current_first_row[p] +
1947  current_nrow_local[p]))
1948  && (current_first_row[p] < (target_first_row[i][my_rank] +
1949  target_nrow_local[i][my_rank])))
1950  {
1951  First_row_from_proc[i][p] =
1952  std::max(current_first_row[p],
1953  target_first_row[i][my_rank]);
1954  Nrow_local_from_proc[i][p] =
1955  std::min((current_first_row[p] +
1956  current_nrow_local[p]),
1957  (target_first_row[i][my_rank] +
1958  target_nrow_local[i][my_rank]))-
1959  First_row_from_proc[i][p];
1960  }
1961  }
1962 
1963  // resize nnz_send
1964  nnz_send[i].resize(nproc);
1965 
1966  // compute the number of nnzs to be sent
1967  // and the number of send and receive requests to be made (nreq)
1968  for (unsigned p = 0; p < nproc; p++)
1969  {
1970  if (Nrow_local_for_proc[i][p] != 0)
1971  {
1972  int* row_start = matrix_pt[i]->row_start();
1973  unsigned k = First_row_for_proc[i][p]-current_first_row[my_rank];
1974  nnz_send[i][p] = row_start[k + Nrow_local_for_proc[i][p]] -
1975  row_start[k];
1976  }
1977  }
1978 
1979  // resize nnz_recv
1980  nnz_recv[i].resize(nproc);
1981 
1982  // send nnz to be sent to each processor
1983  for (unsigned p = 0; p < nproc; p++)
1984  {
1985 
1986  // send and recv
1987 
1988  // dont mpi send to self
1989  if (p != my_rank)
1990  {
1991 
1992  // non block send
1993  if (Nrow_local_for_proc[i][p] != 0)
1994  {
1995 
1996  // send to other processors
1997  int tag = this->compute_tag(nproc,my_rank,p,0);
1998  MPI_Request tr;
1999  req.push_back(tr);
2000  MPI_Isend(&nnz_send[i][p],1,MPI_UNSIGNED,p,tag,
2001  Global_communicator_pt->mpi_comm(),&req[c]);
2002  c++;
2003  }
2004  }
2005  // receive from self
2006  else
2007  {
2008  if (Nrow_local_for_proc[i][p] != 0)
2009  {
2010  nnz_recv_temp[(i*nproc)+p] = nnz_send[i][p];
2011  }
2012  }
2013  }
2014  }
2015  }
2016 
2017  for (unsigned i = 0; i < Nprec; i++)
2018  {
2019  // resize nnz_recv
2020  nnz_recv[i].resize(nproc);
2021 
2022  // receive nnz from other processors
2023  for (unsigned pp = 0; pp < nproc; pp++)
2024  {
2025 
2026  // next processor to receive from
2027  unsigned p = (nproc + my_rank - pp)%nproc;
2028 
2029  // dont mpi send to send
2030  if (p != my_rank)
2031  {
2032 
2033  // blocking recv
2034  if (Nrow_local_from_proc[i][p] != 0)
2035  {
2036  int tag = this->compute_tag(nproc,p,my_rank,0);
2037  MPI_Status stat;
2038  unsigned nnz_temp;
2039  MPI_Recv(&nnz_temp,1,MPI_UNSIGNED,p,tag,
2040  Global_communicator_pt->mpi_comm(),&stat);
2041  nnz_recv[i][p] = nnz_temp;
2042  }
2043  }
2044 
2045  // receive from self
2046  else
2047  {
2048  nnz_recv[i][p] = nnz_send[i][p];
2049  }
2050  }
2051  }
2052 
2053  // get the number of nnzs to be received from each processor
2054 
2055  // total number of nnz to be reveived
2056  unsigned nnz_total = 0;
2057  for (unsigned p = 0; p < nproc; p++)
2058  {
2059  nnz_total += nnz_recv[Color][p];
2060  }
2061 
2062  // compute nnz block start
2063  Vector<unsigned> nnz_start_proc;
2064  Vector<unsigned> nnz_start_index;
2065  unsigned row_ptr = target_first_row[Color][my_rank];
2066  int p = 0;
2067  unsigned nnz_ptr = 0;
2068  for (p = 0; p < int(nproc); p++)
2069  {
2070  if (First_row_from_proc[Color][p] == row_ptr &&
2071  Nrow_local_from_proc[Color][p] != 0 &&
2072  nnz_ptr != nnz_total)
2073  {
2074  nnz_start_proc.push_back(p);
2075  nnz_start_index.push_back(nnz_ptr);
2076  nnz_ptr += nnz_recv[Color][p];
2077  row_ptr += Nrow_local_from_proc[Color][p];
2078  p = -1;
2079  }
2080  }
2081 
2082  // storage for derived datatypes
2083  Vector<MPI_Datatype> datatypes;
2084 
2085  // storage for received data
2086  double* values_recv = new double[nnz_total];
2087  int* column_index_recv = new int[nnz_total];
2088  int* row_start_recv = new int[target_nrow_local[Color][my_rank]+1];
2089 
2090  ///////////////////////////////////////////////////////////////////////////
2091  // RECV
2092  ///////////////////////////////////////////////////////////////////////////
2093  unsigned c_recv = 0;
2094  Vector<MPI_Request> recv_req;
2095 
2096  // receive the contents of the vector
2097  for (unsigned p = 0; p < nproc; p++)
2098  {
2099 
2100  // use mpi methods to send to and receive from all but my rank
2101  if (p != my_rank)
2102  {
2103 
2104  // just receive
2105  if (nnz_recv[Color][p] != 0)
2106  {
2107 
2108  // create 3 MPI contiguous datatypes
2109  // + values
2110  // + column_index
2111  // + row_start
2112 
2113  // values
2114  MPI_Datatype datatype_values;
2115  MPI_Type_contiguous(int(nnz_recv[Color][p]),MPI_DOUBLE,
2116  &datatype_values);
2117  MPI_Type_commit(&datatype_values);
2118  datatypes.push_back(datatype_values);
2119 
2120  // column index
2121  MPI_Datatype datatype_column_index;
2122  MPI_Type_contiguous(int(nnz_recv[Color][p]),MPI_INT,
2123  &datatype_column_index);
2124  MPI_Type_commit(&datatype_column_index);
2125  datatypes.push_back(datatype_column_index);
2126 
2127  // row start
2128  MPI_Datatype datatype_row_start;
2129  MPI_Type_contiguous(int(Nrow_local_from_proc[Color][p]),MPI_INT,
2130  &datatype_row_start);
2131  MPI_Type_commit(&datatype_row_start);
2132  datatypes.push_back(datatype_row_start);
2133 
2134  // assemble the typelist
2135  MPI_Datatype typelist[3];
2136  typelist[0] = datatype_values;
2137  typelist[1] = datatype_column_index;
2138  typelist[2] = datatype_row_start;
2139 
2140  // compute the offset for row_start
2141  int offset_n =
2142  First_row_from_proc[Color][p]-target_first_row[Color][my_rank];
2143 
2144  // compute the offset for the values and column_index
2145  unsigned k = 0;
2146  while (nnz_start_proc[k] != p)
2147  {
2148  k++;
2149  }
2150  int offset_nnz = nnz_start_index[k];
2151 
2152  // next compute the displacements
2153  MPI_Aint displacements[3];
2154  MPI_Get_address(values_recv + offset_nnz,&displacements[0]);
2155  MPI_Get_address(column_index_recv + offset_nnz,&displacements[1]);
2156  MPI_Get_address(row_start_recv + offset_n,&displacements[2]);
2157  for (int j = 2; j >= 0; j--)
2158  {
2159  displacements[j] -= displacements[0];
2160  }
2161 
2162  // set the block lengths
2163  int block_length[3];
2164  block_length[0] = block_length[1] = block_length[2] = 1;
2165 
2166  // now build the final datatype
2167  MPI_Datatype recv_type;
2168  MPI_Type_create_struct(3,block_length,displacements,typelist,
2169  &recv_type);
2170  MPI_Type_commit(&recv_type);
2171  datatypes.push_back(recv_type);
2172 
2173  // recv
2174  int tag = this->compute_tag(nproc,p,my_rank,1);
2175  MPI_Request tr1;
2176  recv_req.push_back(tr1);
2177  MPI_Irecv(values_recv + offset_nnz,1,recv_type,
2178  p,tag,Global_communicator_pt->mpi_comm(),
2179  &recv_req[c_recv]);
2180  c_recv++;
2181  }
2182  }
2183  }
2184 
2185  ///////////////////////////////////////////////////////////////////////////
2186  // SEND
2187  ///////////////////////////////////////////////////////////////////////////
2188  unsigned c_send = 0;
2189  Vector<MPI_Request> send_req;
2190 
2191  // for every matrix we assemble the duplicate of the matrix on fewer
2192  // processors and setup the preconditioner
2193  for (unsigned i = 0; i < Nprec; i++)
2194  {
2195 
2196  // get pointers to the underlying data in the current matrix
2197  double* values_send = matrix_pt[i]->value();
2198  int* row_start_send = matrix_pt[i]->row_start();
2199  int* column_index_send = matrix_pt[i]->column_index();
2200 
2201  // send and receive the contents of the vector
2202  for (unsigned p = 0; p < nproc; p++)
2203  {
2204 
2205  // use mpi methods to send to and receive from all but my rank
2206  if (p != my_rank)
2207  {
2208 
2209  // send
2210  if (nnz_send[i][p] != 0)
2211  {
2212 
2213  // create 3 MPI contiguous datatypes
2214  // + values
2215  // + column_index
2216  // + row_start
2217 
2218  // values
2219  MPI_Datatype datatype_values;
2220  MPI_Type_contiguous(int(nnz_send[i][p]),MPI_DOUBLE,
2221  &datatype_values);
2222  MPI_Type_commit(&datatype_values);
2223  datatypes.push_back(datatype_values);
2224 
2225  // column index
2226  MPI_Datatype datatype_column_index;
2227  MPI_Type_contiguous(int(nnz_send[i][p]),MPI_INT,
2228  &datatype_column_index);
2229  MPI_Type_commit(&datatype_column_index);
2230  datatypes.push_back(datatype_column_index);
2231 
2232  // row start
2233  MPI_Datatype datatype_row_start;
2234  MPI_Type_contiguous(int(Nrow_local_for_proc[i][p]),MPI_INT,
2235  &datatype_row_start);
2236  MPI_Type_commit(&datatype_row_start);
2237  datatypes.push_back(datatype_row_start);
2238 
2239  // assemble the typelist
2240  MPI_Datatype typelist[3];
2241  typelist[0] = datatype_values;
2242  typelist[1] = datatype_column_index;
2243  typelist[2] = datatype_row_start;
2244 
2245  // compute the offset for row_start
2246  int offset_n =
2247  First_row_for_proc[i][p]-matrix_pt[i]->first_row(my_rank);
2248 
2249  // compute the offset for the values and column_index
2250  int offset_nnz = row_start_send[offset_n];
2251 
2252  // next compute the displacements
2253  MPI_Aint displacements[3];
2254  MPI_Get_address(values_send + offset_nnz,&displacements[0]);
2255  MPI_Get_address(column_index_send + offset_nnz,&displacements[1]);
2256  MPI_Get_address(row_start_send + offset_n,&displacements[2]);
2257  for (int j = 2; j >= 0; j--)
2258  {
2259  displacements[j] -= displacements[0];
2260  }
2261 
2262  // set the block lengths
2263  int block_length[3];
2264  block_length[0] = block_length[1] = block_length[2] = 1;
2265 
2266  // now build the final datatype
2267  MPI_Datatype send_type;
2268  MPI_Type_create_struct(3,block_length,displacements,typelist,
2269  &send_type);
2270  MPI_Type_commit(&send_type);
2271  datatypes.push_back(send_type);
2272 
2273  // send
2274  int tag = this->compute_tag(nproc,my_rank,p,1);
2275  MPI_Request tr1;
2276  send_req.push_back(tr1);
2277  MPI_Isend(values_send + offset_nnz,1,send_type,
2278  p,tag,Global_communicator_pt->mpi_comm(),
2279  &send_req[c_send]);
2280  c_send++;
2281  }
2282  }
2283  }
2284  }
2285 
2286  // otherwise send to self (copy)
2287  if (nnz_recv[Color][my_rank] != 0)
2288  {
2289 
2290  // get pointers to the underlying data in the current matrix
2291  double* values_send = matrix_pt[Color]->value();
2292  int* row_start_send = matrix_pt[Color]->row_start();
2293  int* column_index_send = matrix_pt[Color]->column_index();
2294 
2295  // offset for row_start send to self
2296  unsigned offset_n_send =
2297  First_row_for_proc[Color][my_rank]-matrix_pt[Color]->first_row(my_rank);
2298 
2299  // offset for values and column+_index send to self
2300  unsigned offset_nnz_send = row_start_send[offset_n_send];
2301 
2302  // offset for row_start receive from self
2303  unsigned offset_n_recv =
2304  First_row_from_proc[Color][my_rank]-target_first_row[Color][my_rank];
2305 
2306  // offset for values and columm_index receive from self
2307  unsigned k = 0;
2308  while (nnz_start_proc[k] != my_rank)
2309  {
2310  k++;
2311  }
2312  unsigned offset_nnz_recv = nnz_start_index[k];
2313 
2314  // and send
2315 
2316  // values and column_index
2317  unsigned n_nnz = nnz_send[Color][my_rank];
2318  for (unsigned j = 0; j < n_nnz; j++)
2319  {
2320  values_recv[offset_nnz_recv + j] =
2321  values_send[offset_nnz_send + j];
2322  column_index_recv[offset_nnz_recv + j] =
2323  column_index_send[offset_nnz_send + j];
2324  }
2325 
2326  // row start
2327  unsigned n_n = Nrow_local_from_proc[Color][my_rank];
2328  for (unsigned j = 0; j < n_n; j++)
2329  {
2330  row_start_recv[offset_n_recv + j] =
2331  row_start_send[offset_n_send + j];
2332  }
2333  }
2334 
2335  // create the local distribution for this matrix
2336  LinearAlgebraDistribution* temp_dist_pt =
2337  new LinearAlgebraDistribution
2338  (Local_communicator_pt,target_first_row[Color][my_rank],
2339  target_nrow_local[Color][my_rank]);
2340 
2341  // create the corresponding matrix
2342  local_matrix_pt = new CRDoubleMatrix(temp_dist_pt);
2343  delete temp_dist_pt; // (dist has now been copied)
2344 
2345  ///////////////////////////////////////////////////////////////////////////
2346  // and WAIT...
2347  ///////////////////////////////////////////////////////////////////////////
2348  if (c_recv!=0)
2349  {
2350  Vector<MPI_Status> recv_stat(c_recv);
2351  MPI_Waitall(c_recv,&recv_req[0],&recv_stat[0]);
2352  recv_req.clear();
2353  recv_stat.clear();
2354  }
2355 
2356  // build the matrix
2357 
2358  // update the row start
2359  unsigned nproc_contrib = nnz_start_index.size();
2360  for (unsigned j = 0; j < nproc_contrib; j++)
2361  {
2362  unsigned first = First_row_from_proc[Color][nnz_start_proc[j]] -
2363  target_first_row[Color][my_rank];
2364  unsigned last = first + Nrow_local_from_proc[Color][nnz_start_proc[j]];
2365  unsigned nnz_inc = nnz_start_index[j]-row_start_recv[first];
2366  for (unsigned k = first; k < last; k++)
2367  {
2368  row_start_recv[k]+=nnz_inc;
2369  }
2370  }
2371  row_start_recv[target_nrow_local[Color][my_rank]] = int(nnz_total);
2372 
2373  // build the matrix without a copy of the data
2374  local_matrix_pt->build_without_copy(matrix_pt[Color]->ncol(),
2375  nnz_total,
2376  values_recv,
2377  column_index_recv,
2378  row_start_recv);
2379 
2380  // and finally wait for the sends
2381  if (c_recv!=0)
2382  {
2383  Vector<MPI_Status> send_stat(c_recv);
2384  MPI_Waitall(c_send,&send_req[0],&send_stat[0]);
2385  send_req.clear();
2386  send_stat.clear();
2387  }
2388 
2389  // and clear the datatype
2390  unsigned ndatatypes = datatypes.size();
2391  for (unsigned i = 0; i < ndatatypes; i++)
2392  {
2393  MPI_Type_free(&datatypes[i]);
2394  }
2395  }
2396 
2397  // now setup the preconditioner
2398  Preconditioner_pt = prec_pt[Color];
2399  Preconditioner_pt->setup(local_matrix_pt);
2400 
2401  // clean up memory
2402  if (matrix_pt[0]->distributed())
2403  {
2404  delete local_matrix_pt;
2405  }
2406 
2407  // delete the preconditioners not used on this processor
2408  for (unsigned i = 0; i < Nprec; i++)
2409  {
2410  if (i != Color)
2411  {
2412  delete prec_pt[i];
2413  }
2414  }
2415  } //end of setup_preconditioners()
2416 
2417 //============================================================================
2418 /// \short Applies each preconditioner to the corresponding vector in
2419 /// r and z
2420 //=============================================================================
2421  void PreconditionerArray::solve_preconditioners(const Vector<DoubleVector> &r,
2422  Vector<DoubleVector> &z)
2423  {
2424 #ifdef PARANOID
2425  // check that a preconditioner has been setup
2426  if (Preconditioner_pt == 0)
2427  {
2428  std::ostringstream error_message;
2429  error_message << "The preconditioners have not been setup.";
2430  throw OomphLibError(error_message.str(),
2431  OOMPH_CURRENT_FUNCTION,
2432  OOMPH_EXCEPTION_LOCATION);
2433  }
2434 
2435  // check that r is the correct length
2436  if (r.size() != Nprec)
2437  {
2438  std::ostringstream error_message;
2439  error_message << "This PreconditionerArray has " << Nprec
2440  << " preconditioners but r only contains "
2441  << r.size() << " preconditioners.";
2442  throw OomphLibError(error_message.str(),
2443  OOMPH_CURRENT_FUNCTION,
2444  OOMPH_EXCEPTION_LOCATION);
2445  }
2446 
2447  // check that z is the correct length
2448  if (z.size() != Nprec)
2449  {
2450  std::ostringstream error_message;
2451  error_message << "This PreconditionerArray has " << Nprec
2452  << " preconditioners but z only contains "
2453  << z.size() << " preconditioners.";
2454  throw OomphLibError(error_message.str(),
2455  OOMPH_CURRENT_FUNCTION,
2456  OOMPH_EXCEPTION_LOCATION);
2457  }
2458  // check that the vector has the same distribution as the
2459  // preconditioner
2460  for (unsigned i = 0; i < Nprec; i++)
2461  {
2462  if (*r[i].distribution_pt() != *Distribution_pt[i])
2463  {
2464  std::ostringstream error_message;
2465  error_message << "The distribution of r[" << i << "] does not have the"
2466  << " the same distribution as the matrix_pt[" << i
2467  << "] that was passed to setup_preconditioners(...)";
2468  throw OomphLibError(error_message.str(),
2469  OOMPH_CURRENT_FUNCTION,
2470  OOMPH_EXCEPTION_LOCATION);
2471  }
2472  }
2473 #endif
2474 
2475  // the local r vector
2476  DoubleVector local_r(Preconditioner_pt->distribution_pt(),0.0);
2477 
2478  // number of processors
2479  unsigned nproc = Global_communicator_pt->nproc();
2480 
2481  // cache my global rank
2482  unsigned my_rank = Global_communicator_pt->my_rank();
2483 
2484  // send and receive requests
2485  Vector<MPI_Request> send_reqs;
2486  Vector<MPI_Request> recv_reqs;
2487 
2488  // cache first_row
2489  unsigned first_row = Preconditioner_pt->first_row();
2490 
2491  // local residual values for this processor
2492  double* local_r_values = local_r.values_pt();
2493 
2494  // for every vector we assemble the duplicate of the vector on the
2495  // appropirate subset of processors
2496 
2497  // first we post the non-blocking sends and recvs
2498  for (unsigned i = 0; i < Nprec; i++)
2499  {
2500 
2501  if (r[i].distributed())
2502  {
2503 
2504  // current first_row and nrow_local
2505  unsigned current_first_row = r[i].first_row();
2506 
2507  // send and receive the contents of the vector
2508  for (unsigned p = 0; p < nproc; p++)
2509  {
2510 
2511  // use mpi methods to send to and receive from all but my rank
2512  if (p != my_rank)
2513  {
2514 
2515  // send
2516  if (Nrow_local_for_proc[i][p] != 0)
2517  {
2518 
2519  // compute the offset for the values
2520  int offset_n =
2521  First_row_for_proc[i][p]-current_first_row;
2522 
2523  // send the values
2524  int tag = this->compute_tag(nproc,my_rank,p,0);
2525  MPI_Request tr;
2526  MPI_Isend(const_cast<double*>(r[i].values_pt())+offset_n,
2527  int(Nrow_local_for_proc[i][p]),MPI_DOUBLE,p,tag,
2528  Global_communicator_pt->mpi_comm(),&tr);
2529  send_reqs.push_back(tr);
2530  }
2531 
2532  // recv
2533  if (Nrow_local_from_proc[i][p] != 0)
2534  {
2535 
2536  // compute the offset for row_start
2537  int offset_n =
2538  First_row_from_proc[i][p]-first_row;
2539 
2540  // row_start
2541  int tag = this->compute_tag(nproc,p,my_rank,0);
2542  MPI_Request tr;
2543  MPI_Irecv(local_r_values + offset_n,
2544  int(Nrow_local_from_proc[i][p]),MPI_DOUBLE,p,tag,
2545  Global_communicator_pt->mpi_comm(),&tr);
2546  recv_reqs.push_back(tr);
2547  }
2548  }
2549  }
2550  }
2551  }
2552 
2553 
2554  // and now we send to self
2555  if (!r[Color].distributed())
2556  {
2557  // just copy to the new vector
2558  const double* r_pt = r[Color].values_pt();
2559  unsigned nrow_local = local_r.nrow_local();
2560  for (unsigned i = 0; i < nrow_local; i++)
2561  {
2562  local_r_values[i] = r_pt[i];
2563  }
2564  }
2565  else
2566  {
2567  // the incoming residual associated with the processor
2568  const double* r_pt = r[Color].values_pt();
2569 
2570  // current first_row and nrow_local
2571  unsigned current_first_row = r[Color].first_row();
2572 
2573  // cache first_row
2574  unsigned first_row = Preconditioner_pt->first_row();
2575 
2576  //
2577  if (Nrow_local_from_proc[Color][my_rank] != 0)
2578  {
2579  // offset for values send to self
2580  unsigned offset_n_send =
2581  First_row_for_proc[Color][my_rank]-current_first_row;
2582 
2583  // offset for values receive from self
2584  unsigned offset_n_recv =
2585  First_row_from_proc[Color][my_rank]-first_row;
2586 
2587  // send/receive
2588  unsigned n_n = Nrow_local_from_proc[Color][my_rank];
2589  for (unsigned j = 0; j < n_n; j++)
2590  {
2591  local_r_values[offset_n_recv + j] = r_pt[offset_n_send + j];
2592  }
2593  }
2594  }
2595 
2596  // wait for the receives to complete
2597  unsigned n_recv = recv_reqs.size();
2598  if (n_recv)
2599  {
2600  MPI_Waitall(n_recv,&recv_reqs[0],MPI_STATUS_IGNORE);
2601  }
2602  recv_reqs.clear();
2603 
2604  // next solve
2605  // apply the local preconditioner
2606  DoubleVector local_z;
2607  Preconditioner_pt->preconditioner_solve(local_r,local_z);
2608  local_r.clear();
2609 
2610  // the local z values
2611  double* local_z_values = local_z.values_pt();
2612 
2613  // setup the vectors
2614  for (unsigned i = 0; i < Nprec; i++)
2615  {
2616 
2617  // if z[i] is not setup then set it up
2618  if (!z[i].built())
2619  {
2620  z[i].build(r[i].distribution_pt(),0.0);
2621  }
2622  }
2623 
2624  // first we post the non-blocking sends and recvs
2625  for (unsigned i = 0; i < Nprec; i++)
2626  {
2627  if (r[i].distributed())
2628  {
2629 
2630  // current first_row and nrow_local
2631  unsigned current_first_row = r[i].first_row();
2632 
2633  // send and receive the contents of the vector
2634  for (unsigned p = 0; p < nproc; p++)
2635  {
2636 
2637  // use mpi methods to send to and receive from all but my rank
2638  if (p != my_rank)
2639  {
2640 
2641  // send
2642  if (Nrow_local_for_proc[i][p] != 0)
2643  {
2644 
2645  // compute the offset for the values
2646  int offset_n =
2647  First_row_for_proc[i][p]-current_first_row;
2648 
2649  // send the values
2650  int tag = this->compute_tag(nproc,my_rank,p,0);
2651  MPI_Request tr;
2652  MPI_Irecv(z[i].values_pt() + offset_n,
2653  int(Nrow_local_for_proc[i][p]),MPI_DOUBLE,p,tag,
2654  Global_communicator_pt->mpi_comm(),&tr);
2655  recv_reqs.push_back(tr);
2656  }
2657 
2658  // recv
2659  if (Nrow_local_from_proc[i][p] != 0)
2660  {
2661 
2662  // compute the offset for row_start
2663  int offset_n =
2664  First_row_from_proc[i][p]-first_row;
2665 
2666  // vector
2667  int tag = this->compute_tag(nproc,p,my_rank,0);
2668  MPI_Request tr;
2669  MPI_Isend(local_z_values + offset_n,
2670  int(Nrow_local_from_proc[i][p]),MPI_DOUBLE,p,tag,
2671  Global_communicator_pt->mpi_comm(),&tr);
2672  send_reqs.push_back(tr);
2673  }
2674  }
2675  }
2676  }
2677  // otherwise we need to share the results
2678  else
2679  {
2680  // number of processors associated with this preconditioner
2681  unsigned nproc_local = Local_communicator_pt->nproc();
2682 
2683  // my "proc number" for this preconditioner
2684  unsigned my_local_rank = Local_communicator_pt->my_rank();
2685 
2686  // sends to self completed later
2687  if (i != Color)
2688  {
2689  // post send requests
2690  for (unsigned j = my_local_rank; j < Nproc_for_prec[i];
2691  j += nproc_local)
2692  {
2693  int p = j + First_proc_for_prec[i];
2694  MPI_Request tr;
2695  MPI_Isend(local_z_values,z[Color].nrow(),MPI_DOUBLE,p,0,
2696  Global_communicator_pt->mpi_comm(),&tr);
2697  send_reqs.push_back(tr);
2698  }
2699 
2700  // compute the processor number to recv from
2701  int p = my_local_rank;
2702  while ((p - int(Nproc_for_prec[i])) >= 0)
2703  {
2704  p-= Nproc_for_prec[i];
2705  }
2706  p += First_proc_for_prec[i];
2707 
2708  // and recv
2709  MPI_Request tr;
2710  MPI_Irecv(z[i].values_pt(),z[i].nrow(),MPI_DOUBLE,p,0,
2711  Global_communicator_pt->mpi_comm(),&tr);
2712  recv_reqs.push_back(tr);
2713  }
2714  }
2715  }
2716 
2717  // and now we send to self
2718  if (!r[Color].distributed())
2719  {
2720  // just copy to the new vector
2721  double* z_pt = z[Color].values_pt();
2722  unsigned nrow_local = local_z.nrow_local();
2723  for (unsigned i = 0; i < nrow_local; i++)
2724  {
2725  z_pt[i] = local_z_values[i];
2726  }
2727  }
2728  else
2729  {
2730  //
2731  double* z_pt = z[Color].values_pt();
2732 
2733  // current first_row and nrow_local
2734  unsigned current_first_row = r[Color].first_row();
2735 
2736  // cache first_row
2737  unsigned first_row = Preconditioner_pt->first_row();
2738 
2739  //
2740  if (Nrow_local_from_proc[Color][my_rank] != 0)
2741  {
2742  // offset for values send to self
2743  unsigned offset_n_send =
2744  First_row_for_proc[Color][my_rank]-current_first_row;
2745 
2746  // offset for values receive from self
2747  unsigned offset_n_recv =
2748  First_row_from_proc[Color][my_rank]-first_row;
2749 
2750  // send/receive
2751  unsigned n_n = Nrow_local_from_proc[Color][my_rank];
2752  for (unsigned j = 0; j < n_n; j++)
2753  {
2754  z_pt[offset_n_send + j] =
2755  local_z_values[offset_n_recv + j];
2756  }
2757  }
2758  }
2759 
2760 
2761  // wait for the receives to complete
2762  n_recv = recv_reqs.size();
2763  if (n_recv)
2764  {
2765  MPI_Waitall(n_recv,&recv_reqs[0],MPI_STATUS_IGNORE);
2766  }
2767  recv_reqs.clear();
2768 
2769  // wait for the sends to complete
2770  unsigned n_send = send_reqs.size();
2771  if (n_send)
2772  {
2773  MPI_Waitall(n_send,&send_reqs[0],MPI_STATUS_IGNORE);
2774  }
2775  send_reqs.clear();
2776  }
2777 }
2778 
2779 // End of "if we have mpi"
2780 #endif
unsigned Method
the communication method in the setup_preconditioners(...) method
virtual void preconditioner_solve(const DoubleVector &r, DoubleVector &z)=0
Apply the preconditioner. Pure virtual generic interface function. This method should apply the preco...
Vector< unsigned > Nproc_for_prec
The nrow_local component of the distribution of the processors over the preconditioners.
cstr elem_len * i
Definition: cfortran.h:607
unsigned Color
the Color of this processor (or the preconditioner number)
int compute_tag(const int &nproc, const int &source, const int &dest, const int &type)
helper method for computing the MPI_Isend and MPI_Irecv tags
Preconditioner * Preconditioner_pt
The pointer to the local preconditioner on this processor.
OomphInfo oomph_info
void setup_preconditioners(Vector< CRDoubleMatrix *> matrix_pt, Vector< Preconditioner *> prec_pt, const OomphCommunicator *comm_pt)
Vector< Vector< unsigned > > First_row_for_proc
Storage (indexed [i][j]) for the first row that will be sent from this processor to processor j for p...
OomphCommunicator * Local_communicator_pt
Vector of communicators for the preconditioners.
Vector< LinearAlgebraDistribution * > Distribution_pt
Vector< unsigned > First_proc_for_prec
The first_row component of the distribution of the processors over the preconditioners.
OomphCommunicator * Global_communicator_pt
pointer to the global communicator for this preconditioner array
unsigned first_row() const
access function for the first row on this processor
void clean_up_memory()
Clean up memory.
void setup(DoubleMatrixBase *matrix_pt)
Setup the preconditioner: store the matrix pointer and the communicator pointer then call preconditio...
Vector< Vector< unsigned > > Nrow_local_for_proc
Storage (indexed [i][j]) for the nrow_local that will be sent from this processor to processor j for ...
Vector< Vector< unsigned > > First_row_from_proc
Storage (indexed [i][j]) for the first row that will be received by this processor from processor j f...
Vector< Vector< unsigned > > Nrow_local_from_proc
Storage (indexed [i][j]) for the nrow_local that will be received by this processor from processor j ...
void solve_preconditioners(const Vector< DoubleVector > &r, Vector< DoubleVector > &z)
Applies each preconditioner to the corresponding vector in r and z.
unsigned Nprec
the number of preconditioner in the array
LinearAlgebraDistribution * distribution_pt() const
access to the LinearAlgebraDistribution