partitioning.cc
1 //LIC// ====================================================================
2 //LIC// This file forms part of oomph-lib, the object-oriented,
3 //LIC// multi-physics finite-element library, available
4 //LIC// at http://www.oomph-lib.org.
5 //LIC//
6 //LIC// Version 1.0; svn revision $LastChangedRevision$
7 //LIC//
8 //LIC// $LastChangedDate$
9 //LIC//
10 //LIC// Copyright (C) 2006-2016 Matthias Heil and Andrew Hazel
11 //LIC//
12 //LIC// This library is free software; you can redistribute it and/or
13 //LIC// modify it under the terms of the GNU Lesser General Public
14 //LIC// License as published by the Free Software Foundation; either
15 //LIC// version 2.1 of the License, or (at your option) any later version.
16 //LIC//
17 //LIC// This library is distributed in the hope that it will be useful,
18 //LIC// but WITHOUT ANY WARRANTY; without even the implied warranty of
19 //LIC// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 //LIC// Lesser General Public License for more details.
21 //LIC//
22 //LIC// You should have received a copy of the GNU Lesser General Public
23 //LIC// License along with this library; if not, write to the Free Software
24 //LIC// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25 //LIC// 02110-1301 USA.
26 //LIC//
27 //LIC// The authors may be contacted at oomph-lib@maths.man.ac.uk.
28 //LIC//
29 //LIC//====================================================================
30 #include<float.h>
31 
32 #include "partitioning.h"
33 #include "mesh.h"
34 #include "refineable_mesh.h"
35 //Include to fill in additional_setup_shared_node_scheme() function
37 
38 
39 namespace oomph
40 {
41 
42 //====================================================================
43 /// Namespace for METIS graph partitioning routines
44 //====================================================================
45 namespace METIS
46 {
47 
48 
49 
50  /// \short Default function that translates spatial
51  /// error into weight for METIS partitioning (unit weight regardless
52  /// of input).
53  void default_error_to_weight_fct(const double& spatial_error,
54  const double& max_error,
55  const double& min_error,
56  int& weight)
57  {
58  weight=1;
59  }
60 
61  /// \short Function pointer to function that translates spatial
 62  /// error into weight for METIS partitioning.
 63  ErrorToWeightFctPt Error_to_weight_fct_pt=&default_error_to_weight_fct;
 64 
65 }
66 
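Illustrative sketch, not part of partitioning.cc: driver code can bias the partitioning by pointing METIS::Error_to_weight_fct_pt (declared above) at its own translation function with the ErrorToWeightFctPt signature from partitioning.h. The function name and the thresholding rule below are made up for illustration, and the sketch assumes the driver pulls in the usual oomph-lib generic headers.

#include "generic.h"  // usual aggregate header included by oomph-lib drivers

// Hypothetical weight function: elements whose spatial error lies in the
// upper half of the observed error range count double.
void heavy_where_error_is_large(const double& spatial_error,
                                const double& max_error,
                                const double& min_error,
                                int& weight)
{
 weight=1;
 if (spatial_error>0.5*(min_error+max_error)) weight=2;
}

// In the driver, before the mesh is partitioned:
// oomph::METIS::Error_to_weight_fct_pt=&heavy_where_error_is_large;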
67 
68 
69 
70 //==================================================================
71 /// Partition mesh uniformly by dividing elements
72 /// equally over the partitions, in the order
73 /// in which they are returned by problem.
74 /// On return, element_domain[ielem] contains the number
75 /// of the domain [0,1,...,ndomain-1] to which
76 /// element ielem has been assigned.
77 //==================================================================
78 void METIS::uniform_partition_mesh(Problem* problem_pt,
 79  const unsigned& ndomain,
80  Vector<unsigned>& element_domain)
81 {
82 
83  // Number of elements
84  unsigned nelem=problem_pt->mesh_pt()->nelement();
85 
86 #ifdef PARANOID
87  if (nelem!=element_domain.size())
88  {
89  std::ostringstream error_stream;
90  error_stream << "element_domain Vector has wrong length "
91  << nelem << " " << element_domain.size() << std::endl;
92 
93  throw OomphLibError(error_stream.str(),
94  OOMPH_CURRENT_FUNCTION,
95  OOMPH_EXCEPTION_LOCATION);
96  }
97 #endif
98 
99 
100 
101  // Uniform partitioning
102  unsigned nel_per_domain=int(float(nelem)/float(ndomain));
103  for (unsigned ielem=0;ielem<nelem;ielem++)
104  {
105  unsigned idomain=unsigned(float(ielem)/float(nel_per_domain));
106  element_domain[ielem]=idomain;
107  }
108 
109 }
110 
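Illustrative call, not part of this file: the PARANOID block above requires the caller to size element_domain to the mesh's element count before the call. The helper below is hypothetical and assumes a fully built oomph-lib Problem and the usual generic headers.

#include "generic.h"
using namespace oomph;

// Hypothetical helper: distribute the elements of the problem's global mesh
// evenly over ndomain domains and return the element-to-domain lookup.
Vector<unsigned> simple_uniform_partition(Problem* problem_pt,
                                          const unsigned& ndomain)
{
 // One entry per element, as required by the PARANOID check
 Vector<unsigned> element_domain(problem_pt->mesh_pt()->nelement());
 METIS::uniform_partition_mesh(problem_pt,ndomain,element_domain);
 return element_domain;
}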
111 
112 //==================================================================
113 /// Use METIS to assign each element to a domain.
114 /// On return, element_domain[ielem] contains the number
115 /// of the domain [0,1,...,ndomain-1] to which
116 /// element ielem has been assigned.
117 /// - objective=0: minimise edgecut.
118 /// - objective=1: minimise total communications volume.
119 /// .
120 /// Partitioning is based on the dual graph of the mesh.
121 //==================================================================
122 void METIS::partition_mesh(Problem* problem_pt, const unsigned& ndomain,
123  const unsigned& objective,
124  Vector<unsigned>& element_domain)
125 {
126  // Global mesh
127  Mesh* mesh_pt=problem_pt->mesh_pt();
128 
129  // Number of elements
130  unsigned nelem=mesh_pt->nelement();
131 
132 #ifdef PARANOID
133  if (nelem!=element_domain.size())
134  {
135  std::ostringstream error_stream;
136  error_stream << "element_domain Vector has wrong length "
137  << nelem << " " << element_domain.size() << std::endl;
138 
139  throw OomphLibError(error_stream.str(),
140  OOMPH_CURRENT_FUNCTION,
141  OOMPH_EXCEPTION_LOCATION);
142  }
143 #endif
144 
145  // Setup dual graph
146  //------------------
147 
148  // Start timer
149  clock_t cpu_start=clock();
150 
151  // Container to collect all elements associated with given global eqn number
152  std::map<unsigned,std::set<unsigned> > elements_connected_with_global_eqn;
153 
154  // Container for all unique global eqn numbers
155  std::set<unsigned> all_global_eqns;
156 
157  // Loop over all elements
158  for (unsigned e=0;e<nelem;e++)
159  {
160  GeneralisedElement* el_pt=mesh_pt->element_pt(e);
161 
162  // Add all global eqn numbers
163  unsigned ndof=el_pt->ndof();
164  for (unsigned j=0;j<ndof;j++)
165  {
166  // Get global eqn number
167  unsigned eqn_number=el_pt->eqn_number(j);
168  elements_connected_with_global_eqn[eqn_number].insert(e);
169  all_global_eqns.insert(eqn_number);
170  }
171  }
172 
173  // Now reverse the lookup scheme to find out all elements
174  // that are connected because they share the same global eqn
175  Vector<std::set<unsigned> > connected_elements(nelem);
176 
177  // Counter for total number of entries in connected_elements structure
178  unsigned count=0;
179 
180  // Loop over all global eqns
181  for (std::set<unsigned>::iterator it=all_global_eqns.begin();
182  it!=all_global_eqns.end();it++)
183  {
184  // Get set of elements connected with this data item
185  std::set<unsigned> elements=elements_connected_with_global_eqn[*it];
186 
187  // Double loop over connected elements: Everybody's connected to
188  // everybody
189  for (std::set<unsigned>::iterator it1=elements.begin();
190  it1!=elements.end();it1++)
191  {
192  for (std::set<unsigned>::iterator it2=elements.begin();
193  it2!=elements.end();it2++)
194  {
195  if ((*it1)!=(*it2))
196  {
197  connected_elements[(*it1)].insert(*it2);
198  }
199  }
200  }
201  }
202 
203 
204  // Now convert into C-style packed array for interface with METIS
205  int* xadj = new int[nelem+1];
206  Vector<int> adjacency_vector;
207 
208  // Reserve (too much) space
209  adjacency_vector.reserve(count);
210 
211  // Initialise counters
212  unsigned ientry=0;
213 
214  // Loop over all elements
215  for (unsigned e=0;e<nelem;e++)
216  {
217  // First entry for current element
218  xadj[e]=ientry;
219 
220  // Loop over elements that are connected to current element
221  typedef std::set<unsigned>::iterator IT;
222  for (IT it=connected_elements[e].begin();
223  it!=connected_elements[e].end();it++)
224  {
225  // Copy into adjacency array
226  adjacency_vector.push_back(*it);
227 
228  // We've just made another entry
229  ientry++;
230  }
231 
232  // Entry after last entry for current element:
233  xadj[e+1]=ientry;
234 
235  }
236 
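 // Editorial illustration, not part of the original file: for a
 // hypothetical three-element mesh in which elements 0 and 1 share a dof
 // and elements 1 and 2 share a dof, the loop above produces the packed
 // (CSR-style) storage
 //
 //   adjacency_vector = [1, 0, 2, 1]
 //   xadj             = [0, 1, 3, 4]
 //
 // so the neighbours of element e are the entries
 // adjacency_vector[xadj[e]],...,adjacency_vector[xadj[e+1]-1].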
237  // End timer
238  clock_t cpu_end=clock();
239 
240  // Doc
241  double cpu0=double(cpu_end-cpu_start)/CLOCKS_PER_SEC;
242  oomph_info
243  << "CPU time for setup of METIS data structures [nelem="
244  << nelem <<"]: "
245  << cpu0
246  << " sec" << std::endl;
247 
248 
249  // Call METIS graph partitioner
250  //-----------------------------
251 
252  // Start timer
253  cpu_start=clock();
254 
255  // Number of vertices in graph
256  int nvertex=nelem;
257 
258  // No vertex weights
259  int* vwgt=0;
260 
261  // No edge weights
262  int* adjwgt=0;
263 
264  // Flag indicating that graph isn't weighted: 0; vertex weights only: 2
265  // Note that wgtflag==2 requires nodal weights to be stored in vwgt.
266  int wgtflag=0;
267 
268  // Use C-style numbering (first array entry is zero)
269  int numflag=0;
270 
271  // Number of desired partitions
272  int nparts=ndomain;
273 
274  // Use default options
275  int* options= new int[10];
276  options[0]=0;
277 
278  // Number of cut edges in graph
279  int* edgecut = new int[nelem];
280 
281  // Array containing the partition information
282  int* part = new int[nelem];
283 
284  // Can we get an error estimate?
285 
286  unsigned n_mesh=problem_pt->nsub_mesh();
287 
288  if (n_mesh==0)
289  {
290 
291  RefineableMeshBase* mmesh_pt=dynamic_cast<RefineableMeshBase*>(mesh_pt);
292  if (mmesh_pt!=0)
293  {
294 
295  // Bias distribution?
296  if (Error_to_weight_fct_pt!=&default_error_to_weight_fct)
 297  {
298  oomph_info <<
299  "Biasing element distribution via spatial error estimate\n";
300 
301  // Adjust flag and provide storage for weights
302  wgtflag=2;
303  vwgt=new int[nelem];
304 
305  // Get error for all elements
306  Vector<double> elemental_error(nelem);
307  mmesh_pt->spatial_error_estimator_pt()->
308  get_element_errors(mesh_pt,elemental_error);
309 
310  double max_error=*(std::max_element(elemental_error.begin(),
311  elemental_error.end()));
312  double min_error=*(std::min_element(elemental_error.begin(),
313  elemental_error.end()));
314 
315  // Bias weights
316  int weight=1;
317  for (unsigned e=0;e<nelem;e++)
318  {
319  // Translate error into weight
320  Error_to_weight_fct_pt(elemental_error[e],max_error,min_error,
321  weight);
322  vwgt[e]=weight;
323  }
324  }
325  }
326  }
327  else // There are submeshes
328  {
329  // Are any of the submeshes refineable?
330  bool refineable_submesh_exists=false;
331  // Vector to store "start and end point" for loops in submeshes
332  Vector<unsigned> loop_helper(n_mesh+1);
333  loop_helper[0]=0;
334 
335  // Loop over submeshes
336  for (unsigned i_mesh=0;i_mesh<n_mesh;i_mesh++)
337  {
338  // Store the end of the loop
339  loop_helper[i_mesh+1]=problem_pt->mesh_pt(i_mesh)->nelement()+
340  loop_helper[i_mesh];
341 
342  RefineableMeshBase* mmesh_pt=
343  dynamic_cast<RefineableMeshBase*>(problem_pt->mesh_pt(i_mesh));
344  if (mmesh_pt!=0)
345  {
346  refineable_submesh_exists=true;
347  }
348  }
349 
350  // If a refineable submesh exists
351  if (refineable_submesh_exists)
352  {
353  // Bias distribution?
354  if (Error_to_weight_fct_pt!=&default_error_to_weight_fct)
 355  {
356  oomph_info <<
357  "Biasing element distribution via spatial error estimate\n";
358 
359  // Adjust flag and provide storage for weights
360  wgtflag=2;
361  vwgt=new int[nelem];
362 
363  // Loop over submeshes
364  for (unsigned i_mesh=0;i_mesh<n_mesh;i_mesh++)
365  {
366  RefineableMeshBase* mmesh_pt=
367  dynamic_cast<RefineableMeshBase*>(problem_pt->mesh_pt(i_mesh));
368  if (mmesh_pt!=0)
369  {
370  // Get error for all elements
371  unsigned nsub_elem=loop_helper[i_mesh+1]-loop_helper[i_mesh];
372  Vector<double> elemental_error(nsub_elem);
373  mmesh_pt->spatial_error_estimator_pt()->
374  get_element_errors(problem_pt->mesh_pt(i_mesh),
375  elemental_error);
376 
377  double max_error=*(std::max_element(elemental_error.begin(),
378  elemental_error.end()));
379  double min_error=*(std::min_element(elemental_error.begin(),
380  elemental_error.end()));
381 
382  // Bias weights
383  int weight=1;
384  unsigned start=loop_helper[i_mesh];
385  unsigned end=loop_helper[i_mesh+1];
386  for (unsigned e=start;e<end;e++)
387  {
388  unsigned error_index=e-start;
389  // Translate error into weight
390  Error_to_weight_fct_pt(elemental_error[error_index],max_error,
391  min_error,weight);
392  vwgt[e]=weight;
393  }
394  }
395  else // This mesh is not refineable
396  {
397  // There's no error estimator, so use the default weight
398  int weight=1;
399  unsigned start=loop_helper[i_mesh];
400  unsigned end=loop_helper[i_mesh+1];
401  for (unsigned e=start;e<end;e++)
402  {
403  vwgt[e]=weight;
404  }
405  }
406  }
407  }
408  }
409  }
410 
411  // Call partitioner
412  if (objective==0)
413  {
414  // Partition with the objective of minimising the edge cut
415  METIS_PartGraphKway(&nvertex, xadj, &adjacency_vector[0], vwgt, adjwgt,
416  &wgtflag, &numflag, &nparts, options, edgecut, part);
417  }
418  else if (objective==1)
419  {
420  // Partition with the objective of minimising the total communication
421  // volume
422  METIS_PartGraphVKway(&nvertex, xadj, &adjacency_vector[0], vwgt, adjwgt,
423  &wgtflag, &numflag, &nparts, options, edgecut, part);
424  }
425  else
426  {
427  std::ostringstream error_stream;
428  error_stream
429  << "Wrong objective for METIS. objective = " << objective << std::endl;
430 
431  throw OomphLibError(error_stream.str(),
432  OOMPH_CURRENT_FUNCTION,
433  OOMPH_EXCEPTION_LOCATION);
434  }
435 
436 
437 #ifdef PARANOID
438  std::vector<bool> done(nparts,false);
439 #endif
440 
441  // Copy across
442  for (unsigned e=0;e<nelem;e++)
443  {
444  element_domain[e]=part[e];
445 #ifdef PARANOID
446  done[part[e]]=true;
447 #endif
448  }
449 
450 
451 #ifdef PARANOID
452  // Check
453  std::ostringstream error_stream;
454  bool shout=false;
455  for (int p=0;p<nparts;p++)
456  {
457  if (!done[p])
458  {
459  shout=true;
460  error_stream
461  << "No elements on processor " << p
462  << "when trying to partition " << nelem
463  << "elements over " << nparts << " processors!\n";
464  }
465  }
466  if (shout)
467  {
468  throw OomphLibError(error_stream.str(),
469  OOMPH_CURRENT_FUNCTION,
470  OOMPH_EXCEPTION_LOCATION);
471  }
472 #endif
473 
474 
475  // End timer
476  cpu_end=clock();
477 
478  // Doc
479  double cpu1=double(cpu_end-cpu_start)/CLOCKS_PER_SEC;
480  oomph_info
481  << "CPU time for METIS mesh partitioning [nelem="
482  << nelem <<"]: "
483  << cpu1
484  << " sec" << std::endl;
485 
486 
487  // Cleanup
488  delete [] xadj;
489  delete [] part;
490  delete [] edgecut;
491  delete [] options;
492 
493 }
494 
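Illustrative sketch, not part of this file: a serial driver sizes element_domain to the number of elements, picks the METIS objective and calls partition_mesh; installing a non-default Error_to_weight_fct_pt beforehand (see the sketch after the METIS namespace above) activates the error-based biasing branch. The wrapper name and the choice of objective are hypothetical.

#include "generic.h"
using namespace oomph;

// Hypothetical driver fragment: METIS partitioning of the global mesh
void metis_partition(Problem* problem_pt, const unsigned& ndomain,
                     Vector<unsigned>& element_domain)
{
 // One entry per element, as required by the PARANOID check
 element_domain.resize(problem_pt->mesh_pt()->nelement());

 // objective=0: minimise edge cut; objective=1: minimise total
 // communications volume
 unsigned objective=0;

 METIS::partition_mesh(problem_pt,ndomain,objective,element_domain);
}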
495 
496 #ifdef OOMPH_HAS_MPI
497 
498 
499 //==================================================================
500 /// \short Use METIS to assign each element in an already-distributed mesh
501 /// to a domain. On return, element_domain_on_this_proc[e] contains the number
502 /// of the domain [0,1,...,ndomain-1] to which non-halo element e on THE
503 /// CURRENT PROCESSOR ONLY has been assigned. The order of the non-halo
504 /// elements is the same as in the Problem's mesh, with the halo
505 /// elements being skipped.
506 /// Objective:
507 /// - objective=0: minimise edgecut.
508 /// - objective=1: minimise total communications volume.
509 /// .
510 /// The partitioning is based on the dof graph of the complete mesh by
511 /// taking into
512 /// account which global equation numbers are affected by each element and
513 /// connecting elements which affect the same global equation number.
514 /// Partitioning is done such that all elements associated with the
515 /// same tree root move together. Non-refineable elements are
516 /// treated as their own root elements. If the optional boolean
517 /// flag is set to true (it defaults to false) each processor
518 /// assigns a dumb-but-repeatable equidistribution of its non-halo
519 /// elements over the domains and outputs the input that would have
520 /// gone into METIS in the file metis_input_for_validation.dat
521 //==================================================================
522 void METIS::partition_distributed_mesh
 523  (Problem* problem_pt,const unsigned& objective,
524  Vector<unsigned>& element_domain_on_this_proc,
525  const bool& bypass_metis)
526  {
527  // Start timer
528  clock_t cpu_start=clock();
529 
530  // Communicator
531  OomphCommunicator* comm_pt=problem_pt->communicator_pt();
532 
533  // Number of processors / domains
534  unsigned n_proc=comm_pt->nproc();
535  unsigned my_rank=comm_pt->my_rank();
536 
537  // Global mesh
538  Mesh* mesh_pt=problem_pt->mesh_pt();
539 
540  // Total number of elements (halo and nonhalo) on this proc
541  unsigned n_elem=mesh_pt->nelement();
542 
543  // Get elemental assembly times
544  Vector<double> elemental_assembly_time=
545  problem_pt->elemental_assembly_time();
546 
547 #ifdef PARANOID
548  unsigned n=elemental_assembly_time.size();
549  if ((n!=0)&&(n!= n_elem))
550  {
551  std::ostringstream error_stream;
552  error_stream
553  << "Number of elements doesn't match the \n"
554  << "number of elemental assembly times: "
555  << n_elem << " " << n << std::endl;
556  throw OomphLibError(error_stream.str(),
557  OOMPH_CURRENT_FUNCTION,
558  OOMPH_EXCEPTION_LOCATION);
559  }
560 #endif
561 
562  // Can we base load balancing on assembly times?
563  bool can_load_balance_on_assembly_times=false;
564  if (elemental_assembly_time.size()!=0)
565  {
566  can_load_balance_on_assembly_times=true;
567  }
568 
569  // Storage for global eqn numbers on current processor
570  std::set<unsigned> global_eqns_on_this_proc;
571 
572  // Storage for pointers to root elements that are connected with given
573  // eqn number -- assembled on local processor
574  std::map<unsigned,std::set<GeneralisedElement*> >
575  root_elements_connected_with_global_eqn_on_this_proc;
576 
577  // Storage for long sequence of equation numbers as encountered
578  // by the root elements on this processor
579  Vector<unsigned> eqn_numbers_with_root_elements_on_this_proc;
580 
581  // Reserve number of elements x average/estimate (?) for number of dofs
582  // per element
583  eqn_numbers_with_root_elements_on_this_proc.reserve(n_elem*9);
584 
585  // Storage for the number of eqn numbers associated with each
586  // root element on this processor -- once this and the previous
587  // container have been collected from all processors we're
588  // able to reconstruct which root element (in the nominal "global" mesh)
589  // is connected with which global equations
590  Vector<unsigned> number_of_dofs_for_root_element;
591  number_of_dofs_for_root_element.reserve(n_elem);
592 
593  // Ditto for number of "leaf" elements connected with each root
594  Vector<unsigned> number_of_non_halo_elements_for_root_element;
595  number_of_non_halo_elements_for_root_element.reserve(n_elem);
596 
597  // Ditto for total assembly time of "leaf" elements connected with each root
598  Vector<double> total_assembly_time_for_root_element;
599  total_assembly_time_for_root_element.reserve(n_elem);
600 
601  // Map storing the number of the root elements on this processor
602  // (offset by one to bypass the zero default).
603  std::map<GeneralisedElement*,unsigned> root_el_number_plus_one;
604 
605  // Loop over non-halo elements on this processor
606  int number_of_root_elements=0;
607  unsigned number_of_non_halo_elements=0;
608  for (unsigned e=0; e<n_elem; e++)
609  {
610  double el_assembly_time=0.0;
611  GeneralisedElement* el_pt=mesh_pt->element_pt(e);
612  if (!el_pt->is_halo())
613  {
614  if (can_load_balance_on_assembly_times)
615  {
616  el_assembly_time=elemental_assembly_time[e];
617  }
618 
619  // Get the associated root element which is either...
620  GeneralisedElement* root_el_pt=0;
621  RefineableElement* ref_el_pt=dynamic_cast<RefineableElement*>(el_pt);
622  if (ref_el_pt!=0)
623  {
624  //...the actual root element
625  root_el_pt=ref_el_pt->root_element_pt();
626  }
627  // ...or the element itself
628  else
629  {
630  root_el_pt=el_pt;
631  }
632 
633  // Have we already encountered this root element?
634  // (offset of one to bypass the default return of zero)
635  bool already_encountered=false;
636  unsigned root_el_number=root_el_number_plus_one[root_el_pt];
637  if (root_el_number_plus_one[root_el_pt]==0)
638  {
639  // This is a new one
640  already_encountered=false;
641 
642  // Give it a number
643  number_of_root_elements++;
644  root_el_number_plus_one[root_el_pt]=number_of_root_elements;
645 
646  // Remove offset
647  root_el_number=number_of_root_elements-1;
648  }
649  else
650  {
651  // We've already visited this one before...
652  already_encountered=true;
653 
654  // Remove offset
655  root_el_number-=1;
656  }
657 
658 
659  // Get global equation numbers of actual element
660  unsigned n_dof=el_pt->ndof();
661  for (unsigned i=0; i<n_dof; i++)
662  {
663  unsigned eqn_no=el_pt->eqn_number(i);
664 
665  // Record which root elements are connected with this eqn number
666  root_elements_connected_with_global_eqn_on_this_proc[eqn_no].
667  insert(root_el_pt);
668 
669  // Record all global eqn numbers on this processor
670  global_eqns_on_this_proc.insert(eqn_no);
671 
672  // Add eqn number of the current element to the long sequence
673  // of eqn numbers
674  eqn_numbers_with_root_elements_on_this_proc.push_back(eqn_no);
675  }
676 
677  // Now record how many equations are associated with the current
678  // non-halo element
679  if (already_encountered)
680  {
681  number_of_dofs_for_root_element[root_el_number]+=n_dof;
682  number_of_non_halo_elements_for_root_element[root_el_number]++;
683  total_assembly_time_for_root_element[root_el_number]+=el_assembly_time;
684  }
685  else
686  {
687  number_of_dofs_for_root_element.push_back(n_dof);
688  number_of_non_halo_elements_for_root_element.push_back(1);
689  total_assembly_time_for_root_element.push_back(el_assembly_time);
690  }
691 
692  // Bump up number of non-halos
693  number_of_non_halo_elements++;
694  }
695  }
696 
697  // Tell everybody how many root elements
698  // are on each processor
699  unsigned root_processor=0;
700  Vector<int> number_of_root_elements_on_each_proc(n_proc,0);
701  MPI_Allgather(&number_of_root_elements, 1, MPI_INT,
702  &number_of_root_elements_on_each_proc[0], 1, MPI_INT,
703  comm_pt->mpi_comm());
704 
705 
706  // In the big sequence of concatenated root elements (enumerated individually
707  // on the various processors) where do the root elements from a given
708  // processor start? Also figure out how many root elements there are in
709  // total by summing up their numbers
710  Vector<int> start_index(n_proc,0);
711  unsigned total_number_of_root_elements=0;
712  for (unsigned i_proc=0; i_proc<n_proc; i_proc++)
713  {
714  total_number_of_root_elements+=number_of_root_elements_on_each_proc[i_proc];
715  if (i_proc!=0)
716  {
717  start_index[i_proc]=total_number_of_root_elements-
718  number_of_root_elements_on_each_proc[i_proc];
719  }
720  else
721  {
722  start_index[0]=0;
723  }
724  }
725 
726 
727 
728  // How many global equations are held on this processor?
729  int n_eqns_on_this_proc=
730  eqn_numbers_with_root_elements_on_this_proc.size();
731 
732  // Gather this information for all processors:
733  // n_eqns_on_each_proc[iproc] now contains the number of global
734  // equations held on processor iproc.
735  Vector<int> n_eqns_on_each_proc(n_proc,0);
736  MPI_Allgather(&n_eqns_on_this_proc, 1, MPI_INT,
737  &n_eqns_on_each_proc[0], 1, MPI_INT,
738  comm_pt->mpi_comm());
739 
740 
741 
742  // In the big sequence of equation numbers from the root elements
743  // (enumerated individually on the various processors) where do the
744  // equation numbers associated with the root elements from a given
745  // processor start? Also figure out how long the sequence of equation
746  // numbers is
747  Vector<int> start_eqns_index(n_proc,0);
748  unsigned total_n_eqn=0;
749  for (unsigned i_proc=0; i_proc<n_proc; i_proc++)
750  {
751  total_n_eqn+=n_eqns_on_each_proc[i_proc];
752  if (i_proc!=0)
753  {
754  start_eqns_index[i_proc]=total_n_eqn-n_eqns_on_each_proc[i_proc];
755  }
756  else
757  {
758  start_eqns_index[0]=0;
759  }
760  }
761 
762 
763  // Big vector that contains the number of dofs for each root element
764  // (concatenated in processor-by-processor order)
765  Vector<unsigned> number_of_dofs_for_global_root_element(
766  total_number_of_root_elements);
767  // Create at least one entry so we don't get a seg fault below
768  if (number_of_dofs_for_root_element.size()==0)
769  {
770  number_of_dofs_for_root_element.resize(1);
771  }
772  MPI_Gatherv(&number_of_dofs_for_root_element[0], // pointer to first entry in
773  // vector to be gathered on root
774  number_of_root_elements, // Number of entries to be sent
775  // from current processor
776  MPI_UNSIGNED,
777  &number_of_dofs_for_global_root_element[0], // Target -- this will
778  // store the concatenated
779  // vectors sent from
780  // everywhere
781  &number_of_root_elements_on_each_proc[0], // Pointer to
782  // vector containing
783  // the length of the
784  // vectors received
785  // from elsewhere
786  &start_index[0], // "offset" for storage of vector received
787  // from various processors in the global
788  // concatenated vector stored on root
789  MPI_UNSIGNED,
790  root_processor, comm_pt->mpi_comm());
791 
792 
793 
794  // ditto for number of non-halo elements associated with root element
795  Vector<unsigned> number_of_non_halo_elements_for_global_root_element(
796  total_number_of_root_elements);
797 
798  // Create at least one entry so we don't get a seg fault below
799  if (number_of_non_halo_elements_for_root_element.size()==0)
800  {
801  number_of_non_halo_elements_for_root_element.resize(1);
802  }
803  MPI_Gatherv(&number_of_non_halo_elements_for_root_element[0],
804  // pointer to first entry in
805  // vector to be gathered on root
806  number_of_root_elements, // Number of entries to be sent
807  // from current processor
808  MPI_UNSIGNED,
809  &number_of_non_halo_elements_for_global_root_element[0],
810  // Target -- this will
811  // store the concatenated
812  // vectors sent from
813  // everywhere
814  &number_of_root_elements_on_each_proc[0], // Pointer to
815  // vector containing
816  // the length of the
817  // vectors received
818  // from elsewhere
819  &start_index[0], // "offset" for storage of vector received
820  // from various processors in the global
821  // concatenated vector stored on root
822  MPI_UNSIGNED,
823  root_processor, comm_pt->mpi_comm());
824 
825 
826 
827  // ditto for assembly times of elements associated with root element
828  Vector<double> total_assembly_time_for_global_root_element(
829  total_number_of_root_elements);
830 
831  // Create at least one entry so we don't get a seg fault below
832  if (total_assembly_time_for_root_element.size()==0)
833  {
834  total_assembly_time_for_root_element.resize(1);
835  }
836  MPI_Gatherv(&total_assembly_time_for_root_element[0],
837  // pointer to first entry in
838  // vector to be gathered on root
839  number_of_root_elements, // Number of entries to be sent
840  // from current processor
841  MPI_DOUBLE,
842  &total_assembly_time_for_global_root_element[0],
843  // Target -- this will
844  // store the concatenated
845  // vectors sent from
846  // everywhere
847  &number_of_root_elements_on_each_proc[0], // Pointer to
848  // vector containing
849  // the length of the
850  // vectors received
851  // from elsewhere
852  &start_index[0], // "offset" for storage of vector received
853  // from various processors in the global
854  // concatenated vector stored on root
855  MPI_DOUBLE,
856  root_processor, comm_pt->mpi_comm());
857 
858 
859  // Big vector to store the long sequence of global equation numbers
860  // associated with the long sequence of root elements
861  Vector<unsigned> eqn_numbers_with_root_elements(total_n_eqn);
862 
863  // Create at least one entry so we don't get a seg fault below
864  if (eqn_numbers_with_root_elements_on_this_proc.size()==0)
865  {
866  eqn_numbers_with_root_elements_on_this_proc.resize(1);
867  }
868  MPI_Gatherv(&eqn_numbers_with_root_elements_on_this_proc[0],
869  n_eqns_on_this_proc,
870  MPI_UNSIGNED,
871  &eqn_numbers_with_root_elements[0],
872  &n_eqns_on_each_proc[0],
873  &start_eqns_index[0],
874  MPI_UNSIGNED,
875  root_processor, comm_pt->mpi_comm());
876 
877  // Doc
878  clock_t cpu_end=clock();
879 
880  double cpu0=double(cpu_end-cpu_start)/CLOCKS_PER_SEC;
881  oomph_info
882  << "CPU time for global setup of METIS data structures [nroot_elem="
883  << total_number_of_root_elements <<"]: "
884  << cpu0
885  << " sec" << std::endl;
886 
887 
888 
889  // Now the root processor has gathered all the data needed to establish
890  // the root element connectivity (as in the serial case) so use METIS
891  // to determine "partitioning" for non-uniformly refined mesh
892  //----------------------------------------------------------------------
893 
894  // Vector to store target domain for each of the root elements (concatenated
895  // in processor-by-processor order)
896  Vector<unsigned> root_element_domain(total_number_of_root_elements,0);
897  if (my_rank==root_processor) //--
898  {
899  // Start timer
900  clock_t cpu_start=clock();
901 
902  // Repeat the steps used in the serial code: Storage for
903  // the global equations (on root processor)
904  std::set<unsigned> all_global_eqns_root_processor;
905 
906  // Set of root elements (as enumerated in the processor-by-processor order)
907  // associated with given global equation number
908  std::map<unsigned,std::set<unsigned> >
909  root_elements_connected_with_global_eqn_on_root_processor;
910 
911  // Retrace the steps of the serial code: Who's connected with who
912  unsigned count_all=0;
913  for (unsigned e=0; e<total_number_of_root_elements; e++)
914  {
915  unsigned n_eqn_no=number_of_dofs_for_global_root_element[e];
916  for (unsigned n=0; n<n_eqn_no; n++)
917  {
918  unsigned eqn_no=eqn_numbers_with_root_elements[count_all];
919  count_all++;
920  root_elements_connected_with_global_eqn_on_root_processor[eqn_no].
921  insert(e);
922  all_global_eqns_root_processor.insert(eqn_no);
923  }
924  }
925 
926  // Number of domains
927  unsigned ndomain=n_proc;
928 
929  // Now reverse the lookup scheme to find out all root elements
930  // that are connected because they share the same global eqn
931  Vector<std::set<unsigned> > connected_root_elements
932  (total_number_of_root_elements);
933 
934  // Counter for total number of entries in connected_root_elements structure
935  unsigned count=0;
936 
937  // Loop over all global eqns
938  for (std::set<unsigned>::iterator it=all_global_eqns_root_processor.begin();
939  it!=all_global_eqns_root_processor.end();it++)
940  {
941  // Get set of root elements connected with this data item
942  std::set<unsigned> root_elements=
943  root_elements_connected_with_global_eqn_on_root_processor[*it];
944 
945  // Double loop over connected root elements: Everybody's connected to
946  // everybody
947  for (std::set<unsigned>::iterator it1=root_elements.begin();
948  it1!=root_elements.end();it1++)
949  {
950  for (std::set<unsigned>::iterator it2=root_elements.begin();
951  it2!=root_elements.end();it2++)
952  {
953  if ((*it1)!=(*it2))
954  {
955  connected_root_elements[(*it1)].insert(*it2);
956  }
957  }
958  }
959  }
960 
961  // End timer
962  clock_t cpu_end=clock();
963 
964  // Doc
965  double cpu0b=double(cpu_end-cpu_start)/CLOCKS_PER_SEC;
966  oomph_info
967  << "CPU time for setup of connected elements (load balance) [nroot_elem="
968  << total_number_of_root_elements <<"]: "
969  << cpu0b
970  << " sec" << std::endl;
971 
972  // Now convert into C-style packed array for interface with METIS
973  cpu_start=clock();
974  int* xadj = new int[total_number_of_root_elements+1];
975  Vector<int> adjacency_vector;
976 
977  // Reserve (too much) space
978  adjacency_vector.reserve(count);
979 
980  // Initialise counters
981  unsigned ientry=0;
982 
983  // Loop over all elements
984  for (unsigned e=0;e<total_number_of_root_elements;e++)
985  {
986  // First entry for current element
987  xadj[e]=ientry;
988 
989  // Loop over elements that are connected to current element
990  typedef std::set<unsigned>::iterator IT;
991  for (IT it=connected_root_elements[e].begin();
992  it!=connected_root_elements[e].end();it++)
993  {
994  // Copy into adjacency array
995  adjacency_vector.push_back(*it);
996 
997  // We've just made another entry
998  ientry++;
999  }
1000 
1001  // Entry after last entry for current element:
1002  xadj[e+1]=ientry;
1003 
1004  }
1005 
1006  // End timer
1007  cpu_end=clock();
1008 
1009  // Doc
1010  double cpu0=double(cpu_end-cpu_start)/CLOCKS_PER_SEC;
1011  oomph_info
1012  << "CPU time for setup of METIS data structures (load balance) [nroot_elem="
1013  << total_number_of_root_elements <<"]: "
1014  << cpu0
1015  << " sec" << std::endl;
1016 
1017 
1018  // Call METIS graph partitioner
1019  //-----------------------------
1020 
1021  // Start timer
1022  cpu_start=clock();
1023 
1024  // Number of vertices in graph
1025  int nvertex=total_number_of_root_elements;
1026 
1027  // No vertex weights
1028  int* vwgt=0;
1029 
1030  // No edge weights
1031  int* adjwgt=0;
1032 
1033  // Flag indicating that graph isn't weighted: 0; vertex weights only: 2
1034  // Note that wgtflag==2 requires nodal weights to be stored in vwgt.
1035  int wgtflag=0;
1036 
1037  // Use C-style numbering (first array entry is zero)
1038  int numflag=0;
1039 
1040  // Number of desired partitions
1041  int nparts=ndomain;
1042 
1043  // Use default options
1044  int* options= new int[10];
1045  options[0]=0;
1046 
1047  // Number of cut edges in graph
1048  int* edgecut = new int[total_number_of_root_elements];
1049 
1050  // Array containing the partition information
1051  int* part = new int[total_number_of_root_elements];
1052 
1053  // Now bias distribution by giving each root element
1054  // a weight equal to the number of elements associated with it
1055 
1056  // Adjust flag and provide storage for weights
1057  wgtflag=2;
1058  vwgt=new int[total_number_of_root_elements];
1059 
1060 
1061  // Load balance based on assembly times of all leaf
1062  // elements associated with root
1063  if (can_load_balance_on_assembly_times)
1064  {
1065  oomph_info << "Basing distribution on assembly times of elements\n";
1066 
1067  // Normalise
1068  double min_time=*(std::min_element(
1069  total_assembly_time_for_global_root_element.begin(),
1070  total_assembly_time_for_global_root_element.end()));
1071 #ifdef PARANOID
1072  if (min_time==0.0)
1073  {
1074  std::ostringstream error_stream;
1075  error_stream
1076  << "Minimum assemble time for element is zero!\n";
1077  throw OomphLibError(error_stream.str(),
1078  OOMPH_CURRENT_FUNCTION,
1079  OOMPH_EXCEPTION_LOCATION);
1080  }
1081 #endif
1082 
1083  // Bypass METIS (usually for validation) and use made-up but
1084  // repeatable timings
1085  if (bypass_metis)
1086  {
1087  for (unsigned e=0;e<total_number_of_root_elements;e++)
1088  {
1089  vwgt[e]=e;
1090  }
1091  }
1092  else
1093  {
1094  for (unsigned e=0;e<total_number_of_root_elements;e++)
1095  {
1096  // Use assembly times (relative to minimum) as weight
1097  vwgt[e]=int(total_assembly_time_for_global_root_element[e]/
1098  min_time);
1099  }
1100  }
1101  }
1102  // Load balance based on number of leaf elements associated with
1103  // root
1104  else
1105  {
1106  oomph_info << "Basing distribution on number of elements\n";
1107  for (unsigned e=0;e<total_number_of_root_elements;e++)
1108  {
1109  vwgt[e]=number_of_non_halo_elements_for_global_root_element[e];
1110  }
1111  }
1112 
1113  // Bypass METIS (usually for validation)
1114  if (bypass_metis)
1115  {
1116 
1117  // Simple repeatable partition: Equidistribute root elements
1118  for (unsigned e=0;e<total_number_of_root_elements;e++)
1119  {
1120  // Simple repeatable partition: Equidistribute elements on each
1121  // processor
1122  part[e]=(n_proc-1)-
1123  unsigned(double(e)/double(total_number_of_root_elements)*
1124  double(n_proc));
1125  }
1126 
1127  oomph_info
1128  << "Bypassing METIS for validation purposes.\n"
1129  << "Appending input for metis in metis_input_for_validation.dat\n";
1130  std::ofstream outfile;
1131  outfile.open("metis_input_for_validation.dat",std::ios_base::app);
1132 
1133  // Dump out relevant input to metis
1134  for (unsigned e=0;e<total_number_of_root_elements+1;e++)
1135  {
1136  outfile << xadj[e] << std::endl;
1137  }
1138  unsigned n=adjacency_vector.size();
1139  for (unsigned i=0;i<n;i++)
1140  {
1141  outfile << adjacency_vector[i] << std::endl;
1142  }
1143  for (unsigned e=0;e<total_number_of_root_elements;e++)
1144  {
1145  outfile << vwgt[e] << std::endl;
1146  }
1147  outfile.close();
1148 
1149  }
1150  // Actually use METIS (good but not always repeatable!)
1151  else
1152  {
1153  if (objective==0)
1154  {
1155  // Partition with the objective of minimising the edge cut
1156  METIS_PartGraphKway(&nvertex, xadj, &adjacency_vector[0], vwgt, adjwgt,
1157  &wgtflag, &numflag, &nparts, options, edgecut,
1158  part);
1159  }
1160  else if (objective==1)
1161  {
1162  // Partition with the objective of minimising the total communication
1163  // volume
1164  METIS_PartGraphVKway(&nvertex, xadj, &adjacency_vector[0], vwgt, adjwgt,
1165  &wgtflag, &numflag, &nparts, options, edgecut,
1166  part);
1167  }
1168  }
1169 
1170  // Copy across
1171  Vector<unsigned> total_weight_on_proc(n_proc,0);
1172  for (unsigned e=0;e<total_number_of_root_elements;e++)
1173  {
1174  root_element_domain[e]=part[e];
1175  total_weight_on_proc[part[e]]+=vwgt[e];
1176  }
1177 
1178  // Document success of partitioning
1179  for (unsigned j=0;j<n_proc;j++)
1180  {
1181  oomph_info << "Total weight on proc " << j
1182  << " is " << total_weight_on_proc[j] << std::endl;
1183  }
1184 
1185  // Doc
1186  double cpu1=double(cpu_end-cpu_start)/CLOCKS_PER_SEC;
1187  oomph_info
1188  << "CPU time for METIS mesh partitioning [nroot_elem="
1189  << total_number_of_root_elements <<"]: "
1190  << cpu1
1191  << " sec" << std::endl;
1192 
1193  // Cleanup
1194  delete [] xadj;
1195  delete [] part;
1196  delete [] vwgt;
1197  delete [] edgecut;
1198  delete [] options;
1199 
1200  }
1201 
1202  // Now scatter things back to processors: root_element_domain[] contains
1203  // the target domain for all elements (concatenated in processor-by
1204  // processor order on the root processor). Distribute this back
1205  // to the processors so that root_element_domain_on_this_proc[e] contains
1206  // the target domain for root element e (in whatever order the processor
1207  // decided to line up its root elements).
1208  cpu_start=clock();
1209  Vector<unsigned> root_element_domain_on_this_proc(number_of_root_elements);
1210 
1211  // Create at least one entry so we don't get a seg fault below
1212  if (root_element_domain_on_this_proc.size()==0)
1213  {
1214  root_element_domain_on_this_proc.resize(1);
1215  }
1216  MPI_Scatterv(&root_element_domain[0],
1217  &number_of_root_elements_on_each_proc[0],
1218  &start_index[0],
1219  MPI_UNSIGNED,
1220  &root_element_domain_on_this_proc[0],
1221  number_of_root_elements,
1222  MPI_UNSIGNED,
1223  root_processor,
1224  comm_pt->mpi_comm());
1225 
1226 
1227  // Now translate back into target domain for the actual (non-root)
1228  // elements
1229  element_domain_on_this_proc.resize(number_of_non_halo_elements);
1230  unsigned count_non_halo=0;
1231  for (unsigned e=0; e<n_elem; e++)
1232  {
1233  GeneralisedElement* el_pt=mesh_pt->element_pt(e);
1234  if (!el_pt->is_halo())
1235  {
1236  // Get the associated root element which is either...
1237  GeneralisedElement* root_el_pt=0;
1238  RefineableElement* ref_el_pt=dynamic_cast<RefineableElement*>(el_pt);
1239  if (ref_el_pt!=0)
1240  {
1241  //...the actual root element
1242  root_el_pt=ref_el_pt->root_element_pt();
1243  }
1244  // ...or the element itself
1245  else
1246  {
1247  root_el_pt=el_pt;
1248  }
1249 
1250  // Recover the root element number (offset by one)
1251  unsigned root_el_number=root_el_number_plus_one[root_el_pt]-1;
1252 
1253  // Copy target domain across from root element
1254  element_domain_on_this_proc[count_non_halo]=
1255  root_element_domain_on_this_proc[root_el_number];
1256 
1257  // Bump up counter for non-halo elements
1258  count_non_halo++;
1259  }
1260  }
1261 
1262 
1263 
1264 #ifdef PARANOID
1265  if (count_non_halo!=number_of_non_halo_elements)
1266  {
1267  std::ostringstream error_stream;
1268  error_stream
1269  << "Non-halo counts don't match: "
1270  << count_non_halo << " " << number_of_non_halo_elements
1271  << std::endl;
1272 
1273  throw OomphLibError(error_stream.str(),
1274  OOMPH_CURRENT_FUNCTION,
1275  OOMPH_EXCEPTION_LOCATION);
1276  }
1277 #endif
1278 
1279  // End timer
1280  cpu_end=clock();
1281 
1282  // Doc
1283  double cpu2=double(cpu_end-cpu_start)/CLOCKS_PER_SEC;
1284  oomph_info
1285  << "CPU time for communication of partition to all processors [nroot_elem="
1286  << total_number_of_root_elements <<"]: "
1287  << cpu2
1288  << " sec" << std::endl;
1289 
1290 
1291 }
1292 
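Illustrative sketch, not part of this file: in an MPI build (OOMPH_HAS_MPI) every processor calls this routine collectively on the already-distributed problem; on return the vector holds one target domain per local non-halo element (the routine resizes it internally) and bypass_metis keeps its default value of false. The wrapper name is hypothetical.

#include "generic.h"
using namespace oomph;

// Hypothetical helper for a driver compiled with MPI support: ask METIS
// for the new target domain of every local non-halo element.
void repartition_local_elements(Problem* problem_pt,
                                Vector<unsigned>& element_domain_on_this_proc)
{
 // Minimise the edge cut of the root-element graph
 unsigned objective=0;
 METIS::partition_distributed_mesh(problem_pt,objective,
                                   element_domain_on_this_proc);
}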
1293 
1294 
1295 
1296 #endif
1297 
1298 }