// Default function that translates spatial error into weight for
// METIS partitioning (unit weight regardless of input)
void METIS::default_error_to_weight_fct(const double& spatial_error,
                                        const double& max_error,
                                        const double& min_error,
                                        int& weight)
{
 weight=1;
}

// Partition mesh uniformly by dividing elements equally over the
// partitions, in the order in which they are returned by the problem
void METIS::uniform_partition_mesh(Problem* problem_pt,
                                   const unsigned& ndomain,
                                   Vector<unsigned>& element_domain)
{
 // Number of elements
 unsigned nelem=problem_pt->mesh_pt()->nelement();

#ifdef PARANOID
 if (nelem!=element_domain.size())
  {
   std::ostringstream error_stream;
   error_stream << "element_domain Vector has wrong length "
                << nelem << " " << element_domain.size() << std::endl;
   throw OomphLibError(error_stream.str(),
                       OOMPH_CURRENT_FUNCTION,
                       OOMPH_EXCEPTION_LOCATION);
  }
#endif

 // Uniform partitioning: deal out contiguous blocks of
 // nel_per_domain elements
 unsigned nel_per_domain=int(float(nelem)/float(ndomain));
 for (unsigned ielem=0;ielem<nelem;ielem++)
  {
   unsigned idomain=unsigned(float(ielem)/float(nel_per_domain));
   element_domain[ielem]=idomain;
  }
}
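// A minimal usage sketch (not from the library source), assuming a
// fully built Problem: hand out the elements to four domains in the
// order in which the mesh returns them. The output Vector must be
// sized to the number of elements beforehand.
//
//   Vector<unsigned> element_domain(problem_pt->mesh_pt()->nelement());
//   METIS::uniform_partition_mesh(problem_pt,4,element_domain);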
// Use METIS to assign each element to a domain. On return,
// element_domain[ielem] contains the number of the domain
// [0,1,...,ndomain-1] to which element ielem has been assigned.
// objective=0 minimises the edgecut; objective=1 minimises the
// total communication volume.
void METIS::partition_mesh(Problem* problem_pt,
                           const unsigned& ndomain,
                           const unsigned& objective,
                           Vector<unsigned>& element_domain)
{
 // Global mesh and number of elements
 Mesh* mesh_pt=problem_pt->mesh_pt();
 unsigned nelem=mesh_pt->nelement();

#ifdef PARANOID
 if (nelem!=element_domain.size())
  {
   std::ostringstream error_stream;
   error_stream << "element_domain Vector has wrong length "
                << nelem << " " << element_domain.size() << std::endl;
   throw OomphLibError(error_stream.str(),
                       OOMPH_CURRENT_FUNCTION,
                       OOMPH_EXCEPTION_LOCATION);
  }
#endif

 // Setup the dual graph of the mesh: start timer
 clock_t cpu_start=clock();

 // Storage for the elements associated with each global equation number
 std::map<unsigned,std::set<unsigned> > elements_connected_with_global_eqn;

 // Storage for all global equation numbers in the mesh
 std::set<unsigned> all_global_eqns;

 // Associate each element with the global equation numbers of its dofs
 for (unsigned e=0;e<nelem;e++)
  {
   GeneralisedElement* el_pt=mesh_pt->element_pt(e);
   unsigned ndof=el_pt->ndof();
   for (unsigned j=0;j<ndof;j++)
    {
     unsigned eqn_number=el_pt->eqn_number(j);
     elements_connected_with_global_eqn[eqn_number].insert(e);
     all_global_eqns.insert(eqn_number);
    }
  }
 // Elements are connected if they share at least one global equation
 Vector<std::set<unsigned> > connected_elements(nelem);
 unsigned count=0;
 for (std::set<unsigned>::iterator it=all_global_eqns.begin();
      it!=all_global_eqns.end();it++)
  {
   // Elements connected with this global equation
   std::set<unsigned> elements=elements_connected_with_global_eqn[*it];

   // Double loop: everybody in this set is connected to everybody else
   for (std::set<unsigned>::iterator it1=elements.begin();
        it1!=elements.end();it1++)
    {
     for (std::set<unsigned>::iterator it2=elements.begin();
          it2!=elements.end();it2++)
      {
       if ((*it1)!=(*it2))
        {
         connected_elements[(*it1)].insert(*it2);
         count++;
        }
      }
    }
  }
 // Convert the connectivity to the compressed-row format that METIS
 // expects: xadj[e] holds the offset of element e's adjacency list
 // within the concatenated adjacency vector
 int* xadj = new int[nelem+1];
 Vector<int> adjacency_vector;
 adjacency_vector.reserve(count);
 unsigned ientry=0;
 for (unsigned e=0;e<nelem;e++)
  {
   // First entry for current element
   xadj[e]=ientry;

   // Copy the set of connected elements into the adjacency vector
   typedef std::set<unsigned>::iterator IT;
   for (IT it=connected_elements[e].begin();
        it!=connected_elements[e].end();it++)
    {
     adjacency_vector.push_back(*it);
     ientry++;
    }

   // Entry one past the last entry for the current element
   xadj[e+1]=ientry;
  }
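 // Worked example of this compressed-row layout (hypothetical
 // three-element mesh in which elements 0/1 and 1/2 are connected):
 //   adjacency_vector = {1, 0, 2, 1}
 //   xadj             = {0, 1, 3, 4}
 // i.e. element e's neighbours are the entries adjacency_vector[xadj[e]]
 // up to (but not including) adjacency_vector[xadj[e+1]].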
 // Document CPU time for the graph setup
 clock_t cpu_end=clock();
 double cpu0=double(cpu_end-cpu_start)/CLOCKS_PER_SEC;
 oomph_info << "CPU time for setup of METIS data structures [nelem="
            << nelem << "]: " << cpu0 << " sec" << std::endl;
 // Call the METIS graph partitioner: restart timer
 cpu_start=clock();

 // METIS input: number of vertices; no vertex or edge weights (yet);
 // C-style (zero-based) numbering; one partition per domain
 int nvertex=nelem;
 int* vwgt=0;
 int* adjwgt=0;
 int wgtflag=0;
 int numflag=0;
 int nparts=ndomain;

 // Default options and output arrays for the edgecut and the partition
 int* options= new int[10];
 options[0]=0;
 int* edgecut = new int[nelem];
 int* part = new int[nelem];
299 "Biasing element distribution via spatial error estimate\n";
308 get_element_errors(mesh_pt,elemental_error);
310 double max_error=*(std::max_element(elemental_error.begin(),
311 elemental_error.end()));
312 double min_error=*(std::min_element(elemental_error.begin(),
313 elemental_error.end()));
317 for (
unsigned e=0;
e<nelem;
e++)
 else
  {
   // Multiple submeshes: check whether any of them is refineable and
   // record where each submesh's elements start in the global numbering
   bool refineable_submesh_exists=false;
   Vector<unsigned> loop_helper(n_mesh+1);
   loop_helper[0]=0;
   for (unsigned i_mesh=0;i_mesh<n_mesh;i_mesh++)
    {
     loop_helper[i_mesh+1]=
      problem_pt->mesh_pt(i_mesh)->nelement()+loop_helper[i_mesh];
     if (dynamic_cast<RefineableMeshBase*>(problem_pt->mesh_pt(i_mesh))!=0)
      {
       refineable_submesh_exists=true;
      }
    }

   // If at least one submesh is refineable, bias the distribution,
   // submesh by submesh
   if (refineable_submesh_exists)
    {
     oomph_info << "Biasing element distribution via spatial error estimate\n";

     // Adjust the flag and provide storage for the vertex weights
     wgtflag=2;
     vwgt=new int[nelem];

     for (unsigned i_mesh=0;i_mesh<n_mesh;i_mesh++)
      {
       RefineableMeshBase* mmesh_pt=
        dynamic_cast<RefineableMeshBase*>(problem_pt->mesh_pt(i_mesh));
       if (mmesh_pt!=0)
        {
         // Get the error estimates for this submesh's elements
         unsigned nsub_elem=loop_helper[i_mesh+1]-loop_helper[i_mesh];
         Vector<double> elemental_error(nsub_elem);
         mmesh_pt->spatial_error_estimator_pt()->
          get_element_errors(problem_pt->mesh_pt(i_mesh),elemental_error);
         double max_error=*(std::max_element(elemental_error.begin(),
                                             elemental_error.end()));
         double min_error=*(std::min_element(elemental_error.begin(),
                                             elemental_error.end()));

         // Translate the errors into weights
         int weight=1;
         unsigned start=loop_helper[i_mesh];
         unsigned end=loop_helper[i_mesh+1];
         for (unsigned e=start;e<end;e++)
          {
           unsigned error_index=e-start;
           Error_to_weight_fct_pt(elemental_error[error_index],
                                  max_error,min_error,weight);
           vwgt[e]=weight;
          }
        }
       else
        {
         // Non-refineable submesh: no error estimator, so use unit weights
         unsigned start=loop_helper[i_mesh];
         unsigned end=loop_helper[i_mesh+1];
         for (unsigned e=start;e<end;e++)
          {
           vwgt[e]=1;
          }
        }
      }
    }
  }
416 &wgtflag, &numflag, &nparts, options, edgecut, part);
418 else if (objective==1)
423 &wgtflag, &numflag, &nparts, options, edgecut, part);
427 std::ostringstream error_stream;
429 <<
"Wrong objective for METIS. objective = " << objective << std::endl;
432 OOMPH_CURRENT_FUNCTION,
433 OOMPH_EXCEPTION_LOCATION);
438 std::vector<bool> done(nparts,
false);
442 for (
unsigned e=0;
e<nelem;
e++)
444 element_domain[
e]=part[
e];
#ifdef PARANOID
 // Check that every partition has received at least one element
 std::ostringstream error_stream;
 bool shout=false;
 for (int p=0;p<nparts;p++)
  {
   if (!done[p])
    {
     shout=true;
     error_stream << "No elements on processor " << p
                  << " when trying to partition " << nelem
                  << " elements over " << nparts << " processors!\n";
    }
  }
 if (shout)
  {
   throw OomphLibError(error_stream.str(),
                       OOMPH_CURRENT_FUNCTION,
                       OOMPH_EXCEPTION_LOCATION);
  }
#endif
 // Document CPU time for the partitioning and clean up
 cpu_end=clock();
 double cpu1=double(cpu_end-cpu_start)/CLOCKS_PER_SEC;
 oomph_info << "CPU time for METIS mesh partitioning [nelem="
            << nelem << "]: " << cpu1 << " sec" << std::endl;
 delete [] xadj;
 delete [] part;
 delete [] edgecut;
 delete [] options;
}
// Use METIS to assign each element in an already-distributed mesh to
// a domain. On return, element_domain_on_this_proc[e] contains the
// domain number of non-halo element e on this processor only.
void METIS::partition_distributed_mesh(Problem* problem_pt,
                                       const unsigned& objective,
                                       Vector<unsigned>&
                                        element_domain_on_this_proc,
                                       const bool& bypass_metis)
{
 // Start timer
 clock_t cpu_start=clock();

 // Communicator, number of processors and local rank
 OomphCommunicator* comm_pt=problem_pt->communicator_pt();
 unsigned n_proc=comm_pt->nproc();
 unsigned my_rank=comm_pt->my_rank();

 // Number of elements in the (local) mesh
 Mesh* mesh_pt=problem_pt->mesh_pt();
 unsigned n_elem=mesh_pt->nelement();

 // Most recent elemental assembly times (zero-sized if none recorded)
 Vector<double> elemental_assembly_time=
  problem_pt->elemental_assembly_time();

#ifdef PARANOID
 unsigned n=elemental_assembly_time.size();
 if ((n!=0)&&(n!=n_elem))
  {
   std::ostringstream error_stream;
   error_stream << "Number of elements doesn't match the \n"
                << "number of elemental assembly times: "
                << n_elem << " " << n << std::endl;
   throw OomphLibError(error_stream.str(),
                       OOMPH_CURRENT_FUNCTION,
                       OOMPH_EXCEPTION_LOCATION);
  }
#endif
 // Can we load-balance on assembly times?
 bool can_load_balance_on_assembly_times=false;
 if (elemental_assembly_time.size()!=0)
  {
   can_load_balance_on_assembly_times=true;
  }

 // Storage for the global equation numbers on this processor
 std::set<unsigned> global_eqns_on_this_proc;

 // Storage for the root elements associated with each global
 // equation number on this processor
 std::map<unsigned,std::set<GeneralisedElement*> >
  root_elements_connected_with_global_eqn_on_this_proc;

 // Flat-packed equation numbers, and per-root-element bookkeeping;
 // reserve generous estimates to avoid re-allocation
 Vector<unsigned> eqn_numbers_with_root_elements_on_this_proc;
 eqn_numbers_with_root_elements_on_this_proc.reserve(n_elem*9);
 Vector<unsigned> number_of_dofs_for_root_element;
 number_of_dofs_for_root_element.reserve(n_elem);
 Vector<unsigned> number_of_non_halo_elements_for_root_element;
 number_of_non_halo_elements_for_root_element.reserve(n_elem);
 Vector<double> total_assembly_time_for_root_element;
 total_assembly_time_for_root_element.reserve(n_elem);
 // Map from root element to its number, offset by one so that zero
 // means "not yet encountered"
 std::map<GeneralisedElement*,unsigned> root_el_number_plus_one;

 // Loop over the non-halo elements and associate each of them with
 // its root element
 int number_of_root_elements=0;
 unsigned number_of_non_halo_elements=0;
 for (unsigned e=0;e<n_elem;e++)
  {
   double el_assembly_time=0.0;
   GeneralisedElement* el_pt=mesh_pt->element_pt(e);

   // Ignore halo elements
   if (!el_pt->is_halo())
    {
     if (can_load_balance_on_assembly_times)
      {
       el_assembly_time=elemental_assembly_time[e];
      }

     // Find the root element: the element itself unless it is part
     // of a refinement hierarchy
     GeneralisedElement* root_el_pt=el_pt;
     RefineableElement* ref_el_pt=dynamic_cast<RefineableElement*>(el_pt);
     if (ref_el_pt!=0)
      {
       root_el_pt=ref_el_pt->root_element_pt();
      }

     // Have we encountered this root element before?
     bool already_encountered=false;
     unsigned root_el_number=root_el_number_plus_one[root_el_pt];
     if (root_el_number_plus_one[root_el_pt]==0)
      {
       // New root element: assign the next root element number
       already_encountered=false;
       number_of_root_elements++;
       root_el_number_plus_one[root_el_pt]=number_of_root_elements;
       root_el_number=number_of_root_elements-1;
      }
     else
      {
       // Visited before: recover the zero-based root element number
       already_encountered=true;
       root_el_number-=1;
      }

     // Associate the element's global equation numbers with its root
     unsigned n_dof=el_pt->ndof();
     for (unsigned i=0;i<n_dof;i++)
      {
       unsigned eqn_no=el_pt->eqn_number(i);
       root_elements_connected_with_global_eqn_on_this_proc[eqn_no].
        insert(root_el_pt);
       global_eqns_on_this_proc.insert(eqn_no);
       eqn_numbers_with_root_elements_on_this_proc.push_back(eqn_no);
      }

     // Accumulate the root element's bookkeeping data
     if (already_encountered)
      {
       number_of_dofs_for_root_element[root_el_number]+=n_dof;
       number_of_non_halo_elements_for_root_element[root_el_number]++;
       total_assembly_time_for_root_element[root_el_number]+=
        el_assembly_time;
      }
     else
      {
       number_of_dofs_for_root_element.push_back(n_dof);
       number_of_non_halo_elements_for_root_element.push_back(1);
       total_assembly_time_for_root_element.push_back(el_assembly_time);
      }

     // Bump up the number of non-halo elements
     number_of_non_halo_elements++;
    }
  }
 // Gather the number of root elements held on each processor
 unsigned root_processor=0;
 Vector<int> number_of_root_elements_on_each_proc(n_proc,0);
 MPI_Allgather(&number_of_root_elements, 1, MPI_INT,
               &number_of_root_elements_on_each_proc[0], 1, MPI_INT,
               comm_pt->mpi_comm());

 // Total number of root elements and the offset at which each
 // processor's root elements start in the global numbering
 Vector<int> start_index(n_proc,0);
 unsigned total_number_of_root_elements=0;
 for (unsigned i_proc=0; i_proc<n_proc; i_proc++)
  {
   total_number_of_root_elements+=
    number_of_root_elements_on_each_proc[i_proc];
   start_index[i_proc]=total_number_of_root_elements-
    number_of_root_elements_on_each_proc[i_proc];
  }
 // Gather the number of flat-packed equation numbers on each processor
 int n_eqns_on_this_proc=
  eqn_numbers_with_root_elements_on_this_proc.size();
 Vector<int> n_eqns_on_each_proc(n_proc,0);
 MPI_Allgather(&n_eqns_on_this_proc, 1, MPI_INT,
               &n_eqns_on_each_proc[0], 1, MPI_INT,
               comm_pt->mpi_comm());

 // Total number of equation entries and each processor's offset
 Vector<int> start_eqns_index(n_proc,0);
 unsigned total_n_eqn=0;
 for (unsigned i_proc=0; i_proc<n_proc; i_proc++)
  {
   total_n_eqn+=n_eqns_on_each_proc[i_proc];
   start_eqns_index[i_proc]=total_n_eqn-n_eqns_on_each_proc[i_proc];
  }
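 // Note: this exclusive-prefix-sum pattern turns the per-processor
 // counts into the displacement arrays that MPI_Gatherv/MPI_Scatterv
 // expect. A small worked example with hypothetical counts:
 //   counts = {3,1,2}  ->  displacements = {0,3,4}, total = 6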
 // Gather the per-root-element dof counts on the root processor
 Vector<unsigned> number_of_dofs_for_global_root_element(
  total_number_of_root_elements);
 // (Gathering from a zero-sized vector is not allowed, so make sure
 // there's at least one entry)
 if (number_of_dofs_for_root_element.size()==0)
  {
   number_of_dofs_for_root_element.resize(1);
  }
 MPI_Gatherv(&number_of_dofs_for_root_element[0],
             number_of_root_elements,
             MPI_UNSIGNED,
             &number_of_dofs_for_global_root_element[0],
             &number_of_root_elements_on_each_proc[0],
             &start_index[0],
             MPI_UNSIGNED,
             root_processor, comm_pt->mpi_comm());

 // Gather the per-root-element non-halo element counts
 Vector<unsigned> number_of_non_halo_elements_for_global_root_element(
  total_number_of_root_elements);
 if (number_of_non_halo_elements_for_root_element.size()==0)
  {
   number_of_non_halo_elements_for_root_element.resize(1);
  }
 MPI_Gatherv(&number_of_non_halo_elements_for_root_element[0],
             number_of_root_elements,
             MPI_UNSIGNED,
             &number_of_non_halo_elements_for_global_root_element[0],
             &number_of_root_elements_on_each_proc[0],
             &start_index[0],
             MPI_UNSIGNED,
             root_processor, comm_pt->mpi_comm());

 // Gather the per-root-element assembly times
 Vector<double> total_assembly_time_for_global_root_element(
  total_number_of_root_elements);
 if (total_assembly_time_for_root_element.size()==0)
  {
   total_assembly_time_for_root_element.resize(1);
  }
 MPI_Gatherv(&total_assembly_time_for_root_element[0],
             number_of_root_elements,
             MPI_DOUBLE,
             &total_assembly_time_for_global_root_element[0],
             &number_of_root_elements_on_each_proc[0],
             &start_index[0],
             MPI_DOUBLE,
             root_processor, comm_pt->mpi_comm());

 // Gather the flat-packed equation numbers for all root elements
 Vector<unsigned> eqn_numbers_with_root_elements(total_n_eqn);
 if (eqn_numbers_with_root_elements_on_this_proc.size()==0)
  {
   eqn_numbers_with_root_elements_on_this_proc.resize(1);
  }
 MPI_Gatherv(&eqn_numbers_with_root_elements_on_this_proc[0],
             n_eqns_on_this_proc,
             MPI_UNSIGNED,
             &eqn_numbers_with_root_elements[0],
             &n_eqns_on_each_proc[0],
             &start_eqns_index[0],
             MPI_UNSIGNED,
             root_processor, comm_pt->mpi_comm());
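 // (For reference: MPI_Gatherv(sendbuf, sendcount, sendtype, recvbuf,
 // recvcounts, displs, recvtype, root, comm) gathers a variable number
 // of items from each rank; recvcounts and displs are only read on the
 // root rank.)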
 // Document CPU time for the global setup
 clock_t cpu_end=clock();
 double cpu0=double(cpu_end-cpu_start)/CLOCKS_PER_SEC;
 oomph_info
  << "CPU time for global setup of METIS data structures [nroot_elem="
  << total_number_of_root_elements << "]: " << cpu0
  << " sec" << std::endl;
 // Storage for the domain assignment of all root elements
 Vector<unsigned> root_element_domain(total_number_of_root_elements,0);

 // The partitioning itself is done on the root processor only
 if (my_rank==root_processor)
  {
   // Start timer
   clock_t cpu_start=clock();

   // Storage for all global equation numbers on the root processor
   std::set<unsigned> all_global_eqns_root_processor;

   // Storage for the root elements associated with each global
   // equation number
   std::map<unsigned,std::set<unsigned> >
    root_elements_connected_with_global_eqn_on_root_processor;

   // Rebuild the equation-to-root-element lookup from the gathered
   // flat-packed data
   unsigned count_all=0;
   for (unsigned e=0;e<total_number_of_root_elements;e++)
    {
     unsigned n_eqn_no=number_of_dofs_for_global_root_element[e];
     for (unsigned n=0; n<n_eqn_no; n++)
      {
       unsigned eqn_no=eqn_numbers_with_root_elements[count_all];
       count_all++;
       root_elements_connected_with_global_eqn_on_root_processor[eqn_no].
        insert(e);
       all_global_eqns_root_processor.insert(eqn_no);
      }
    }

   // Number of domains: one per processor
   unsigned ndomain=n_proc;

   // Root elements are connected if they share a global equation
   Vector<std::set<unsigned> > connected_root_elements
    (total_number_of_root_elements);
   unsigned count=0;
   for (std::set<unsigned>::iterator it=all_global_eqns_root_processor.begin();
        it!=all_global_eqns_root_processor.end();it++)
    {
     // Root elements connected with this global equation
     std::set<unsigned> root_elements=
      root_elements_connected_with_global_eqn_on_root_processor[*it];

     // Double loop: everybody in this set is connected to everybody else
     for (std::set<unsigned>::iterator it1=root_elements.begin();
          it1!=root_elements.end();it1++)
      {
       for (std::set<unsigned>::iterator it2=root_elements.begin();
            it2!=root_elements.end();it2++)
        {
         if ((*it1)!=(*it2))
          {
           connected_root_elements[(*it1)].insert(*it2);
           count++;
          }
        }
      }
    }
   // Document CPU time for setting up the connected root elements
   clock_t cpu_end=clock();
   double cpu0b=double(cpu_end-cpu_start)/CLOCKS_PER_SEC;
   oomph_info
    << "CPU time for setup of connected elements (load balance) "
    << "[nroot_elem=" << total_number_of_root_elements << "]: " << cpu0b
    << " sec" << std::endl;
   // Convert the connectivity to compressed-row form for METIS
   cpu_start=clock();
   int* xadj = new int[total_number_of_root_elements+1];
   Vector<int> adjacency_vector;
   adjacency_vector.reserve(count);
   unsigned ientry=0;
   for (unsigned e=0;e<total_number_of_root_elements;e++)
    {
     // First entry for the current root element
     xadj[e]=ientry;

     // Copy the set of connected root elements into the adjacency vector
     typedef std::set<unsigned>::iterator IT;
     for (IT it=connected_root_elements[e].begin();
          it!=connected_root_elements[e].end();it++)
      {
       adjacency_vector.push_back(*it);
       ientry++;
      }

     // Entry one past the last entry for the current root element
     xadj[e+1]=ientry;
    }

   // Document CPU time for the METIS data structures
   cpu_end=clock();
   double cpu0=double(cpu_end-cpu_start)/CLOCKS_PER_SEC;
   oomph_info
    << "CPU time for setup of METIS data structures (load balance) "
    << "[nroot_elem=" << total_number_of_root_elements << "]: " << cpu0
    << " sec" << std::endl;
   // METIS input: number of vertices; weights on the vertices only;
   // C-style (zero-based) numbering; one partition per processor
   int nvertex=total_number_of_root_elements;
   int* adjwgt=0;
   int wgtflag=2;
   int numflag=0;
   int nparts=ndomain;

   // Default options and output arrays for the edgecut and the partition
   int* options= new int[10];
   options[0]=0;
   int* edgecut = new int[total_number_of_root_elements];
   int* part = new int[total_number_of_root_elements];

   // Storage for the vertex weights
   int* vwgt= new int[total_number_of_root_elements];
   // Base the vertex weights on assembly times if they are available
   if (can_load_balance_on_assembly_times)
    {
     oomph_info << "Basing distribution on assembly times of elements\n";

     // Normalise the weights by the fastest assembly time
     double min_time=*(std::min_element(
      total_assembly_time_for_global_root_element.begin(),
      total_assembly_time_for_global_root_element.end()));
     if (min_time==0.0)
      {
       std::ostringstream error_stream;
       error_stream << "Minimum assemble time for element is zero!\n";
       throw OomphLibError(error_stream.str(),
                           OOMPH_CURRENT_FUNCTION,
                           OOMPH_EXCEPTION_LOCATION);
      }
     // When METIS is bypassed (for validation), use made-up but
     // repeatable "timings" as the weights
     if (bypass_metis)
      {
       for (unsigned e=0;e<total_number_of_root_elements;e++)
        {
         vwgt[e]=e;
        }
      }
     else
      {
       // Use assembly times, relative to the minimum, as weights
       for (unsigned e=0;e<total_number_of_root_elements;e++)
        {
         vwgt[e]=int(total_assembly_time_for_global_root_element[e]/
                     min_time);
        }
      }
    }
   else
    {
     // No assembly times: weigh each root element by the number of
     // non-halo leaf elements associated with it
     oomph_info << "Basing distribution on number of elements\n";
     for (unsigned e=0;e<total_number_of_root_elements;e++)
      {
       vwgt[e]=number_of_non_halo_elements_for_global_root_element[e];
      }
    }
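   // Worked example of the assembly-time weighting above (hypothetical
   // numbers): with min_time=0.2s, a root element that took 1.0s to
   // assemble gets vwgt=int(1.0/0.2)=5, i.e. METIS treats it as five
   // times as expensive as the cheapest root element.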
   // For validation purposes we can bypass METIS and simply deal out
   // the root elements in contiguous blocks instead
   if (bypass_metis)
    {
     for (unsigned e=0;e<total_number_of_root_elements;e++)
      {
       part[e]=
        unsigned(double(e)/double(total_number_of_root_elements)*
                 double(nparts));
      }
     oomph_info
      << "Bypassing METIS for validation purposes.\n"
      << "Appending input for metis in metis_input_for_validation.dat\n";

     // Dump the graph and the vertex weights so that METIS can be run
     // on them separately
     std::ofstream outfile;
     outfile.open("metis_input_for_validation.dat",std::ios_base::app);
     for (unsigned e=0;e<total_number_of_root_elements+1;e++)
      {
       outfile << xadj[e] << std::endl;
      }
     unsigned n=adjacency_vector.size();
     for (unsigned i=0;i<n;i++)
      {
       outfile << adjacency_vector[i] << std::endl;
      }
     for (unsigned e=0;e<total_number_of_root_elements;e++)
      {
       outfile << vwgt[e] << std::endl;
      }
     outfile.close();
    }
1157 &wgtflag, &numflag, &nparts, options, edgecut,
1160 else if (objective==1)
1165 &wgtflag, &numflag, &nparts, options, edgecut,
1172 for (
unsigned e=0;
e<total_number_of_root_elements;
e++)
1174 root_element_domain[
e]=part[
e];
1175 total_weight_on_proc[part[
e]]+=vwgt[
e];
1179 for (
unsigned j=0;j<n_proc;j++)
1182 <<
" is " << total_weight_on_proc[j] << std::endl;
   // Document CPU time for the partitioning and clean up
   cpu_end=clock();
   double cpu1=double(cpu_end-cpu_start)/CLOCKS_PER_SEC;
   oomph_info
    << "CPU time for METIS mesh partitioning [nroot_elem="
    << total_number_of_root_elements << "]: " << cpu1
    << " sec" << std::endl;
   delete [] xadj;
   delete [] part;
   delete [] edgecut;
   delete [] options;
   delete [] vwgt;
  }
 // Scatter the partition back: each processor receives the domain
 // numbers of its own root elements
 cpu_start=clock();
 Vector<unsigned> root_element_domain_on_this_proc(number_of_root_elements);
 // (Scattering into a zero-sized vector is not allowed, so make sure
 // there's at least one entry)
 if (root_element_domain_on_this_proc.size()==0)
  {
   root_element_domain_on_this_proc.resize(1);
  }
 MPI_Scatterv(&root_element_domain[0],
              &number_of_root_elements_on_each_proc[0],
              &start_index[0],
              MPI_UNSIGNED,
              &root_element_domain_on_this_proc[0],
              number_of_root_elements,
              MPI_UNSIGNED,
              root_processor,
              comm_pt->mpi_comm());
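 // (For reference: MPI_Scatterv(sendbuf, sendcounts, displs, sendtype,
 // recvbuf, recvcount, recvtype, root, comm) is the inverse of
 // MPI_Gatherv; sendcounts and displs are only read on the root rank.)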
 // Assign each local non-halo element the domain of its root element
 element_domain_on_this_proc.resize(number_of_non_halo_elements);
 unsigned count_non_halo=0;
 for (unsigned e=0;e<n_elem;e++)
  {
   GeneralisedElement* el_pt=mesh_pt->element_pt(e);
   if (!el_pt->is_halo())
    {
     // Get the associated root element, as before
     GeneralisedElement* root_el_pt=el_pt;
     RefineableElement* ref_el_pt=dynamic_cast<RefineableElement*>(el_pt);
     if (ref_el_pt!=0)
      {
       root_el_pt=ref_el_pt->root_element_pt();
      }
     unsigned root_el_number=root_el_number_plus_one[root_el_pt]-1;
     element_domain_on_this_proc[count_non_halo]=
      root_element_domain_on_this_proc[root_el_number];
     count_non_halo++;
    }
  }
#ifdef PARANOID
 // Check that all non-halo elements have been processed
 if (count_non_halo!=number_of_non_halo_elements)
  {
   std::ostringstream error_stream;
   error_stream << "Non-halo counts don't match: "
                << count_non_halo << " " << number_of_non_halo_elements
                << std::endl;
   throw OomphLibError(error_stream.str(),
                       OOMPH_CURRENT_FUNCTION,
                       OOMPH_EXCEPTION_LOCATION);
  }
#endif
 // Document CPU time for the communication of the partition
 cpu_end=clock();
 double cpu2=double(cpu_end-cpu_start)/CLOCKS_PER_SEC;
 oomph_info
  << "CPU time for communication of partition to all processors "
  << "[nroot_elem=" << total_number_of_root_elements << "]: " << cpu2
  << " sec" << std::endl;
}
Cross-references (from the library documentation):

GeneralisedElement
 A Generalised Element class.

void METIS::partition_distributed_mesh(Problem* problem_pt, const unsigned& objective, Vector<unsigned>& element_domain_on_this_proc, const bool& bypass_metis=false)
 Use METIS to assign each element in an already-distributed mesh to a domain. On return, element_domain_on_this_proc[e] contains the number of the domain [0,1,...,ndomain-1] to which non-halo element e on THE CURRENT PROCESSOR ONLY has been assigned. The order of the non-halo elements is the same as in the Problem's mesh, with the halo elements being skipped.

void METIS_PartGraphVKway(int*, int*, int*, int*, int*, int*, int*, int*, int*, int*, int*)
 METIS graph-partitioning function: decomposes the nodal graph based on minimum communication volume.

void METIS::partition_mesh(Problem* problem_pt, const unsigned& ndomain, const unsigned& objective, Vector<unsigned>& element_domain)
 Use METIS to assign each element to a domain. On return, element_domain[ielem] contains the number of the domain [0,1,...,ndomain-1] to which element ielem has been assigned.

bool GeneralisedElement::is_halo() const
 Is this element a halo?

Vector<double> Problem::elemental_assembly_time()
 Return vector of most-recent elemental assembly times (used for load balancing). Zero-sized if no Jacobian has been computed.

Mesh*& Problem::mesh_pt()
 Return a pointer to the global mesh.

OomphCommunicator* Problem::communicator_pt()
 Access function to the oomph-lib communicator.

virtual RefineableElement* RefineableElement::root_element_pt()
 Pointer to the root element in the refinement hierarchy (must be implemented in specific elements).

unsigned long Mesh::nelement() const
 Return number of elements in the mesh.

unsigned GeneralisedElement::ndof() const
 Return the number of equations/dofs in the element.

ErrorEstimator*& RefineableMeshBase::spatial_error_estimator_pt()
 Access to the spatial error estimator.

GeneralisedElement*& Mesh::element_pt(const unsigned long& e)
 Return pointer to element e.

void METIS::default_error_to_weight_fct(const double& spatial_error, const double& max_error, const double& min_error, int& weight)
 Default function that translates spatial error into weight for METIS partitioning (unit weight regardless of input).

void start(const unsigned& i)
 (Re-)start the i-th timer.

unsigned long GeneralisedElement::eqn_number(const unsigned& ieqn_local) const
 Return the global equation number corresponding to the ieqn_local-th local equation number.

typedef void (*ErrorToWeightFctPt)(const double& spatial_error, const double& max_error, const double& min_error, int& weight)
 Typedef for a pointer to a function that translates spatial error into weight for METIS partitioning.

void METIS_PartGraphKway(int*, int*, int*, int*, int*, int*, int*, int*, int*, int*, int*)
 METIS graph-partitioning function: decomposes the nodal graph based on minimum edgecut.

void METIS::uniform_partition_mesh(Problem* problem_pt, const unsigned& ndomain, Vector<unsigned>& element_domain)
 Partition the mesh uniformly by dividing elements equally over the partitions, in the order in which they are returned by the problem.

unsigned Problem::nsub_mesh() const
 Return number of submeshes.

ErrorToWeightFctPt Error_to_weight_fct_pt
 Function pointer to the function that translates spatial error into weight for METIS partitioning.

OomphCommunicator
 An oomph-lib wrapper for the MPI_Comm communicator object. Just contains an MPI_Comm object.
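A short sketch (not from the library source) of a user-defined weight function matching the ErrorToWeightFctPt typedef above; the function name and the linear 1-to-10 weighting are illustrative only. It could be installed via METIS::Error_to_weight_fct_pt before calling METIS::partition_mesh:

 void linear_error_to_weight_fct(const double& spatial_error,
                                 const double& max_error,
                                 const double& min_error,
                                 int& weight)
 {
  // Map the error linearly onto integer weights 1..10; fall back to
  // unit weight if all errors coincide
  weight=1;
  if (max_error>min_error)
   {
    double frac=(spatial_error-min_error)/(max_error-min_error);
    weight=1+int(9.0*frac);
   }
 }

 // Hypothetical driver code: install the function, then partition
 METIS::Error_to_weight_fct_pt=&linear_error_to_weight_fct;
 METIS::partition_mesh(problem_pt,4,0,element_domain);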