29 #ifdef USING_OOMPH_SUPERLU_DIST 30 #include "oomph_superlu_dist_3.0.h" 32 #include<superlu_defs.h> 33 #include<superlu_ddefs.h> 37 #include<supermatrix.h> 38 #include<old_colamd.h> 66 int* cr_index,
int* cr_start,
double** cc_values,
67 int** cc_index,
int** cc_start)
69 dCompRow_to_CompCol(nrow,ncol,nnz,cr_values,cr_index,cr_start,cc_values,
107 int n,
int nnz_local,
108 int nrow_local,
int first_row,
109 double *values,
int *col_index,
110 int *row_start,
double *b,
111 int nprow,
int npcol,
112 int doc,
void **data,
int *info,
116 superlu_options_t *options;
119 ScalePermstruct_t *ScalePermstruct;
120 LUstruct_t *LUstruct;
121 SOLVEstruct_t *SOLVEstruct;
130 int_t *rowptr, *colind;
131 int_t job, rowequ, colequ, iinfo, need_value,
i, j, irow, icol;
132 int_t m_loc, fst_row, nnz, nnz_loc, dist_mem_use;
133 int_t *colptr, *rowind;
134 NRformat_loc *Astore;
140 Glu_persist_t *Glu_persist;
141 Glu_freeable_t *Glu_freeable;
145 double *a, *X, *b_col;
147 double *C, *
R, *C1, *R1, *bcol, *x_col;
148 double amax,
t, colcnd, rowcnd, anorm;
149 char equed[1], norm[1];
151 static mem_usage_t symb_mem_usage;
153 int_t Equil, factored, notran, permc_spec;
180 grid = (gridinfo_t *) SUPERLU_MALLOC(
sizeof(gridinfo_t));
181 superlu_gridinit(comm, nprow, npcol, grid);
182 superlu_data->
grid = grid;
186 if ( iam >= nprow * npcol )
return;
189 options = (superlu_options_t *) SUPERLU_MALLOC(
sizeof(superlu_options_t));
190 A = (SuperMatrix *) SUPERLU_MALLOC(
sizeof(SuperMatrix));
192 (ScalePermstruct_t *) SUPERLU_MALLOC(
sizeof(ScalePermstruct_t));
193 LUstruct = (LUstruct_t *) SUPERLU_MALLOC(
sizeof(LUstruct_t));
194 SOLVEstruct = (SOLVEstruct_t *) SUPERLU_MALLOC(
sizeof(SOLVEstruct_t));
197 dCreate_CompRowLoc_Matrix_dist(A,m,n,nnz_local,nrow_local,first_row,
198 values,col_index,row_start,
199 SLU_NR_loc,SLU_D,SLU_GE);
202 set_default_options_dist(options);
205 options->Trans = NOTRANS;
210 options->RowPerm=LargeDiag;
214 options->ColPerm=MMD_AT_PLUS_A;
217 if (allow_permutations==0)
219 options->ColPerm=NATURAL;
220 options->RowPerm=NATURAL;
228 options->IterRefine = SLU_DOUBLE;
233 options->PrintStat = YES;
237 options->PrintStat = NO;
242 if ((!iam)&&(doc==0))
244 printf(
"\nPerforming SuperLU_DIST setup\n");
245 printf(
"Process grid\t%d X %d\n", grid->nprow, grid->npcol);
246 print_options_dist(options);
250 ScalePermstructInit(m, n, ScalePermstruct);
251 LUstructInit(m, n, LUstruct);
254 Glu_persist = LUstruct->Glu_persist;
255 Astore = (NRformat_loc *) A->Store;
256 nnz_loc = Astore->nnz_loc;
257 m_loc = Astore->m_loc;
258 fst_row = Astore->fst_row;
260 rowptr = Astore->rowptr;
261 colind = Astore->colind;
264 Fact = options->Fact;
265 factored = (Fact == FACTORED);
266 Equil = (!factored && options->Equil == YES);
267 notran = (options->Trans == NOTRANS);
268 rowequ = colequ = FALSE;
269 if ( factored || (Fact == SamePattern_SameRowPerm && Equil) )
271 rowequ = (ScalePermstruct->DiagScale == ROW) ||
272 (ScalePermstruct->DiagScale == BOTH);
273 colequ = (ScalePermstruct->DiagScale == COL) ||
274 (ScalePermstruct->DiagScale == BOTH);
278 rowequ = colequ = FALSE;
283 if ( Fact < 0 || Fact > FACTORED )
287 else if ( options->RowPerm < 0 || options->RowPerm > MY_PERMR )
291 else if ( options->ColPerm < 0 || options->ColPerm > MY_PERMC )
295 else if ( options->IterRefine < 0 || options->IterRefine > SLU_EXTRA )
299 else if ( options->IterRefine == SLU_EXTRA )
302 fprintf(stderr,
"Extra precise iterative refinement yet to support.\n");
304 else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NR_loc
305 || A->Dtype != SLU_D || A->Mtype != SLU_GE )
309 else if ( ldb < m_loc )
319 printf(
"Trouble in pdgstrf. Info=%i\n",*info);
322 printf(
"Error in options.\n");
326 printf(
"Error in matrix.\n");
330 printf(
"ldb < m_loc\n");
334 printf(
"nrhs < 0\n");
340 perm_r = ScalePermstruct->perm_r;
341 perm_c = ScalePermstruct->perm_c;
342 etree = LUstruct->etree;
343 R = ScalePermstruct->R;
344 C = ScalePermstruct->C;
351 switch ( ScalePermstruct->DiagScale )
354 if ( !(R = (
double *) doubleMalloc_dist(m)) )
355 ABORT(
"Malloc fails for R[].");
356 if ( !(C = (
double *) doubleMalloc_dist(n)) )
357 ABORT(
"Malloc fails for C[].");
358 ScalePermstruct->R =
R;
359 ScalePermstruct->C = C;
362 if ( !(C = (
double *) doubleMalloc_dist(n)) )
363 ABORT(
"Malloc fails for C[].");
364 ScalePermstruct->C = C;
367 if ( !(R = (
double *) doubleMalloc_dist(m)) )
368 ABORT(
"Malloc fails for R[].");
369 ScalePermstruct->R =
R;
379 t = SuperLU_timer_();
381 if ( Fact == SamePattern_SameRowPerm )
384 switch ( ScalePermstruct->DiagScale )
390 for (j = 0; j < m_loc; ++j)
392 for (i = rowptr[j]; i < rowptr[j+1]; ++
i)
400 for (j = 0; j < m_loc; ++j)
402 for (i = rowptr[j]; i < rowptr[j+1]; ++
i)
411 for (j = 0; j < m_loc; ++j)
413 for (i = rowptr[j]; i < rowptr[j+1]; ++
i)
416 a[
i] *= R[irow] * C[icol];
427 pdgsequ(A, R, C, &rowcnd, &colcnd, &amax, &iinfo, grid);
430 pdlaqgs(A, R, C, rowcnd, colcnd, amax, equed);
432 if ( lsame_(equed,
"R") )
434 ScalePermstruct->DiagScale = rowequ = ROW;
436 else if ( lsame_(equed,
"C") )
438 ScalePermstruct->DiagScale = colequ = COL;
440 else if ( lsame_(equed,
"B") )
442 ScalePermstruct->DiagScale = BOTH;
447 ScalePermstruct->DiagScale = NOEQUIL;
450 stat.utime[EQUIL] = SuperLU_timer_() -
t;
462 if ( Fact != SamePattern_SameRowPerm )
464 need_value = (options->RowPerm == LargeDiag);
465 pdCompRow_loc_to_CompCol_global(need_value, A, grid, &GA);
466 GAstore = (NCformat *) GA.Store;
467 colptr = GAstore->colptr;
468 rowind = GAstore->rowind;
470 if ( need_value ) a_GA = GAstore->nzval;
471 else assert(GAstore->nzval == NULL);
477 if ( options->RowPerm != NO )
479 t = SuperLU_timer_();
480 if ( Fact != SamePattern_SameRowPerm )
482 if ( options->RowPerm == MY_PERMR )
486 for (i = 0; i < colptr[n]; ++
i)
489 rowind[
i] = perm_r[irow];
499 if ( !(R1 = doubleMalloc_dist(m)) )
501 ABORT(
"SUPERLU_MALLOC fails for R1[]");
503 if ( !(C1 = doubleMalloc_dist(n)) )
505 ABORT(
"SUPERLU_MALLOC fails for C1[]");
512 dldperm(job, m, nnz, colptr, rowind, a_GA, perm_r, R1, C1);
514 MPI_Bcast( perm_r, m, mpi_int_t, 0, grid->comm );
515 if ( job == 5 && Equil )
517 MPI_Bcast( R1, m, MPI_DOUBLE, 0, grid->comm );
518 MPI_Bcast( C1, n, MPI_DOUBLE, 0, grid->comm );
523 MPI_Bcast( perm_r, m, mpi_int_t, 0, grid->comm );
524 if ( job == 5 && Equil )
526 MPI_Bcast( R1, m, MPI_DOUBLE, 0, grid->comm );
527 MPI_Bcast( C1, n, MPI_DOUBLE, 0, grid->comm );
535 for (i = 0; i < n; ++
i)
543 for (j = 0; j < m_loc; ++j)
545 for (i = rowptr[j]; i < rowptr[j+1]; ++
i)
548 a[
i] *= R1[irow] * C1[icol];
556 for (i = 0; i < m; ++
i)
563 for (i = 0; i < m; ++
i)
570 for (i = 0; i < n; ++
i)
577 for (i = 0; i < n; ++
i)
583 ScalePermstruct->DiagScale = BOTH;
589 for (j = 0; j < n; ++j)
591 for (i = colptr[j]; i < colptr[j+1]; ++
i)
594 rowind[
i] = perm_r[irow];
602 for (j = 0; j < n; ++j)
604 for (i = colptr[j]; i < colptr[j+1]; ++
i)
607 rowind[
i] = perm_r[irow];
613 t = SuperLU_timer_() -
t;
614 stat.utime[ROWPERM] =
t;
619 for (i = 0; i <m; ++
i) perm_r[i] = i;
623 if ( !factored || options->IterRefine )
627 *(
unsigned char *)norm =
'1';
629 *(
unsigned char *)norm =
'I';
630 anorm = pdlangs(norm, A, grid);
639 t = SuperLU_timer_();
648 permc_spec = options->ColPerm;
649 if ( permc_spec != MY_PERMC && Fact == DOFACT )
651 get_perm_c_dist(iam, permc_spec, &GA, perm_c);
654 stat.utime[COLPERM] = SuperLU_timer_() -
t;
660 if ( Fact != SamePattern_SameRowPerm )
662 int_t *GACcolbeg, *GACcolend, *GACrowind;
664 sp_colorder(options, &GA, perm_c, etree, &GAC);
667 GACstore = GAC.Store;
668 GACcolbeg = GACstore->colbeg;
669 GACcolend = GACstore->colend;
670 GACrowind = GACstore->rowind;
671 for (j = 0; j < n; ++j)
673 for (i = GACcolbeg[j]; i < GACcolend[j]; ++
i)
676 GACrowind[
i] = perm_c[irow];
682 t = SuperLU_timer_();
683 if ( !(Glu_freeable = (Glu_freeable_t *)
684 SUPERLU_MALLOC(
sizeof(Glu_freeable_t))) )
686 ABORT(
"Malloc fails for Glu_freeable.");
690 iinfo = symbfact(options, iam, &GAC, perm_c, etree,
691 Glu_persist, Glu_freeable);
693 stat.utime[SYMBFAC] = SuperLU_timer_() -
t;
697 QuerySpace_dist(n, -iinfo, Glu_freeable, &symb_mem_usage);
703 fprintf(stderr,
"symbfact() error returns %d\n", iinfo);
710 for (j = 0; j < nnz_loc; ++j)
712 colind[j] = perm_c[colind[j]];
718 t = SuperLU_timer_();
719 dist_mem_use = pddistribute(Fact, n, A, ScalePermstruct,
720 Glu_freeable, LUstruct, grid);
721 stat.utime[DIST] = SuperLU_timer_() -
t;
724 if ( Fact != SamePattern_SameRowPerm )
726 iinfo = symbfact_SubFree(Glu_freeable);
727 SUPERLU_FREE(Glu_freeable);
731 t = SuperLU_timer_();
732 pdgstrf(options, m, n, anorm, LUstruct, grid, &stat, info);
733 stat.utime[FACT] = SuperLU_timer_() -
t;
736 if ( Fact != SamePattern_SameRowPerm )
738 Destroy_CompCol_Matrix_dist(&GA);
739 Destroy_CompCol_Permuted_dist(&GAC);
745 printf(
"Trouble in pdgstrf. Info=%i\n",*info);
748 printf(
"U(%i,%i) is exactly zero. The factorization has\n",*info,*info);
749 printf(
"been completed, but the factor U is exactly singular,\n");
750 printf(
"and division by zero will occur if it is used to solve a\n");
751 printf(
"system of equations.\n");
755 printf(
"The %i-th argument had an illegal value.\n", *info);
762 if ( options->SolveInitialized == NO )
764 dSolveInit(options, A, perm_r, perm_c, nrhs, LUstruct, grid,
768 if ( options->IterRefine )
770 if ( options->RefineInitialized == NO || Fact == DOFACT )
773 if ( options->RefineInitialized )
775 pdgsmv_finalize(SOLVEstruct->gsmv_comm);
777 pdgsmv_init(A, SOLVEstruct->row_to_proc, grid,
778 SOLVEstruct->gsmv_comm);
780 options->RefineInitialized = YES;
785 if ((doc==0) && (!iam))
787 printf(
"\nstats after setup....\n");
788 PStatPrint(options, &stat, grid);
795 superlu_data->
options = options;
799 superlu_data->
colequ = colequ;
800 superlu_data->
rowequ = rowequ;
801 superlu_data->
anorm = anorm;
802 *data = superlu_data;
814 grid = superlu_data->
grid;
818 if ( iam >= nprow * npcol )
823 if ((doc==0)&&(!iam))
825 printf(
"\nPerforming SuperLU_DIST solve\n");
832 options = superlu_data->
options;
836 colequ = superlu_data->
colequ;
837 rowequ = superlu_data->
rowequ;
838 anorm = superlu_data->
anorm;
841 Astore = (NRformat_loc *) A->Store;
842 nnz_loc = Astore->nnz_loc;
843 m_loc = Astore->m_loc;
844 fst_row = Astore->fst_row;
845 colind = Astore->colind;
846 R = ScalePermstruct->R;
847 C = ScalePermstruct->C;
850 Fact = options->Fact;
851 factored = (Fact == FACTORED);
852 Equil = (!factored && options->Equil == YES);
853 notran = (options->Trans == NOTRANS);
863 for (j = 0; j < nrhs; ++j)
866 for (i = 0; i < m_loc; ++
i)
878 for (j = 0; j < nrhs; ++j)
881 for (i = 0; i < m_loc; ++
i)
892 if ( !(X = doubleMalloc_dist(((
size_t)ldx) * nrhs)) )
894 ABORT(
"Malloc fails for X[]");
898 for (j = 0; j < nrhs; ++j)
900 for (i = 0; i < m_loc; ++
i)
912 pdgstrs(n, LUstruct, ScalePermstruct, grid, X, m_loc,
913 fst_row, ldb, nrhs, SOLVEstruct, &stat, info);
917 printf(
"Trouble in pdgstrs. Info=%i\n",*info);
918 printf(
"The %i-th argument had an illegal value.\n", *info);
925 if ( options->IterRefine )
928 int_t *it, *colind_gsmv = SOLVEstruct->A_colind_gsmv;
929 SOLVEstruct_t *SOLVEstruct1;
931 t = SuperLU_timer_();
932 if ( options->RefineInitialized == NO || Fact == DOFACT )
938 SUPERLU_FREE(colind_gsmv);
940 if ( !(it = intMalloc_dist(nnz_loc)) )
942 ABORT(
"Malloc fails for colind_gsmv[]");
944 colind_gsmv = SOLVEstruct->A_colind_gsmv = it;
945 for (i = 0; i < nnz_loc; ++
i)
947 colind_gsmv[
i] = colind[
i];
950 else if ( Fact == SamePattern || Fact == SamePattern_SameRowPerm )
956 for (i = 0; i < m_loc; ++
i)
960 for (j = rowptr[i]; j < rowptr[i+1]; ++j)
963 p = SOLVEstruct->row_to_proc[jcol];
967 at = a[k]; a[k] = a[j]; a[j] = at;
975 for (i = 0; i < nnz_loc; ++
i)
977 colind[
i] = colind_gsmv[
i];
982 if ( !(berr = doubleMalloc_dist(nrhs)) )
984 ABORT(
"Malloc fails for berr[].");
987 pdgsrfs(n, A, anorm, LUstruct, ScalePermstruct, grid,
988 B, ldb, X, ldx, nrhs, SOLVEstruct, berr, &stat, info);
990 stat.utime[REFINE] = SuperLU_timer_() -
t;
995 printf(
"Trouble in pdgsrfs. Info=%i\n",*info);
996 printf(
"The %i-th argument had an illegal value.\n", *info);
1000 if ((doc==0) && (!iam))
1002 printf(
"\nstats after solve....\n");
1003 PStatPrint(options, &stat, grid);
1007 pdPermute_Dense_Matrix(fst_row, m_loc, SOLVEstruct->row_to_proc,
1008 SOLVEstruct->inv_perm_c,
1009 X, ldx, B, ldb, nrhs, grid);
1018 for (j = 0; j < nrhs; ++j)
1021 for (i = 0; i < m_loc; ++
i)
1023 b_col[
i] *= C[irow];
1033 for (j = 0; j < nrhs; ++j)
1036 for (i = 0; i < m_loc; ++
i)
1038 b_col[
i] *= R[irow];
1046 if ( options->IterRefine )
1061 grid = superlu_data->
grid;
1064 int iam = grid->iam;
1065 if ( iam >= nprow * npcol )
goto out;
1066 if ((doc==0)&&(!iam))
1068 printf(
"\nCleaning up memory allocated for SuperLU_DIST\n");
1074 A = superlu_data->
A;
1075 options = superlu_data->
options;
1083 R = ScalePermstruct->R;
1084 C = ScalePermstruct->C;
1087 Fact = options->Fact;
1088 factored = (Fact == FACTORED);
1089 Equil = (!factored && options->Equil == YES);
1090 notran = (options->Trans == NOTRANS);
1093 if ( Equil && Fact != SamePattern_SameRowPerm )
1095 switch ( ScalePermstruct->DiagScale )
1111 ScalePermstructFree(ScalePermstruct);
1112 Destroy_LU(n, grid, LUstruct);
1113 LUstructFree(LUstruct);
1114 dSolveFinalize(options, SOLVEstruct);
1118 Destroy_SuperMatrix_Store_dist(A);
1122 SUPERLU_FREE(ScalePermstruct);
1123 SUPERLU_FREE(LUstruct);
1124 SUPERLU_FREE(SOLVEstruct);
1125 SUPERLU_FREE(options);
1129 superlu_gridexit(grid);
1132 SUPERLU_FREE(superlu_data);
1179 int n_in,
int nnz_in,
1181 int *row_index,
int *col_start,
1182 double *b,
int nprow,
int npcol,
1183 int doc,
void **data,
int *info,
1187 superlu_options_t *options;
1191 ScalePermstruct_t *ScalePermstruct;
1192 LUstruct_t *LUstruct;
1201 int_t job, rowequ, colequ, iinfo, need_value,
i, j, irow;
1203 int_t *colptr, *rowind;
1204 int_t Equil, factored, notran, permc_spec, dist_mem_use;
1207 Glu_persist_t *Glu_persist;
1208 Glu_freeable_t *Glu_freeable;
1212 double *a, *X, *b_col;
1214 double *C, *
R, *C1, *R1, *b_work, *bcol, *x_col;
1215 double amax,
t, colcnd, rowcnd, anorm;
1216 char equed[1], norm[1];
1219 static mem_usage_t num_mem_usage, symb_mem_usage;
1250 grid = (gridinfo_t *) SUPERLU_MALLOC(
sizeof(gridinfo_t));
1251 superlu_gridinit(comm, nprow, npcol, grid);
1252 superlu_data->
grid = grid;
1256 if ( iam >= nprow * npcol )
1262 options = (superlu_options_t *) SUPERLU_MALLOC(
sizeof(superlu_options_t));
1263 A = (SuperMatrix *) SUPERLU_MALLOC(
sizeof(SuperMatrix));
1264 AC = (SuperMatrix *) SUPERLU_MALLOC(
sizeof(SuperMatrix));
1266 (ScalePermstruct_t *) SUPERLU_MALLOC(
sizeof(ScalePermstruct_t));
1267 LUstruct = (LUstruct_t *) SUPERLU_MALLOC(
sizeof(LUstruct_t));
1270 set_default_options_dist(options);
1273 options->Trans = NOTRANS;
1278 options->RowPerm=LargeDiag;
1282 options->ColPerm=MMD_AT_PLUS_A;
1285 if (allow_permutations==0)
1287 options->ColPerm=NATURAL;
1288 options->RowPerm=NATURAL;
1293 options->IterRefine = SLU_DOUBLE;
1298 options->PrintStat = YES;
1302 options->PrintStat = NO;
1306 if ((!iam)&&(doc==0))
1308 printf(
"\nPerforming SuperLU_DIST setup\n");
1309 printf(
"Process grid\t%d X %d\n", grid->nprow, grid->npcol);
1310 print_options_dist(options);
1315 dCreate_CompCol_Matrix_dist(A,m,n,nnz,values,row_index,col_start,
1316 SLU_NC,SLU_D,SLU_GE);
1319 ScalePermstructInit(m, n, ScalePermstruct);
1320 LUstructInit(m, n, LUstruct);
1324 Fact = options->Fact;
1325 if ( Fact < 0 || Fact > FACTORED )
1329 else if ( options->RowPerm < 0 || options->RowPerm > MY_PERMR )
1333 else if ( options->ColPerm < 0 || options->ColPerm > MY_PERMC )
1337 else if ( options->IterRefine < 0 || options->IterRefine > SLU_EXTRA )
1341 else if ( options->IterRefine == SLU_EXTRA )
1344 fprintf(stderr,
"Extra precise iterative refinement yet to support.\n");
1346 else if ( A->nrow != A->ncol || A->nrow < 0 ||
1347 A->Stype != SLU_NC || A->Dtype != SLU_D || A->Mtype != SLU_GE )
1351 else if ( ldb < A->nrow )
1355 else if ( nrhs < 0 )
1361 printf(
"Trouble in pdgstrf. Info=%i\n",-*info);
1364 printf(
"Error in options.\n");
1368 printf(
"Error in matrix.\n");
1372 printf(
"ldb < A->nrow\n");
1376 printf(
"nrhs < 0\n");
1382 factored = (Fact == FACTORED);
1383 Equil = (!factored && options->Equil == YES);
1384 notran = (options->Trans == NOTRANS);
1389 colptr = Astore->colptr;
1390 rowind = Astore->rowind;
1391 if ( factored || (Fact == SamePattern_SameRowPerm && Equil) )
1393 rowequ = (ScalePermstruct->DiagScale == ROW) ||
1394 (ScalePermstruct->DiagScale == BOTH);
1395 colequ = (ScalePermstruct->DiagScale == COL) ||
1396 (ScalePermstruct->DiagScale == BOTH);
1400 rowequ = colequ = FALSE;
1403 perm_r = ScalePermstruct->perm_r;
1404 perm_c = ScalePermstruct->perm_c;
1405 etree = LUstruct->etree;
1406 R = ScalePermstruct->R;
1407 C = ScalePermstruct->C;
1408 Glu_persist = LUstruct->Glu_persist;
1412 switch ( ScalePermstruct->DiagScale )
1415 if ( !(R = (
double *) doubleMalloc_dist(m)) )
1416 ABORT(
"Malloc fails for R[].");
1417 if ( !(C = (
double *) doubleMalloc_dist(n)) )
1418 ABORT(
"Malloc fails for C[].");
1419 ScalePermstruct->R =
R;
1420 ScalePermstruct->C = C;
1423 if ( !(C = (
double *) doubleMalloc_dist(n)) )
1424 ABORT(
"Malloc fails for C[].");
1425 ScalePermstruct->C = C;
1428 if ( !(R = (
double *) doubleMalloc_dist(m)) )
1429 ABORT(
"Malloc fails for R[].");
1430 ScalePermstruct->R =
R;
1440 t = SuperLU_timer_();
1442 if ( Fact == SamePattern_SameRowPerm )
1445 switch ( ScalePermstruct->DiagScale )
1450 for (j = 0; j < n; ++j)
1452 for (i = colptr[j]; i < colptr[j+1]; ++
i)
1460 for (j = 0; j < n; ++j)
1462 for (i = colptr[j]; i < colptr[j+1]; ++
i)
1469 for (j = 0; j < n; ++j)
1471 for (i = colptr[j]; i < colptr[j+1]; ++
i)
1474 a[
i] *= R[irow] * C[j];
1485 dgsequ_dist(A, R, C, &rowcnd, &colcnd, &amax, &iinfo);
1487 MPI_Bcast( &iinfo, 1, mpi_int_t, 0, grid->comm );
1490 MPI_Bcast( R, m, MPI_DOUBLE, 0, grid->comm );
1491 MPI_Bcast( C, n, MPI_DOUBLE, 0, grid->comm );
1492 MPI_Bcast( &rowcnd, 1, MPI_DOUBLE, 0, grid->comm );
1493 MPI_Bcast( &colcnd, 1, MPI_DOUBLE, 0, grid->comm );
1494 MPI_Bcast( &amax, 1, MPI_DOUBLE, 0, grid->comm );
1502 fprintf(stderr,
"The %d-th row of A is exactly zero\n",
1506 {fprintf(stderr,
"The %d-th column of A is exactly zero\n",
1514 MPI_Bcast( &iinfo, 1, mpi_int_t, 0, grid->comm );
1517 MPI_Bcast( R, m, MPI_DOUBLE, 0, grid->comm );
1518 MPI_Bcast( C, n, MPI_DOUBLE, 0, grid->comm );
1519 MPI_Bcast( &rowcnd, 1, MPI_DOUBLE, 0, grid->comm );
1520 MPI_Bcast( &colcnd, 1, MPI_DOUBLE, 0, grid->comm );
1521 MPI_Bcast( &amax, 1, MPI_DOUBLE, 0, grid->comm );
1525 ABORT(
"DGSEQU failed\n");
1530 dlaqgs_dist(A, R, C, rowcnd, colcnd, amax, equed);
1531 if ( lsame_(equed,
"R") )
1533 ScalePermstruct->DiagScale = rowequ = ROW;
1535 else if ( lsame_(equed,
"C") )
1537 ScalePermstruct->DiagScale = colequ = COL;
1539 else if ( lsame_(equed,
"B") )
1541 ScalePermstruct->DiagScale = BOTH;
1547 ScalePermstruct->DiagScale = NOEQUIL;
1551 stat.utime[EQUIL] = SuperLU_timer_() -
t;
1558 if ( options->RowPerm != NO )
1560 t = SuperLU_timer_();
1562 if ( Fact == SamePattern_SameRowPerm
1563 || options->RowPerm == MY_PERMR )
1568 for (i = 0; i < colptr[n]; ++
i)
1571 rowind[
i] = perm_r[irow];
1575 else if ( !factored )
1580 if ( !(R1 = (
double *) SUPERLU_MALLOC(m *
sizeof(
double))) )
1581 ABORT(
"SUPERLU_MALLOC fails for R1[]");
1582 if ( !(C1 = (
double *) SUPERLU_MALLOC(n *
sizeof(
double))) )
1583 ABORT(
"SUPERLU_MALLOC fails for C1[]");
1589 dldperm(job, m, nnz, colptr, rowind, a, perm_r, R1, C1);
1591 MPI_Bcast( perm_r, m, mpi_int_t, 0, grid->comm );
1592 if ( job == 5 && Equil )
1594 MPI_Bcast( R1, m, MPI_DOUBLE, 0, grid->comm );
1595 MPI_Bcast( C1, n, MPI_DOUBLE, 0, grid->comm );
1600 MPI_Bcast( perm_r, m, mpi_int_t, 0, grid->comm );
1601 if ( job == 5 && Equil )
1603 MPI_Bcast( R1, m, MPI_DOUBLE, 0, grid->comm );
1604 MPI_Bcast( C1, n, MPI_DOUBLE, 0, grid->comm );
1612 for (i = 0; i < n; ++
i)
1617 for (j = 0; j < n; ++j)
1619 for (i = colptr[j]; i < colptr[j+1]; ++
i)
1622 a[
i] *= R1[irow] * C1[j];
1623 rowind[
i] = perm_r[irow];
1630 for (i = 0; i < m; ++
i)
1637 for (i = 0; i < m; ++
i)
1644 for (i = 0; i < n; ++
i)
1651 for (i = 0; i < n; ++
i)
1657 ScalePermstruct->DiagScale = BOTH;
1658 rowequ = colequ = 1;
1665 for (i = colptr[0]; i < colptr[n]; ++
i)
1668 rowind[
i] = perm_r[irow];
1677 for (j = 0; j < n; ++j)
1679 for (i = colptr[j]; i < colptr[j+1]; ++
i)
1682 rowind[
i] = perm_r[irow];
1688 t = SuperLU_timer_() -
t;
1689 stat.utime[ROWPERM] =
t;
1692 if ( !factored || options->IterRefine )
1697 *(
unsigned char *)norm =
'1';
1701 *(
unsigned char *)norm =
'I';
1703 anorm = dlangs_dist(norm, A);
1713 t = SuperLU_timer_();
1722 permc_spec = options->ColPerm;
1723 if ( permc_spec != MY_PERMC && Fact == DOFACT )
1726 get_perm_c_dist(iam, permc_spec, A, perm_c);
1733 sp_colorder(options, A, perm_c, etree, AC);
1736 ACstore = AC->Store;
1737 for (j = 0; j < n; ++j)
1739 for (i = ACstore->colbeg[j]; i < ACstore->colend[j]; ++i)
1741 irow = ACstore->rowind[
i];
1742 ACstore->rowind[
i] = perm_c[irow];
1745 stat.utime[COLPERM] = SuperLU_timer_() -
t;
1749 if ( Fact != SamePattern_SameRowPerm )
1751 t = SuperLU_timer_();
1752 if ( !(Glu_freeable = (Glu_freeable_t *)
1753 SUPERLU_MALLOC(
sizeof(Glu_freeable_t))) )
1754 ABORT(
"Malloc fails for Glu_freeable.");
1756 iinfo = symbfact(options, iam, AC, perm_c, etree,
1757 Glu_persist, Glu_freeable);
1759 stat.utime[SYMBFAC] = SuperLU_timer_() -
t;
1763 QuerySpace_dist(n, -iinfo, Glu_freeable, &symb_mem_usage);
1769 fprintf(stderr,
"symbfact() error returns %d\n", iinfo);
1776 t = SuperLU_timer_();
1777 dist_mem_use = ddistribute(Fact, n, AC, Glu_freeable, LUstruct, grid);
1778 stat.utime[DIST] = SuperLU_timer_() -
t;
1781 if ( Fact != SamePattern_SameRowPerm )
1783 iinfo = symbfact_SubFree(Glu_freeable);
1784 SUPERLU_FREE(Glu_freeable);
1788 t = SuperLU_timer_();
1789 pdgstrf(options, m, n, anorm, LUstruct, grid, &stat, info);
1790 stat.utime[FACT] = SuperLU_timer_() -
t;
1792 else if ( options->IterRefine )
1798 sp_colorder(options, A, perm_c, NULL, AC);
1803 printf(
"Trouble in pdgstrf. Info=%i\n",*info);
1806 printf(
"U(%i,%i) is exactly zero. The factorization has\n",*info,*info);
1807 printf(
"been completed, but the factor U is exactly singular,\n");
1808 printf(
"and division by zero will occur if it is used to solve a\n");
1809 printf(
"system of equations.\n");
1813 printf(
"The %i-th argument had an illegal value.\n", *info);
1818 if ((doc==0) && (!iam))
1820 printf(
"\nstats after setup....\n");
1821 PStatPrint(options, &stat, grid);
1827 superlu_data->
A = A;
1828 superlu_data->
AC = AC;
1829 superlu_data->
options = options;
1832 superlu_data->
colequ = colequ;
1833 superlu_data->
rowequ = rowequ;
1834 superlu_data->
anorm = anorm;
1835 *data = superlu_data;
1847 grid = superlu_data->
grid;
1851 if ( iam >= nprow * npcol )
1856 if ((doc==0)&&(!iam))
1858 printf(
"\nPerforming SuperLU_DIST solve\n");
1864 A = superlu_data->
A;
1865 AC = superlu_data->
AC;
1866 options = superlu_data->
options;
1869 colequ = superlu_data->
colequ;
1870 rowequ = superlu_data->
rowequ;
1871 anorm = superlu_data->
anorm;
1875 colptr = Astore->colptr;
1876 rowind = Astore->rowind;
1877 R = ScalePermstruct->R;
1878 C = ScalePermstruct->C;
1879 perm_r = ScalePermstruct->perm_r;
1880 perm_c = ScalePermstruct->perm_c;
1883 Fact = options->Fact;
1884 factored = (Fact == FACTORED);
1885 Equil = (!factored && options->Equil == YES);
1886 notran = (options->Trans == NOTRANS);
1892 if ( !(b_work = doubleMalloc_dist(n)) )
1894 ABORT(
"Malloc fails for b_work[]");
1905 for (j = 0; j < nrhs; ++j)
1907 for (i = 0; i < m; ++
i)
1918 for (j = 0; j < nrhs; ++j)
1920 for (i = 0; i < m; ++
i)
1931 if ( options->RowPerm != NO )
1936 for (j = 0; j < nrhs; ++j)
1938 for (i = 0; i < m; ++
i)
1940 b_work[perm_r[
i]] = b_col[
i];
1942 for (i = 0; i < m; ++
i)
1944 b_col[
i] = b_work[
i];
1958 for (j = 0; j < nrhs; ++j)
1960 for (i = 0; i < m; ++
i)
1962 b_work[perm_c[
i]] = b_col[
i];
1964 for (i = 0; i < m; ++
i)
1966 b_col[
i] = b_work[
i];
1975 if ( !(X = doubleMalloc_dist(((
size_t)ldx) * nrhs)) )
1977 ABORT(
"Malloc fails for X[]");
1982 for (j = 0; j < nrhs; ++j)
1984 for (i = 0; i < ldb; ++
i)
1986 x_col[
i] = b_col[
i];
1995 pdgstrs_Bglobal(n, LUstruct, grid, X, ldb, nrhs, &stat, info);
1998 printf(
"Trouble in pdgstrs_Bglobal. Info=%i\n",*info);
1999 printf(
"The %i-th argument had an illegal value.\n", *info);
2006 if ( options->IterRefine )
2009 t = SuperLU_timer_();
2012 if ( !(berr = doubleMalloc_dist(nrhs)) )
2014 ABORT(
"Malloc fails for berr[].");
2017 pdgsrfs_ABXglobal(n, AC, anorm, LUstruct, grid, B, ldb,
2018 X, ldx, nrhs, berr, &stat, info);
2021 printf(
"Trouble in pdgsrfs_ABXglobal. Info=%i\n",*info);
2022 printf(
"The %i-th argument had an illegal value.\n", *info);
2024 stat.utime[REFINE] = SuperLU_timer_() -
t;
2028 if ((doc==0) && (!iam))
2030 printf(
"\nstats after solve....\n");
2031 PStatPrint(options, &stat, grid);
2035 for (j = 0; j < nrhs; j++)
2039 for (i = 0; i < n; ++
i)
2041 b_col[
i] = x_col[perm_c[
i]];
2052 for (j = 0; j < nrhs; ++j)
2054 for (i = 0; i < n; ++
i)
2065 for (j = 0; j < nrhs; ++j)
2067 for (i = 0; i < n; ++
i)
2077 if ( options->IterRefine )
2081 SUPERLU_FREE(b_work);
2093 grid = superlu_data->
grid;
2097 if ( iam >= nprow * npcol )
goto out;
2098 if ((doc==0)&&(!iam))
2100 printf(
"\nCleaning up memory allocated for SuperLU_DIST\n");
2106 A = superlu_data->
A;
2107 AC = superlu_data->
AC;
2108 options = superlu_data->
options;
2115 R = ScalePermstruct->R;
2116 C = ScalePermstruct->C;
2119 Fact = options->Fact;
2120 factored = (Fact == FACTORED);
2121 Equil = (!factored && options->Equil == YES);
2122 rowequ = colequ = FALSE;
2123 if ( factored || (Fact == SamePattern_SameRowPerm && Equil) )
2125 rowequ = (ScalePermstruct->DiagScale == ROW) ||
2126 (ScalePermstruct->DiagScale == BOTH);
2127 colequ = (ScalePermstruct->DiagScale == COL) ||
2128 (ScalePermstruct->DiagScale == BOTH);
2132 rowequ = colequ = FALSE;
2136 if ( Equil && Fact != SamePattern_SameRowPerm )
2138 switch ( ScalePermstruct->DiagScale )
2152 if ( !factored || (factored && options->IterRefine) )
2154 Destroy_CompCol_Permuted_dist(AC);
2158 ScalePermstructFree(ScalePermstruct);
2159 Destroy_LU(n, grid, LUstruct);
2160 LUstructFree(LUstruct);
2163 Destroy_SuperMatrix_Store_dist(A);
2168 SUPERLU_FREE(ScalePermstruct);
2169 SUPERLU_FREE(LUstruct);
2170 SUPERLU_FREE(options);
2174 superlu_gridexit(grid);
2177 SUPERLU_FREE(superlu_data);
void superlu_dist_global_matrix(int opt_flag, int allow_permutations, int n_in, int nnz_in, double *values, int *row_index, int *col_start, double *b, int nprow, int npcol, int doc, void **data, int *info, MPI_Comm comm)
SOLVEstruct_t * SOLVEstruct
void superlu_cr_to_cc(int nrow, int ncol, int nnz, double *cr_values, int *cr_index, int *cr_start, double **cc_values, int **cc_index, int **cc_start)
superlu_options_t * options
void superlu_dist_distributed_matrix(int opt_flag, int allow_permutations, int n, int nnz_local, int nrow_local, int first_row, double *values, int *col_index, int *row_start, double *b, int nprow, int npcol, int doc, void **data, int *info, MPI_Comm comm)
ScalePermstruct_t * ScalePermstruct