superlu_dist.c
Go to the documentation of this file.
1 
2 
3 /*----------------------------------------------------------------
4  * Interface to distributed SuperLU, created by JWB by adapting
5  * code in the superlu distribution, i.e. files /SRC/pdgssvx.c
6  * (function pdgssvx solves a system of linear equations) and
7  * /EXAMPLE/pddrive.c demo (a driver program to illustrate how to
8  * use pdgssvx).
9  * Essentially the code below performs the same functions as
10  * pdgssvx in a modified order which allows resolves. Much of the
11  * code taken from pdgssvx remains essentially unchanged other
12  * than changing the layout to match the oomph-lib standard. Comments
13  * from the original code have been left unchanged whenever possible
14  * to help match this code with that found in pdgssvx.c
15  *
16  * To update this driver code for use with later versions of
17  * Distributed SuperLU I suggest first looking at changes (if any) to
18  * the two distributed SuperLU files, and then making the corresponding
19  * changes to the code below.
20  *
21  * Adapted from code found in:
22  * -- Distributed SuperLU routine (version 2.0) --
23  * Lawrence Berkeley National Lab, Univ. of California Berkeley.
24  * March 15, 2003
25  *
26  *----------------------------------------------------------------
27  */
28 #include <math.h>
29 #ifdef USING_OOMPH_SUPERLU_DIST
30 #include "oomph_superlu_dist_3.0.h"
31 #else
32 #include<superlu_defs.h>
33 #include<superlu_ddefs.h>
34 #include<Cnames.h>
35 #include<machines.h>
36 #include<psymbfact.h>
37 #include<supermatrix.h>
38 #include<old_colamd.h>
39 #include<util_dist.h>
40 #endif
41 
42 
43 
44 /* ================================================= */
45 /* Struct for the lu factors */
46 /* ================================================= */
47 typedef struct
48 {
49  gridinfo_t* grid;
50  SuperMatrix* A;
51  SuperMatrix* AC;
52  ScalePermstruct_t* ScalePermstruct;
53  LUstruct_t* LUstruct;
54  SOLVEstruct_t* SOLVEstruct;
55  superlu_options_t* options;
56  int_t rowequ;
57  int_t colequ;
58  double anorm;
60 
61 //=============================================================================
62 // helper method - just calls the superlu method dCompRow_to_CompCol to convert
63 // the c-style vectors of a cr matrix to a cc matrix
64 //=============================================================================
65 void superlu_cr_to_cc(int nrow, int ncol, int nnz, double* cr_values,
66  int* cr_index, int* cr_start, double** cc_values,
67  int** cc_index, int** cc_start)
68 {
69  dCompRow_to_CompCol(nrow,ncol,nnz,cr_values,cr_index,cr_start,cc_values,
70  cc_index,cc_start);
71 }
72 
73 /*----------------------------------------------------------------
74  * Bridge to distributed SuperLU with distributed memory (version 2.0).
75  * Requires input of system matrix in compressed row form.
76  *
77  * Parameters:
78  * op_flag = int specifies the operation:
79  * 1, performs LU decomposition for the first time
80  * 2, performs triangular solve
81  * 3, free all the storage in the end
82  * - n = size of system (square matrix)
83  * - nnz = # of nonzero entries
84  * - values = 1D C array of nonzero entries
85  * - row_index = 1D C array of row indices
86  * - column_start = 1D C array of column start indices
87  * - b = 1D C array representing the rhs vector, is overwritten
88  * by solution.
89  * - nprow = # of rows in process grid
90  * - npcol = # of columns in process grid
91  * - doc = 0/1 for doc/no doc
92  * - data = pointer to structure to contain LU solver data.
93  * If *opt_flag == 1, it is an output. Otherwise, it it an input.
94  *
95  * Return value for *info:
96  * = 0: successful exit
97  * > 0: if *info = i, and i is
98  * <= A->ncol: U(i,i) is exactly zero. The factorization has
99  * been completed, but the factor U is exactly singular,
100  * so the solution could not be computed.
101  * > A->ncol: number of bytes allocated when memory allocation
102  * failure occurred, plus A->ncol.
103  * < 0: some other error
104  *----------------------------------------------------------------
105  */
106 void superlu_dist_distributed_matrix(int opt_flag, int allow_permutations,
107  int n, int nnz_local,
108  int nrow_local,int first_row,
109  double *values, int *col_index,
110  int *row_start, double *b,
111  int nprow, int npcol,
112  int doc, void **data, int *info,
113  MPI_Comm comm)
114 {
115  /* Some SuperLU structures */
116  superlu_options_t *options;
117  SuperLUStat_t stat;
118  SuperMatrix *A;
119  ScalePermstruct_t *ScalePermstruct;
120  LUstruct_t *LUstruct;
121  SOLVEstruct_t *SOLVEstruct;
122  gridinfo_t *grid;
123 
124  /* Structure to hold SuperLU structures and data */
125  superlu_dist_data *superlu_data;
126 
127  int_t *perm_r; /* row permutations from partial pivoting */
128  int_t *perm_c; /* column permutation vector */
129  int_t *etree; /* elimination tree */
130  int_t *rowptr, *colind; /* Local A in NR*/
131  int_t job, rowequ, colequ, iinfo, need_value, i, j, irow, icol;
132  int_t m_loc, fst_row, nnz, nnz_loc, dist_mem_use;
133  int_t *colptr, *rowind;
134  NRformat_loc *Astore;
135  SuperMatrix GA; /* Global A in NC format */
136  NCformat *GAstore;
137  double *a_GA;
138  SuperMatrix GAC; /* Global A in NCP format (add n end pointers) */
139  NCPformat *GACstore;
140  Glu_persist_t *Glu_persist;
141  Glu_freeable_t *Glu_freeable;
142 
143  /* Other stuff needed by SuperLU */
144  double *berr;
145  double *a, *X, *b_col;
146  double *B=b;
147  double *C, *R, *C1, *R1, *bcol, *x_col;
148  double amax, t, colcnd, rowcnd, anorm;
149  char equed[1], norm[1];
150  int ldx; /* LDA for matrix X (local). */
151  static mem_usage_t symb_mem_usage;
152  fact_t Fact;
153  int_t Equil, factored, notran, permc_spec;
154 
155  /* We're only doing single rhs problems
156  note: code will need modifying to deal with
157  multiple rhs (see function pdgssvx) */
158 
159  int nrhs=1;
160 
161  /* Square matrix */
162  int m=n;
163 
164  /* Set 'Leading dimension' of rhs vector */
165  int ldb=n;
166 
167  /* Initialize the statistics variables. */
168  PStatInit(&stat);
169 
170  /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
171  SET UP GRID, FACTORS, ETC
172  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
173  if (opt_flag==1)
174  {
175  /* Allocate data structure to store data between calls to this function */
176  superlu_data =
177  (superlu_dist_data *) SUPERLU_MALLOC(sizeof(superlu_dist_data));
178 
179  /* Initialize the superlu process grid. */
180  grid = (gridinfo_t *) SUPERLU_MALLOC(sizeof(gridinfo_t));
181  superlu_gridinit(comm, nprow, npcol, grid);
182  superlu_data->grid = grid;
183 
184  /* Bail out if I do not belong in the grid. */
185  int iam = grid->iam;
186  if ( iam >= nprow * npcol ) return;
187 
188  /* Allocate memory for SuperLU_DIST structures */
189  options = (superlu_options_t *) SUPERLU_MALLOC(sizeof(superlu_options_t));
190  A = (SuperMatrix *) SUPERLU_MALLOC(sizeof(SuperMatrix));
191  ScalePermstruct =
192  (ScalePermstruct_t *) SUPERLU_MALLOC(sizeof(ScalePermstruct_t));
193  LUstruct = (LUstruct_t *) SUPERLU_MALLOC(sizeof(LUstruct_t));
194  SOLVEstruct = (SOLVEstruct_t *) SUPERLU_MALLOC(sizeof(SOLVEstruct_t));
195 
196  /* Create SuperMatrix from compressed row representation */
197  dCreate_CompRowLoc_Matrix_dist(A,m,n,nnz_local,nrow_local,first_row,
198  values,col_index,row_start,
199  SLU_NR_loc,SLU_D,SLU_GE);
200 
201  /* Set the default options */
202  set_default_options_dist(options);
203 
204  /* Is the matrix transposed (NOTRANS or TRANS)? */
205  options->Trans = NOTRANS;
206 
207  /* Row permutations (NATURAL [= do nothing], */
208  /* LargeDiag [default], ...)? */
209  /* options->RowPerm=NATURAL; */
210  options->RowPerm=LargeDiag;
211 
212  /* Column permutations (NATURAL [= do nothing], */
213  /* MMD_AT_PLUS_A [default],...) */
214  options->ColPerm=MMD_AT_PLUS_A;
215 
216  /* Use natural ordering instead? */
217  if (allow_permutations==0)
218  {
219  options->ColPerm=NATURAL;
220  options->RowPerm=NATURAL;
221  }
222 
223 /* printf("\n\n\nSWITCHING OFF EQUILIBRATION\n\n\n"); */
224 /* options->Equil=NO; */
225 
226  /* Iterative refinement (essential as far as I can tell).*/
227  /* Can be "NO" or "DOUBLE"*/
228  options->IterRefine = SLU_DOUBLE;
229 
230  /* Print stats during solve? */
231  if (doc==0)
232  {
233  options->PrintStat = YES;
234  }
235  else
236  {
237  options->PrintStat = NO;
238  }
239 
240 
241  /* Doc output on process 0 if required: */
242  if ((!iam)&&(doc==0))
243  {
244  printf("\nPerforming SuperLU_DIST setup\n");
245  printf("Process grid\t%d X %d\n", grid->nprow, grid->npcol);
246  print_options_dist(options);
247  }
248 
249  /* Initialize ScalePermstruct and LUstruct. */
250  ScalePermstructInit(m, n, ScalePermstruct);
251  LUstructInit(m, n, LUstruct);
252 
253  /* Initialization. */
254  Glu_persist = LUstruct->Glu_persist;
255  Astore = (NRformat_loc *) A->Store;
256  nnz_loc = Astore->nnz_loc;
257  m_loc = Astore->m_loc;
258  fst_row = Astore->fst_row;
259  a = Astore->nzval;
260  rowptr = Astore->rowptr;
261  colind = Astore->colind;
262 
263  job = 5;
264  Fact = options->Fact;
265  factored = (Fact == FACTORED);
266  Equil = (!factored && options->Equil == YES);
267  notran = (options->Trans == NOTRANS);
268  rowequ = colequ = FALSE;
269  if ( factored || (Fact == SamePattern_SameRowPerm && Equil) )
270  {
271  rowequ = (ScalePermstruct->DiagScale == ROW) ||
272  (ScalePermstruct->DiagScale == BOTH);
273  colequ = (ScalePermstruct->DiagScale == COL) ||
274  (ScalePermstruct->DiagScale == BOTH);
275  }
276  else
277  {
278  rowequ = colequ = FALSE;
279  }
280 
281  /* Test the control parameters etc. */
282  *info = 0;
283  if ( Fact < 0 || Fact > FACTORED )
284  {
285  *info = -1;
286  }
287  else if ( options->RowPerm < 0 || options->RowPerm > MY_PERMR )
288  {
289  *info = -1;
290  }
291  else if ( options->ColPerm < 0 || options->ColPerm > MY_PERMC )
292  {
293  *info = -1;
294  }
295  else if ( options->IterRefine < 0 || options->IterRefine > SLU_EXTRA )
296  {
297  *info = -1;
298  }
299  else if ( options->IterRefine == SLU_EXTRA )
300  {
301  *info = -1;
302  fprintf(stderr, "Extra precise iterative refinement yet to support.\n");
303  }
304  else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NR_loc
305  || A->Dtype != SLU_D || A->Mtype != SLU_GE )
306  {
307  *info = -2;
308  }
309  else if ( ldb < m_loc )
310  {
311  *info = -5;
312  }
313  else if ( nrhs < 0 )
314  {
315  *info = -6;
316  }
317  if ( *info )
318  {
319  printf("Trouble in pdgstrf. Info=%i\n",*info);
320  if (*info==-1)
321  {
322  printf("Error in options.\n");
323  }
324  else if (*info==-2)
325  {
326  printf("Error in matrix.\n");
327  }
328  else if (*info==-5)
329  {
330  printf("ldb < m_loc\n");
331  }
332  else if (*info==-6)
333  {
334  printf("nrhs < 0\n");
335  }
336  return;
337  }
338 
339  /* The following arrays are replicated on all processes. */
340  perm_r = ScalePermstruct->perm_r;
341  perm_c = ScalePermstruct->perm_c;
342  etree = LUstruct->etree;
343  R = ScalePermstruct->R;
344  C = ScalePermstruct->C;
345 
346  /* Allocate storage. */
347  if ( Equil )
348  {
349  /* Not factored & ask for equilibration */
350  /* Allocate storage if not done so before. */
351  switch ( ScalePermstruct->DiagScale )
352  {
353  case NOEQUIL:
354  if ( !(R = (double *) doubleMalloc_dist(m)) )
355  ABORT("Malloc fails for R[].");
356  if ( !(C = (double *) doubleMalloc_dist(n)) )
357  ABORT("Malloc fails for C[].");
358  ScalePermstruct->R = R;
359  ScalePermstruct->C = C;
360  break;
361  case ROW:
362  if ( !(C = (double *) doubleMalloc_dist(n)) )
363  ABORT("Malloc fails for C[].");
364  ScalePermstruct->C = C;
365  break;
366  case COL:
367  if ( !(R = (double *) doubleMalloc_dist(m)) )
368  ABORT("Malloc fails for R[].");
369  ScalePermstruct->R = R;
370  break;
371  }
372  }
373 
374  /* ------------------------------------------------------------
375  Diagonal scaling to equilibrate the matrix.
376  ------------------------------------------------------------*/
377  if ( Equil )
378  {
379  t = SuperLU_timer_();
380 
381  if ( Fact == SamePattern_SameRowPerm )
382  {
383  /* Reuse R and C. */
384  switch ( ScalePermstruct->DiagScale )
385  {
386  case NOEQUIL:
387  break;
388  case ROW:
389  irow = fst_row;
390  for (j = 0; j < m_loc; ++j)
391  {
392  for (i = rowptr[j]; i < rowptr[j+1]; ++i)
393  {
394  a[i] *= R[irow]; /* Scale rows. */
395  }
396  ++irow;
397  }
398  break;
399  case COL:
400  for (j = 0; j < m_loc; ++j)
401  {
402  for (i = rowptr[j]; i < rowptr[j+1]; ++i)
403  {
404  icol = colind[i];
405  a[i] *= C[icol]; /* Scale columns. */
406  }
407  }
408  break;
409  case BOTH:
410  irow = fst_row;
411  for (j = 0; j < m_loc; ++j)
412  {
413  for (i = rowptr[j]; i < rowptr[j+1]; ++i)
414  {
415  icol = colind[i];
416  a[i] *= R[irow] * C[icol]; /* Scale rows and cols. */
417  }
418  ++irow;
419  }
420  break;
421  }
422  }
423  else
424  {
425  /* Compute R & C from scratch */
426  /* Compute the row and column scalings. */
427  pdgsequ(A, R, C, &rowcnd, &colcnd, &amax, &iinfo, grid);
428 
429  /* Equilibrate matrix A if it is badly-scaled. */
430  pdlaqgs(A, R, C, rowcnd, colcnd, amax, equed);
431 
432  if ( lsame_(equed, "R") )
433  {
434  ScalePermstruct->DiagScale = rowequ = ROW;
435  }
436  else if ( lsame_(equed, "C") )
437  {
438  ScalePermstruct->DiagScale = colequ = COL;
439  }
440  else if ( lsame_(equed, "B") )
441  {
442  ScalePermstruct->DiagScale = BOTH;
443  rowequ = ROW;
444  colequ = COL;
445  }
446  else
447  ScalePermstruct->DiagScale = NOEQUIL;
448  } /* if Fact ... */
449 
450  stat.utime[EQUIL] = SuperLU_timer_() - t;
451  } /* if Equil ... */
452 
453  if ( !factored )
454  {
455  /* Skip this if already factored. */
456  /*
457  * Gather A from the distributed compressed row format to
458  * global A in compressed column format.
459  * Numerical values are gathered only when a row permutation
460  * for large diagonal is sought after.
461  */
462  if ( Fact != SamePattern_SameRowPerm )
463  {
464  need_value = (options->RowPerm == LargeDiag);
465  pdCompRow_loc_to_CompCol_global(need_value, A, grid, &GA);
466  GAstore = (NCformat *) GA.Store;
467  colptr = GAstore->colptr;
468  rowind = GAstore->rowind;
469  nnz = GAstore->nnz;
470  if ( need_value ) a_GA = GAstore->nzval;
471  else assert(GAstore->nzval == NULL);
472  }
473 
474  /* ------------------------------------------------------------
475  Find the row permutation for A.
476  ------------------------------------------------------------*/
477  if ( options->RowPerm != NO )
478  {
479  t = SuperLU_timer_();
480  if ( Fact != SamePattern_SameRowPerm )
481  {
482  if ( options->RowPerm == MY_PERMR )
483  {
484  /* Use user's perm_r. */
485  /* Permute the global matrix GA for symbfact() */
486  for (i = 0; i < colptr[n]; ++i)
487  {
488  irow = rowind[i];
489  rowind[i] = perm_r[irow];
490  }
491  }
492  else
493  {
494  /* options->RowPerm == LargeDiag */
495  /* Get a new perm_r[] */
496  if ( job == 5 )
497  {
498  /* Allocate storage for scaling factors. */
499  if ( !(R1 = doubleMalloc_dist(m)) )
500  {
501  ABORT("SUPERLU_MALLOC fails for R1[]");
502  }
503  if ( !(C1 = doubleMalloc_dist(n)) )
504  {
505  ABORT("SUPERLU_MALLOC fails for C1[]");
506  }
507  }
508 
509  if ( !iam )
510  {
511  /* Process 0 finds a row permutation */
512  dldperm(job, m, nnz, colptr, rowind, a_GA, perm_r, R1, C1);
513 
514  MPI_Bcast( perm_r, m, mpi_int_t, 0, grid->comm );
515  if ( job == 5 && Equil )
516  {
517  MPI_Bcast( R1, m, MPI_DOUBLE, 0, grid->comm );
518  MPI_Bcast( C1, n, MPI_DOUBLE, 0, grid->comm );
519  }
520  }
521  else
522  {
523  MPI_Bcast( perm_r, m, mpi_int_t, 0, grid->comm );
524  if ( job == 5 && Equil )
525  {
526  MPI_Bcast( R1, m, MPI_DOUBLE, 0, grid->comm );
527  MPI_Bcast( C1, n, MPI_DOUBLE, 0, grid->comm );
528  }
529  }
530 
531  if ( job == 5 )
532  {
533  if ( Equil )
534  {
535  for (i = 0; i < n; ++i)
536  {
537  R1[i] = exp(R1[i]);
538  C1[i] = exp(C1[i]);
539  }
540 
541  /* Scale the distributed matrix */
542  irow = fst_row;
543  for (j = 0; j < m_loc; ++j)
544  {
545  for (i = rowptr[j]; i < rowptr[j+1]; ++i)
546  {
547  icol = colind[i];
548  a[i] *= R1[irow] * C1[icol];
549  }
550  ++irow;
551  }
552 
553  /* Multiply together the scaling factors. */
554  if ( rowequ )
555  {
556  for (i = 0; i < m; ++i)
557  {
558  R[i] *= R1[i];
559  }
560  }
561  else
562  {
563  for (i = 0; i < m; ++i)
564  {
565  R[i] = R1[i];
566  }
567  }
568  if ( colequ )
569  {
570  for (i = 0; i < n; ++i)
571  {
572  C[i] *= C1[i];
573  }
574  }
575  else
576  {
577  for (i = 0; i < n; ++i)
578  {
579  C[i] = C1[i];
580  }
581  }
582 
583  ScalePermstruct->DiagScale = BOTH;
584  rowequ = colequ = 1;
585 
586  } /* end Equil */
587 
588  /* Now permute global A to prepare for symbfact() */
589  for (j = 0; j < n; ++j)
590  {
591  for (i = colptr[j]; i < colptr[j+1]; ++i)
592  {
593  irow = rowind[i];
594  rowind[i] = perm_r[irow];
595  }
596  }
597  SUPERLU_FREE (R1);
598  SUPERLU_FREE (C1);
599  }
600  else
601  { /* job = 2,3,4 */
602  for (j = 0; j < n; ++j)
603  {
604  for (i = colptr[j]; i < colptr[j+1]; ++i)
605  {
606  irow = rowind[i];
607  rowind[i] = perm_r[irow];
608  } /* end for i ... */
609  } /* end for j ... */
610  } /* end else job ... */
611  } /* end if options->RowPerm ... */
612 
613  t = SuperLU_timer_() - t;
614  stat.utime[ROWPERM] = t;
615  } /* end if Fact ... */
616  }
617  else
618  { /* options->RowPerm == NOROWPERM */
619  for (i = 0; i <m; ++i) perm_r[i] = i;
620  }
621  } /* end if (!factored) */
622 
623  if ( !factored || options->IterRefine )
624  {
625  /* Compute norm(A), which will be used to adjust small diagonal. */
626  if ( notran )
627  *(unsigned char *)norm = '1';
628  else
629  *(unsigned char *)norm = 'I';
630  anorm = pdlangs(norm, A, grid);
631  }
632 
633 
634  /* ------------------------------------------------------------
635  Perform the LU factorization.
636  ------------------------------------------------------------*/
637  if ( !factored )
638  {
639  t = SuperLU_timer_();
640  /*
641  * Get column permutation vector perm_c[], according to permc_spec:
642  * permc_spec = NATURAL: natural ordering
643  * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
644  * permc_spec = MMD_ATA: minimum degree on structure of A'*A
645  * permc_spec = COLAMD: approximate minimum degree column ordering
646  * permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
647  */
648  permc_spec = options->ColPerm;
649  if ( permc_spec != MY_PERMC && Fact == DOFACT )
650  {
651  get_perm_c_dist(iam, permc_spec, &GA, perm_c);
652  }
653 
654  stat.utime[COLPERM] = SuperLU_timer_() - t;
655 
656  /* Compute the elimination tree of Pc*(A'+A)*Pc' or Pc*A'*A*Pc'
657  (a.k.a. column etree), depending on the choice of ColPerm.
658  Adjust perm_c[] to be consistent with a postorder of etree.
659  Permute columns of A to form A*Pc'. */
660  if ( Fact != SamePattern_SameRowPerm )
661  {
662  int_t *GACcolbeg, *GACcolend, *GACrowind;
663 
664  sp_colorder(options, &GA, perm_c, etree, &GAC);
665 
666  /* Form Pc*A*Pc' to preserve the diagonal of the matrix GAC. */
667  GACstore = GAC.Store;
668  GACcolbeg = GACstore->colbeg;
669  GACcolend = GACstore->colend;
670  GACrowind = GACstore->rowind;
671  for (j = 0; j < n; ++j)
672  {
673  for (i = GACcolbeg[j]; i < GACcolend[j]; ++i)
674  {
675  irow = GACrowind[i];
676  GACrowind[i] = perm_c[irow];
677  }
678  }
679 
680  /* Perform a symbolic factorization on Pc*Pr*A*Pc' and set up the
681  nonzero data structures for L & U. */
682  t = SuperLU_timer_();
683  if ( !(Glu_freeable = (Glu_freeable_t *)
684  SUPERLU_MALLOC(sizeof(Glu_freeable_t))) )
685  {
686  ABORT("Malloc fails for Glu_freeable.");
687  }
688 
689  /* Every process does this. */
690  iinfo = symbfact(options, iam, &GAC, perm_c, etree,
691  Glu_persist, Glu_freeable);
692 
693  stat.utime[SYMBFAC] = SuperLU_timer_() - t;
694  if ( iinfo < 0 )
695  {
696  /* Successful return */
697  QuerySpace_dist(n, -iinfo, Glu_freeable, &symb_mem_usage);
698  }
699  else
700  {
701  if ( !iam )
702  {
703  fprintf(stderr, "symbfact() error returns %d\n", iinfo);
704  exit(-1);
705  }
706  }
707  } /* end if Fact ... */
708 
709  /* Apply column permutation to the original distributed A */
710  for (j = 0; j < nnz_loc; ++j)
711  {
712  colind[j] = perm_c[colind[j]];
713  }
714 
715  /* Distribute Pc*Pr*diag(R)*A*diag(C)*Pc' into L and U storage.
716  NOTE: the row permutation Pc*Pr is applied internally in the
717  distribution routine. */
718  t = SuperLU_timer_();
719  dist_mem_use = pddistribute(Fact, n, A, ScalePermstruct,
720  Glu_freeable, LUstruct, grid);
721  stat.utime[DIST] = SuperLU_timer_() - t;
722 
723  /* Deallocate storage used in symbolic factorization. */
724  if ( Fact != SamePattern_SameRowPerm )
725  {
726  iinfo = symbfact_SubFree(Glu_freeable);
727  SUPERLU_FREE(Glu_freeable);
728  }
729 
730  /* Perform numerical factorization in parallel. */
731  t = SuperLU_timer_();
732  pdgstrf(options, m, n, anorm, LUstruct, grid, &stat, info);
733  stat.utime[FACT] = SuperLU_timer_() - t;
734 
735  /* Destroy GA and GAC */
736  if ( Fact != SamePattern_SameRowPerm )
737  {
738  Destroy_CompCol_Matrix_dist(&GA);
739  Destroy_CompCol_Permuted_dist(&GAC);
740  }
741  } /* end if (!factored) */
742 
743  if (*info!=0)
744  {
745  printf("Trouble in pdgstrf. Info=%i\n",*info);
746  if (*info>0)
747  {
748  printf("U(%i,%i) is exactly zero. The factorization has\n",*info,*info);
749  printf("been completed, but the factor U is exactly singular,\n");
750  printf("and division by zero will occur if it is used to solve a\n");
751  printf("system of equations.\n");
752  }
753  else
754  {
755  printf("The %i-th argument had an illegal value.\n", *info);
756  }
757  }
758 
759  /* ------------------------------------------------------------
760  Initialize the solver.
761  ------------------------------------------------------------*/
762  if ( options->SolveInitialized == NO )
763  {
764  dSolveInit(options, A, perm_r, perm_c, nrhs, LUstruct, grid,
765  SOLVEstruct);
766  }
767 
768  if ( options->IterRefine )
769  {
770  if ( options->RefineInitialized == NO || Fact == DOFACT )
771  {
772  /* All these cases need to re-initialize gsmv structure */
773  if ( options->RefineInitialized )
774  {
775  pdgsmv_finalize(SOLVEstruct->gsmv_comm);
776  }
777  pdgsmv_init(A, SOLVEstruct->row_to_proc, grid,
778  SOLVEstruct->gsmv_comm);
779 
780  options->RefineInitialized = YES;
781  }
782  }
783 
784  /* Print the statistics. */
785  if ((doc==0) && (!iam))
786  {
787  printf("\nstats after setup....\n");
788  PStatPrint(options, &stat, grid);
789  }
790 
791  /* ------------------------------------------------------------
792  Set up data structure.
793  ------------------------------------------------------------*/
794  superlu_data->A = A;
795  superlu_data->options = options;
796  superlu_data->ScalePermstruct = ScalePermstruct;
797  superlu_data->LUstruct = LUstruct;
798  superlu_data->SOLVEstruct = SOLVEstruct;
799  superlu_data->colequ = colequ;
800  superlu_data->rowequ = rowequ;
801  superlu_data->anorm = anorm;
802  *data = superlu_data;
803 
804  } /* End of setup */
805 
806 
807  /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
808  PERFORM A SOLVE
809  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
810  if (opt_flag==2)
811  {
812  /* Get pointer to the grid */
813  superlu_data = (superlu_dist_data *)*data;
814  grid = superlu_data->grid;
815 
816  /* Bail out if I do not belong in the grid. */
817  int iam = grid->iam;
818  if ( iam >= nprow * npcol )
819  {
820  return;
821  }
822 
823  if ((doc==0)&&(!iam))
824  {
825  printf("\nPerforming SuperLU_DIST solve\n");
826  }
827 
828  /* ------------------------------------------------------------
829  Set other pointers to data structure.
830  ------------------------------------------------------------*/
831  A = superlu_data->A;
832  options = superlu_data->options;
833  ScalePermstruct = superlu_data->ScalePermstruct;
834  LUstruct = superlu_data->LUstruct;
835  SOLVEstruct = superlu_data->SOLVEstruct;
836  colequ = superlu_data->colequ;
837  rowequ = superlu_data->rowequ;
838  anorm = superlu_data->anorm;
839 
840  /* Initialization. */
841  Astore = (NRformat_loc *) A->Store;
842  nnz_loc = Astore->nnz_loc;
843  m_loc = Astore->m_loc;
844  fst_row = Astore->fst_row;
845  colind = Astore->colind;
846  R = ScalePermstruct->R;
847  C = ScalePermstruct->C;
848 
849  /* Local control paramaters */
850  Fact = options->Fact;
851  factored = (Fact == FACTORED);
852  Equil = (!factored && options->Equil == YES);
853  notran = (options->Trans == NOTRANS);
854 
855  /* ------------------------------------------------------------
856  Scale the right-hand side if equilibration was performed.
857  ------------------------------------------------------------*/
858  if ( notran )
859  {
860  if ( rowequ )
861  {
862  b_col = B;
863  for (j = 0; j < nrhs; ++j)
864  {
865  irow = fst_row;
866  for (i = 0; i < m_loc; ++i)
867  {
868  b_col[i] *= R[irow];
869  ++irow;
870  }
871  b_col += ldb;
872  }
873  }
874  }
875  else if ( colequ )
876  {
877  b_col = B;
878  for (j = 0; j < nrhs; ++j)
879  {
880  irow = fst_row;
881  for (i = 0; i < m_loc; ++i)
882  {
883  b_col[i] *= C[irow];
884  ++irow;
885  }
886  b_col += ldb;
887  }
888  }
889 
890  /* Save a copy of the right-hand side. */
891  ldx = ldb;
892  if ( !(X = doubleMalloc_dist(((size_t)ldx) * nrhs)) )
893  {
894  ABORT("Malloc fails for X[]");
895  }
896  x_col = X;
897  b_col = B;
898  for (j = 0; j < nrhs; ++j)
899  {
900  for (i = 0; i < m_loc; ++i)
901  {
902  x_col[i] = b_col[i];
903  }
904  x_col += ldx;
905  b_col += ldb;
906  }
907 
908 
909  /* ------------------------------------------------------------
910  Solve the linear system.
911  ------------------------------------------------------------*/
912  pdgstrs(n, LUstruct, ScalePermstruct, grid, X, m_loc,
913  fst_row, ldb, nrhs, SOLVEstruct, &stat, info);
914 
915  if (*info!=0)
916  {
917  printf("Trouble in pdgstrs. Info=%i\n",*info);
918  printf("The %i-th argument had an illegal value.\n", *info);
919  }
920 
921  /* ------------------------------------------------------------
922  Use iterative refinement to improve the computed solution and
923  compute error bounds and backward error estimates for it.
924  ------------------------------------------------------------*/
925  if ( options->IterRefine )
926  {
927  /* Improve the solution by iterative refinement. */
928  int_t *it, *colind_gsmv = SOLVEstruct->A_colind_gsmv;
929  SOLVEstruct_t *SOLVEstruct1; /* Used by refinement. */
930 
931  t = SuperLU_timer_();
932  if ( options->RefineInitialized == NO || Fact == DOFACT )
933  {
934  /* Save a copy of the transformed local col indices
935  in colind_gsmv[]. */
936  if ( colind_gsmv )
937  {
938  SUPERLU_FREE(colind_gsmv);
939  }
940  if ( !(it = intMalloc_dist(nnz_loc)) )
941  {
942  ABORT("Malloc fails for colind_gsmv[]");
943  }
944  colind_gsmv = SOLVEstruct->A_colind_gsmv = it;
945  for (i = 0; i < nnz_loc; ++i)
946  {
947  colind_gsmv[i] = colind[i];
948  }
949  }
950  else if ( Fact == SamePattern || Fact == SamePattern_SameRowPerm )
951  {
952  double at;
953  int_t k, jcol, p;
954  /* Swap to beginning the part of A corresponding to the
955  local part of X, as was done in pdgsmv_init() */
956  for (i = 0; i < m_loc; ++i)
957  {
958  /* Loop through each row */
959  k = rowptr[i];
960  for (j = rowptr[i]; j < rowptr[i+1]; ++j)
961  {
962  jcol = colind[j];
963  p = SOLVEstruct->row_to_proc[jcol];
964  if ( p == iam )
965  {
966  /* Local */
967  at = a[k]; a[k] = a[j]; a[j] = at;
968  ++k;
969  }
970  }
971  }
972 
973  /* Re-use the local col indices of A obtained from the
974  previous call to pdgsmv_init() */
975  for (i = 0; i < nnz_loc; ++i)
976  {
977  colind[i] = colind_gsmv[i];
978  }
979  }
980 
981  /* Storage for backward error */
982  if ( !(berr = doubleMalloc_dist(nrhs)) )
983  {
984  ABORT("Malloc fails for berr[].");
985  }
986 
987  pdgsrfs(n, A, anorm, LUstruct, ScalePermstruct, grid,
988  B, ldb, X, ldx, nrhs, SOLVEstruct, berr, &stat, info);
989 
990  stat.utime[REFINE] = SuperLU_timer_() - t;
991  }
992 
993  if (*info!=0)
994  {
995  printf("Trouble in pdgsrfs. Info=%i\n",*info);
996  printf("The %i-th argument had an illegal value.\n", *info);
997  }
998 
999  /* Print the statistics. */
1000  if ((doc==0) && (!iam))
1001  {
1002  printf("\nstats after solve....\n");
1003  PStatPrint(options, &stat, grid);
1004  }
1005 
1006  /* Permute the solution matrix B <= Pc'*X. */
1007  pdPermute_Dense_Matrix(fst_row, m_loc, SOLVEstruct->row_to_proc,
1008  SOLVEstruct->inv_perm_c,
1009  X, ldx, B, ldb, nrhs, grid);
1010 
1011  /* Transform the solution matrix X to a solution of the original
1012  system before the equilibration. */
1013  if ( notran )
1014  {
1015  if ( colequ )
1016  {
1017  b_col = B;
1018  for (j = 0; j < nrhs; ++j)
1019  {
1020  irow = fst_row;
1021  for (i = 0; i < m_loc; ++i)
1022  {
1023  b_col[i] *= C[irow];
1024  ++irow;
1025  }
1026  b_col += ldb;
1027  }
1028  }
1029  }
1030  else if ( rowequ )
1031  {
1032  b_col = B;
1033  for (j = 0; j < nrhs; ++j)
1034  {
1035  irow = fst_row;
1036  for (i = 0; i < m_loc; ++i)
1037  {
1038  b_col[i] *= R[irow];
1039  ++irow;
1040  }
1041  b_col += ldb;
1042  }
1043  }
1044 
1045  /* Clean up memory */
1046  if ( options->IterRefine )
1047  {
1048  SUPERLU_FREE(berr);
1049  }
1050  SUPERLU_FREE(X);
1051 
1052  } /* End of solve */
1053 
1054  /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1055  PERFORM CLEAN UP OF MEMORY
1056  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
1057  if (opt_flag==3)
1058  {
1059  /* Get pointer to the process grid */
1060  superlu_data = (superlu_dist_data *)*data;
1061  grid = superlu_data->grid;
1062 
1063  /* Bail out if I do not belong in the grid. */
1064  int iam = grid->iam;
1065  if ( iam >= nprow * npcol ) goto out;
1066  if ((doc==0)&&(!iam))
1067  {
1068  printf("\nCleaning up memory allocated for SuperLU_DIST\n");
1069  }
1070 
1071  /* ------------------------------------------------------------
1072  Set pointers to the data structure.
1073  ------------------------------------------------------------*/
1074  A = superlu_data->A;
1075  options = superlu_data->options;
1076  ScalePermstruct = superlu_data->ScalePermstruct;
1077  LUstruct = superlu_data->LUstruct;
1078  SOLVEstruct = superlu_data->SOLVEstruct;
1079 
1080  /* -------------------------------
1081  Set the other pointers required
1082  -------------------------------*/
1083  R = ScalePermstruct->R;
1084  C = ScalePermstruct->C;
1085 
1086  /* Local control paramaters */
1087  Fact = options->Fact;
1088  factored = (Fact == FACTORED);
1089  Equil = (!factored && options->Equil == YES);
1090  notran = (options->Trans == NOTRANS);
1091 
1092  /* Deallocate R and/or C if it was not used. */
1093  if ( Equil && Fact != SamePattern_SameRowPerm )
1094  {
1095  switch ( ScalePermstruct->DiagScale )
1096  {
1097  case NOEQUIL:
1098  SUPERLU_FREE(R);
1099  SUPERLU_FREE(C);
1100  break;
1101  case ROW:
1102  SUPERLU_FREE(C);
1103  break;
1104  case COL:
1105  SUPERLU_FREE(R);
1106  break;
1107  }
1108  }
1109 
1110  /* Free storage */
1111  ScalePermstructFree(ScalePermstruct);
1112  Destroy_LU(n, grid, LUstruct);
1113  LUstructFree(LUstruct);
1114  dSolveFinalize(options, SOLVEstruct);
1115  //Destroy_CompRowLoc_Matrix_dist(&A);
1116 
1117  // Only destroy the store part of the matrix
1118  Destroy_SuperMatrix_Store_dist(A);
1119 
1120  /* Deallocate memory */
1121  SUPERLU_FREE(A);
1122  SUPERLU_FREE(ScalePermstruct);
1123  SUPERLU_FREE(LUstruct);
1124  SUPERLU_FREE(SOLVEstruct);
1125  SUPERLU_FREE(options);
1126 
1127  /* Release the superlu process grid. */
1128  out:
1129  superlu_gridexit(grid);
1130 
1131  SUPERLU_FREE(grid);
1132  SUPERLU_FREE(superlu_data);
1133 
1134  }
1135 
1136  /* Free storage */
1137  PStatFree(&stat);
1138 
1139  return;
1140 
1141 }
1142 
1143 
1144 
1145 /*----------------------------------------------------------------
1146  * Bridge to distributed SuperLU with distributed memory (version 2.0).
1147  * Requires input of system matrix in compressed row form.
1148  *
1149  * Parameters:
1150  * op_flag = int specifies the operation:
1151  * 1, performs LU decomposition for the first time
1152  * 2, performs triangular solve
1153  * 3, free all the storage in the end
1154  * - n = size of system (square matrix)
1155  * - nnz = # of nonzero entries
1156  * - values = 1D C array of nonzero entries
1157  * - row_index = 1D C array of row indices
1158  * - column_start = 1D C array of column start indices
1159  * - b = 1D C array representing the rhs vector, is overwritten
1160  * by solution.
1161  * - nprow = # of rows in process grid
1162  * - npcol = # of columns in process grid
1163  * - doc = 0/1 for doc/no doc
1164  * - data = pointer to structure to contain LU solver data.
1165  * If *opt_flag == 1, it is an output. Otherwise, it it an input.
1166  *
1167  * Return value of *info:
1168  * = 0: successful exit
1169  * > 0: if *info = i, and i is
1170  * <= A->ncol: U(i,i) is exactly zero. The factorization has
1171  * been completed, but the factor U is exactly singular,
1172  * so the solution could not be computed.
1173  * > A->ncol: number of bytes allocated when memory allocation
1174  * failure occurred, plus A->ncol.
1175  * < 0: some other error
1176  *----------------------------------------------------------------
1177  */
1178 void superlu_dist_global_matrix(int opt_flag, int allow_permutations,
1179  int n_in, int nnz_in,
1180  double *values,
1181  int *row_index, int *col_start,
1182  double *b, int nprow, int npcol,
1183  int doc, void **data, int *info,
1184  MPI_Comm comm)
1185 {
1186  /* Some SuperLU structures */
1187  superlu_options_t *options;
1188  SuperLUStat_t stat;
1189  SuperMatrix *A;
1190  SuperMatrix *AC;
1191  ScalePermstruct_t *ScalePermstruct;
1192  LUstruct_t *LUstruct;
1193  gridinfo_t *grid;
1194 
1195  /* Structure to hold SuperLU structures and data */
1196  superlu_dist_data *superlu_data;
1197 
1198  int_t *perm_r; /* row permutations from partial pivoting */
1199  int_t *perm_c; /* column permutation vector */
1200  int_t *etree; /* elimination tree */
1201  int_t job, rowequ, colequ, iinfo, need_value, i, j, irow;
1202  int_t m, n, nnz;
1203  int_t *colptr, *rowind;
1204  int_t Equil, factored, notran, permc_spec, dist_mem_use;
1205  NCformat *Astore;
1206  NCPformat *ACstore;
1207  Glu_persist_t *Glu_persist;
1208  Glu_freeable_t *Glu_freeable;
1209 
1210  /* Other stuff needed by SuperLU */
1211  double *berr;
1212  double *a, *X, *b_col;
1213  double *B=b;
1214  double *C, *R, *C1, *R1, *b_work, *bcol, *x_col;
1215  double amax, t, colcnd, rowcnd, anorm;
1216  char equed[1], norm[1];
1217  int ldx; /* LDA for matrix X (local). */
1218  int iam;
1219  static mem_usage_t num_mem_usage, symb_mem_usage;
1220  fact_t Fact;
1221 
1222 
1223  /* We're only doing single rhs problems
1224  note: code will need modifying to deal with
1225  multiple rhs (see function pdgssvx) */
1226  int nrhs=1;
1227 
1228  /* Square matrix */
1229  n=n_in;
1230  m=n_in;
1231  nnz=nnz_in;
1232 
1233  /* Set 'Leading dimension' of rhs vector */
1234  int ldb=n;
1235 
1236 
1237  /* Initialize the statistics variables. */
1238  PStatInit(&stat);
1239 
1240  /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1241  SET UP GRID, FACTORS, ETC
1242  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
1243  if (opt_flag==1)
1244  {
1245  /* Allocate data structure to store data between calls to this function */
1246  superlu_data =
1247  (superlu_dist_data *) SUPERLU_MALLOC(sizeof(superlu_dist_data));
1248 
1249  /* Initialize the superlu process grid. */
1250  grid = (gridinfo_t *) SUPERLU_MALLOC(sizeof(gridinfo_t));
1251  superlu_gridinit(comm, nprow, npcol, grid);
1252  superlu_data->grid = grid;
1253 
1254  /* Bail out if I do not belong in the grid. */
1255  iam = grid->iam;
1256  if ( iam >= nprow * npcol )
1257  {
1258  return;
1259  }
1260 
1261  /* Allocate memory for SuperLU_DIST structures */
1262  options = (superlu_options_t *) SUPERLU_MALLOC(sizeof(superlu_options_t));
1263  A = (SuperMatrix *) SUPERLU_MALLOC(sizeof(SuperMatrix));
1264  AC = (SuperMatrix *) SUPERLU_MALLOC(sizeof(SuperMatrix));
1265  ScalePermstruct =
1266  (ScalePermstruct_t *) SUPERLU_MALLOC(sizeof(ScalePermstruct_t));
1267  LUstruct = (LUstruct_t *) SUPERLU_MALLOC(sizeof(LUstruct_t));
1268 
1269  /* Set the default options */
1270  set_default_options_dist(options);
1271 
1272  /* Is the matrix transposed (NOTRANS or TRANS)? */
1273  options->Trans = NOTRANS;
1274 
1275  /* Row permutations (NATURAL [= do nothing], */
1276  /* LargeDiag [default], ...)? */
1277  /* options->RowPerm=NATURAL; */
1278  options->RowPerm=LargeDiag;
1279 
1280  /* Column permutations (NATURAL [= do nothing], */
1281  /* MMD_AT_PLUS_A [default],...) */
1282  options->ColPerm=MMD_AT_PLUS_A;
1283 
1284  /* Use natural ordering instead? */
1285  if (allow_permutations==0)
1286  {
1287  options->ColPerm=NATURAL;
1288  options->RowPerm=NATURAL;
1289  }
1290 
1291  /* Iterative refinement (essential as far as I can tell).*/
1292  /* Can be "NO" or "DOUBLE"*/
1293  options->IterRefine = SLU_DOUBLE;
1294 
1295  /* Print stats during solve? */
1296  if (doc==0)
1297  {
1298  options->PrintStat = YES;
1299  }
1300  else
1301  {
1302  options->PrintStat = NO;
1303  }
1304 
1305  /* Doc output on process 0 if required: */
1306  if ((!iam)&&(doc==0))
1307  {
1308  printf("\nPerforming SuperLU_DIST setup\n");
1309  printf("Process grid\t%d X %d\n", grid->nprow, grid->npcol);
1310  print_options_dist(options);
1311  }
1312 
1313 
1314  /* Create SuperMatrix from compressed column representation */
1315  dCreate_CompCol_Matrix_dist(A,m,n,nnz,values,row_index,col_start,
1316  SLU_NC,SLU_D,SLU_GE);
1317 
1318  /* Initialize ScalePermstruct and LUstruct. */
1319  ScalePermstructInit(m, n, ScalePermstruct);
1320  LUstructInit(m, n, LUstruct);
1321 
1322  /* Test the control parameters etc. */
1323  *info = 0;
1324  Fact = options->Fact;
1325  if ( Fact < 0 || Fact > FACTORED )
1326  {
1327  *info = -1;
1328  }
1329  else if ( options->RowPerm < 0 || options->RowPerm > MY_PERMR )
1330  {
1331  *info = -1;
1332  }
1333  else if ( options->ColPerm < 0 || options->ColPerm > MY_PERMC )
1334  {
1335  *info = -1;
1336  }
1337  else if ( options->IterRefine < 0 || options->IterRefine > SLU_EXTRA )
1338  {
1339  *info = -1;
1340  }
1341  else if ( options->IterRefine == SLU_EXTRA )
1342  {
1343  *info = -1;
1344  fprintf(stderr, "Extra precise iterative refinement yet to support.\n");
1345  }
1346  else if ( A->nrow != A->ncol || A->nrow < 0 ||
1347  A->Stype != SLU_NC || A->Dtype != SLU_D || A->Mtype != SLU_GE )
1348  {
1349  *info = -2;
1350  }
1351  else if ( ldb < A->nrow )
1352  {
1353  *info = -5;
1354  }
1355  else if ( nrhs < 0 )
1356  {
1357  *info = -6;
1358  }
1359  if ( *info )
1360  {
1361  printf("Trouble in pdgstrf. Info=%i\n",-*info);
1362  if (*info==-1)
1363  {
1364  printf("Error in options.\n");
1365  }
1366  else if (*info==-2)
1367  {
1368  printf("Error in matrix.\n");
1369  }
1370  else if (*info==-5)
1371  {
1372  printf("ldb < A->nrow\n");
1373  }
1374  else if (*info==-6)
1375  {
1376  printf("nrhs < 0\n");
1377  }
1378  return;
1379  }
1380 
1381  /* Initialization. */
1382  factored = (Fact == FACTORED);
1383  Equil = (!factored && options->Equil == YES);
1384  notran = (options->Trans == NOTRANS);
1385  job = 5;
1386  Astore = A->Store;
1387  nnz = Astore->nnz;
1388  a = Astore->nzval;
1389  colptr = Astore->colptr;
1390  rowind = Astore->rowind;
1391  if ( factored || (Fact == SamePattern_SameRowPerm && Equil) )
1392  {
1393  rowequ = (ScalePermstruct->DiagScale == ROW) ||
1394  (ScalePermstruct->DiagScale == BOTH);
1395  colequ = (ScalePermstruct->DiagScale == COL) ||
1396  (ScalePermstruct->DiagScale == BOTH);
1397  }
1398  else
1399  {
1400  rowequ = colequ = FALSE;
1401  }
1402 
1403  perm_r = ScalePermstruct->perm_r;
1404  perm_c = ScalePermstruct->perm_c;
1405  etree = LUstruct->etree;
1406  R = ScalePermstruct->R;
1407  C = ScalePermstruct->C;
1408  Glu_persist = LUstruct->Glu_persist;
1409  if ( Equil )
1410  {
1411  /* Allocate storage if not done so before. */
1412  switch ( ScalePermstruct->DiagScale )
1413  {
1414  case NOEQUIL:
1415  if ( !(R = (double *) doubleMalloc_dist(m)) )
1416  ABORT("Malloc fails for R[].");
1417  if ( !(C = (double *) doubleMalloc_dist(n)) )
1418  ABORT("Malloc fails for C[].");
1419  ScalePermstruct->R = R;
1420  ScalePermstruct->C = C;
1421  break;
1422  case ROW:
1423  if ( !(C = (double *) doubleMalloc_dist(n)) )
1424  ABORT("Malloc fails for C[].");
1425  ScalePermstruct->C = C;
1426  break;
1427  case COL:
1428  if ( !(R = (double *) doubleMalloc_dist(m)) )
1429  ABORT("Malloc fails for R[].");
1430  ScalePermstruct->R = R;
1431  break;
1432  }
1433  }
1434 
1435  /* ------------------------------------------------------------
1436  Diagonal scaling to equilibrate the matrix.
1437  ------------------------------------------------------------*/
1438  if ( Equil )
1439  {
1440  t = SuperLU_timer_();
1441 
1442  if ( Fact == SamePattern_SameRowPerm )
1443  {
1444  /* Reuse R and C. */
1445  switch ( ScalePermstruct->DiagScale )
1446  {
1447  case NOEQUIL:
1448  break;
1449  case ROW:
1450  for (j = 0; j < n; ++j)
1451  {
1452  for (i = colptr[j]; i < colptr[j+1]; ++i)
1453  {
1454  irow = rowind[i];
1455  a[i] *= R[irow]; /* Scale rows. */
1456  }
1457  }
1458  break;
1459  case COL:
1460  for (j = 0; j < n; ++j)
1461  {
1462  for (i = colptr[j]; i < colptr[j+1]; ++i)
1463  {
1464  a[i] *= C[j]; /* Scale columns. */
1465  }
1466  }
1467  break;
1468  case BOTH:
1469  for (j = 0; j < n; ++j)
1470  {
1471  for (i = colptr[j]; i < colptr[j+1]; ++i)
1472  {
1473  irow = rowind[i];
1474  a[i] *= R[irow] * C[j]; /* Scale rows and columns. */
1475  }
1476  }
1477  break;
1478  }
1479  }
1480  else
1481  {
1482  if ( !iam )
1483  {
1484  /* Compute row and column scalings to equilibrate matrix A. */
1485  dgsequ_dist(A, R, C, &rowcnd, &colcnd, &amax, &iinfo);
1486 
1487  MPI_Bcast( &iinfo, 1, mpi_int_t, 0, grid->comm );
1488  if ( iinfo == 0 )
1489  {
1490  MPI_Bcast( R, m, MPI_DOUBLE, 0, grid->comm );
1491  MPI_Bcast( C, n, MPI_DOUBLE, 0, grid->comm );
1492  MPI_Bcast( &rowcnd, 1, MPI_DOUBLE, 0, grid->comm );
1493  MPI_Bcast( &colcnd, 1, MPI_DOUBLE, 0, grid->comm );
1494  MPI_Bcast( &amax, 1, MPI_DOUBLE, 0, grid->comm );
1495  }
1496  else
1497  {
1498  if ( iinfo > 0 )
1499  {
1500  if ( iinfo <= m )
1501  {
1502  fprintf(stderr, "The %d-th row of A is exactly zero\n",
1503  iinfo);
1504  }
1505  else
1506  {fprintf(stderr, "The %d-th column of A is exactly zero\n",
1507  iinfo-n);
1508  }
1509  exit(-1);
1510  }
1511  }
1512  } else
1513  {
1514  MPI_Bcast( &iinfo, 1, mpi_int_t, 0, grid->comm );
1515  if ( iinfo == 0 )
1516  {
1517  MPI_Bcast( R, m, MPI_DOUBLE, 0, grid->comm );
1518  MPI_Bcast( C, n, MPI_DOUBLE, 0, grid->comm );
1519  MPI_Bcast( &rowcnd, 1, MPI_DOUBLE, 0, grid->comm );
1520  MPI_Bcast( &colcnd, 1, MPI_DOUBLE, 0, grid->comm );
1521  MPI_Bcast( &amax, 1, MPI_DOUBLE, 0, grid->comm );
1522  }
1523  else
1524  {
1525  ABORT("DGSEQU failed\n");
1526  }
1527  }
1528 
1529  /* Equilibrate matrix A. */
1530  dlaqgs_dist(A, R, C, rowcnd, colcnd, amax, equed);
1531  if ( lsame_(equed, "R") )
1532  {
1533  ScalePermstruct->DiagScale = rowequ = ROW;
1534  }
1535  else if ( lsame_(equed, "C") )
1536  {
1537  ScalePermstruct->DiagScale = colequ = COL;
1538  }
1539  else if ( lsame_(equed, "B") )
1540  {
1541  ScalePermstruct->DiagScale = BOTH;
1542  rowequ = ROW;
1543  colequ = COL;
1544  }
1545  else
1546  {
1547  ScalePermstruct->DiagScale = NOEQUIL;
1548  }
1549  } /* if Fact ... */
1550 
1551  stat.utime[EQUIL] = SuperLU_timer_() - t;
1552  } /* if Equil ... */
1553 
1554 
1555  /* ------------------------------------------------------------
1556  Permute rows of A.
1557  ------------------------------------------------------------*/
1558  if ( options->RowPerm != NO )
1559  {
1560  t = SuperLU_timer_();
1561 
1562  if ( Fact == SamePattern_SameRowPerm /* Reuse perm_r. */
1563  || options->RowPerm == MY_PERMR )
1564  {
1565  /* Use my perm_r. */
1566  /* for (j = 0; j < n; ++j) {
1567  for (i = colptr[j]; i < colptr[j+1]; ++i) {*/
1568  for (i = 0; i < colptr[n]; ++i)
1569  {
1570  irow = rowind[i];
1571  rowind[i] = perm_r[irow];
1572 /* }*/
1573  }
1574  }
1575  else if ( !factored )
1576  {
1577  if ( job == 5 )
1578  {
1579  /* Allocate storage for scaling factors. */
1580  if ( !(R1 = (double *) SUPERLU_MALLOC(m * sizeof(double))) )
1581  ABORT("SUPERLU_MALLOC fails for R1[]");
1582  if ( !(C1 = (double *) SUPERLU_MALLOC(n * sizeof(double))) )
1583  ABORT("SUPERLU_MALLOC fails for C1[]");
1584  }
1585 
1586  if ( !iam )
1587  {
1588  /* Process 0 finds a row permutation for large diagonal. */
1589  dldperm(job, m, nnz, colptr, rowind, a, perm_r, R1, C1);
1590 
1591  MPI_Bcast( perm_r, m, mpi_int_t, 0, grid->comm );
1592  if ( job == 5 && Equil )
1593  {
1594  MPI_Bcast( R1, m, MPI_DOUBLE, 0, grid->comm );
1595  MPI_Bcast( C1, n, MPI_DOUBLE, 0, grid->comm );
1596  }
1597  }
1598  else
1599  {
1600  MPI_Bcast( perm_r, m, mpi_int_t, 0, grid->comm );
1601  if ( job == 5 && Equil )
1602  {
1603  MPI_Bcast( R1, m, MPI_DOUBLE, 0, grid->comm );
1604  MPI_Bcast( C1, n, MPI_DOUBLE, 0, grid->comm );
1605  }
1606  }
1607 
1608  if ( job == 5 )
1609  {
1610  if ( Equil )
1611  {
1612  for (i = 0; i < n; ++i)
1613  {
1614  R1[i] = exp(R1[i]);
1615  C1[i] = exp(C1[i]);
1616  }
1617  for (j = 0; j < n; ++j)
1618  {
1619  for (i = colptr[j]; i < colptr[j+1]; ++i)
1620  {
1621  irow = rowind[i];
1622  a[i] *= R1[irow] * C1[j]; /* Scale the matrix. */
1623  rowind[i] = perm_r[irow];
1624  }
1625  }
1626 
1627  /* Multiply together the scaling factors. */
1628  if ( rowequ )
1629  {
1630  for (i = 0; i < m; ++i)
1631  {
1632  R[i] *= R1[i];
1633  }
1634  }
1635  else
1636  {
1637  for (i = 0; i < m; ++i)
1638  {
1639  R[i] = R1[i];
1640  }
1641  }
1642  if ( colequ )
1643  {
1644  for (i = 0; i < n; ++i)
1645  {
1646  C[i] *= C1[i];
1647  }
1648  }
1649  else
1650  {
1651  for (i = 0; i < n; ++i)
1652  {
1653  C[i] = C1[i];
1654  }
1655  }
1656 
1657  ScalePermstruct->DiagScale = BOTH;
1658  rowequ = colequ = 1;
1659  }
1660  else
1661  {
1662  /* No equilibration. */
1663 /* for (j = 0; j < n; ++j) {
1664  for (i = colptr[j]; i < colptr[j+1]; ++i) {*/
1665  for (i = colptr[0]; i < colptr[n]; ++i)
1666  {
1667  irow = rowind[i];
1668  rowind[i] = perm_r[irow];
1669  }
1670 /* }*/
1671  }
1672  SUPERLU_FREE (R1);
1673  SUPERLU_FREE (C1);
1674  }
1675  else
1676  { /* job = 2,3,4 */
1677  for (j = 0; j < n; ++j)
1678  {
1679  for (i = colptr[j]; i < colptr[j+1]; ++i)
1680  {
1681  irow = rowind[i];
1682  rowind[i] = perm_r[irow];
1683  }
1684  }
1685  }
1686  } /* else !factored */
1687 
1688  t = SuperLU_timer_() - t;
1689  stat.utime[ROWPERM] = t;
1690  } /* if options->RowPerm ... */
1691 
1692  if ( !factored || options->IterRefine )
1693  {
1694  /* Compute norm(A), which will be used to adjust small diagonal. */
1695  if ( notran )
1696  {
1697  *(unsigned char *)norm = '1';
1698  }
1699  else
1700  {
1701  *(unsigned char *)norm = 'I';
1702  }
1703  anorm = dlangs_dist(norm, A);
1704  }
1705 
1706 
1707 
1708  /* ------------------------------------------------------------
1709  Perform the LU factorization.
1710  ------------------------------------------------------------*/
1711  if ( !factored )
1712  {
1713  t = SuperLU_timer_();
1714  /*
1715  * Get column permutation vector perm_c[], according to permc_spec:
1716  * permc_spec = NATURAL: natural ordering
1717  * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
1718  * permc_spec = MMD_ATA: minimum degree on structure of A'*A
1719  * permc_spec = COLAMD: approximate minimum degree column ordering
1720  * permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
1721  */
1722  permc_spec = options->ColPerm;
1723  if ( permc_spec != MY_PERMC && Fact == DOFACT )
1724  {
1725  /* Use an ordering provided by SuperLU */
1726  get_perm_c_dist(iam, permc_spec, A, perm_c);
1727  }
1728 
1729  /* Compute the elimination tree of Pc*(A'+A)*Pc' or Pc*A'*A*Pc'
1730  (a.k.a. column etree), depending on the choice of ColPerm.
1731  Adjust perm_c[] to be consistent with a postorder of etree.
1732  Permute columns of A to form A*Pc'. */
1733  sp_colorder(options, A, perm_c, etree, AC);
1734 
1735  /* Form Pc*A*Pc' to preserve the diagonal of the matrix Pr*A. */
1736  ACstore = AC->Store;
1737  for (j = 0; j < n; ++j)
1738  {
1739  for (i = ACstore->colbeg[j]; i < ACstore->colend[j]; ++i)
1740  {
1741  irow = ACstore->rowind[i];
1742  ACstore->rowind[i] = perm_c[irow];
1743  }
1744  }
1745  stat.utime[COLPERM] = SuperLU_timer_() - t;
1746 
1747  /* Perform a symbolic factorization on matrix A and set up the
1748  nonzero data structures which are suitable for supernodal GENP. */
1749  if ( Fact != SamePattern_SameRowPerm )
1750  {
1751  t = SuperLU_timer_();
1752  if ( !(Glu_freeable = (Glu_freeable_t *)
1753  SUPERLU_MALLOC(sizeof(Glu_freeable_t))) )
1754  ABORT("Malloc fails for Glu_freeable.");
1755 
1756  iinfo = symbfact(options, iam, AC, perm_c, etree,
1757  Glu_persist, Glu_freeable);
1758 
1759  stat.utime[SYMBFAC] = SuperLU_timer_() - t;
1760 
1761  if ( iinfo < 0 )
1762  {
1763  QuerySpace_dist(n, -iinfo, Glu_freeable, &symb_mem_usage);
1764  }
1765  else
1766  {
1767  if ( !iam )
1768  {
1769  fprintf(stderr, "symbfact() error returns %d\n", iinfo);
1770  exit(-1);
1771  }
1772  }
1773  }
1774 
1775  /* Distribute the L and U factors onto the process grid. */
1776  t = SuperLU_timer_();
1777  dist_mem_use = ddistribute(Fact, n, AC, Glu_freeable, LUstruct, grid);
1778  stat.utime[DIST] = SuperLU_timer_() - t;
1779 
1780  /* Deallocate storage used in symbolic factor. */
1781  if ( Fact != SamePattern_SameRowPerm )
1782  {
1783  iinfo = symbfact_SubFree(Glu_freeable);
1784  SUPERLU_FREE(Glu_freeable);
1785  }
1786 
1787  /* Perform numerical factorization in parallel. */
1788  t = SuperLU_timer_();
1789  pdgstrf(options, m, n, anorm, LUstruct, grid, &stat, info);
1790  stat.utime[FACT] = SuperLU_timer_() - t;
1791  }
1792  else if ( options->IterRefine )
1793  {
1794  /* options->Fact==FACTORED */
1795  /* Permute columns of A to form A*Pc' using the existing perm_c.
1796  * NOTE: rows of A were previously permuted to Pc*A.
1797  */
1798  sp_colorder(options, A, perm_c, NULL, AC);
1799  } /* if !factored ... */
1800 
1801  if (*info!=0)
1802  {
1803  printf("Trouble in pdgstrf. Info=%i\n",*info);
1804  if (*info>0)
1805  {
1806  printf("U(%i,%i) is exactly zero. The factorization has\n",*info,*info);
1807  printf("been completed, but the factor U is exactly singular,\n");
1808  printf("and division by zero will occur if it is used to solve a\n");
1809  printf("system of equations.\n");
1810  }
1811  else
1812  {
1813  printf("The %i-th argument had an illegal value.\n", *info);
1814  }
1815  }
1816 
1817  /* Print the statistics. */
1818  if ((doc==0) && (!iam))
1819  {
1820  printf("\nstats after setup....\n");
1821  PStatPrint(options, &stat, grid);
1822  }
1823 
1824  /* ------------------------------------------------------------
1825  Set up data structure.
1826  ------------------------------------------------------------*/
1827  superlu_data->A = A;
1828  superlu_data->AC = AC;
1829  superlu_data->options = options;
1830  superlu_data->ScalePermstruct = ScalePermstruct;
1831  superlu_data->LUstruct = LUstruct;
1832  superlu_data->colequ = colequ;
1833  superlu_data->rowequ = rowequ;
1834  superlu_data->anorm = anorm;
1835  *data = superlu_data;
1836 
1837  } /* End of setup */
1838 
1839 
1840  /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1841  PERFORM A SOLVE
1842  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
1843  if (opt_flag==2)
1844  {
1845  /* Get pointer to the grid */
1846  superlu_data = (superlu_dist_data *)*data;
1847  grid = superlu_data->grid;
1848 
1849  /* Bail out if I do not belong in the grid. */
1850  iam = grid->iam;
1851  if ( iam >= nprow * npcol )
1852  {
1853  return;
1854  }
1855 
1856  if ((doc==0)&&(!iam))
1857  {
1858  printf("\nPerforming SuperLU_DIST solve\n");
1859  }
1860 
1861  /* ------------------------------------------------------------
1862  Set other pointers to data structure.
1863  ------------------------------------------------------------*/
1864  A = superlu_data->A;
1865  AC = superlu_data->AC;
1866  options = superlu_data->options;
1867  ScalePermstruct = superlu_data->ScalePermstruct;
1868  LUstruct = superlu_data->LUstruct;
1869  colequ = superlu_data->colequ;
1870  rowequ = superlu_data->rowequ;
1871  anorm = superlu_data->anorm;
1872 
1873  /* Initialization. */
1874  Astore = A->Store;
1875  colptr = Astore->colptr;
1876  rowind = Astore->rowind;
1877  R = ScalePermstruct->R;
1878  C = ScalePermstruct->C;
1879  perm_r = ScalePermstruct->perm_r;
1880  perm_c = ScalePermstruct->perm_c;
1881 
1882  /* Local control paramaters */
1883  Fact = options->Fact;
1884  factored = (Fact == FACTORED);
1885  Equil = (!factored && options->Equil == YES);
1886  notran = (options->Trans == NOTRANS);
1887 
1888 
1889  /* ------------------------------------------------------------
1890  Compute the solution matrix X.
1891  ------------------------------------------------------------*/
1892  if ( !(b_work = doubleMalloc_dist(n)) )
1893  {
1894  ABORT("Malloc fails for b_work[]");
1895  }
1896 
1897  /* ------------------------------------------------------------
1898  Scale the right-hand side if equilibration was performed.
1899  ------------------------------------------------------------*/
1900  if ( notran )
1901  {
1902  if ( rowequ )
1903  {
1904  b_col = B;
1905  for (j = 0; j < nrhs; ++j)
1906  {
1907  for (i = 0; i < m; ++i)
1908  {
1909  b_col[i] *= R[i];
1910  }
1911  b_col += ldb;
1912  }
1913  }
1914  }
1915  else if ( colequ )
1916  {
1917  b_col = B;
1918  for (j = 0; j < nrhs; ++j)
1919  {
1920  for (i = 0; i < m; ++i)
1921  {
1922  b_col[i] *= C[i];
1923  }
1924  b_col += ldb;
1925  }
1926  }
1927 
1928  /* ------------------------------------------------------------
1929  Permute the right-hand side to form Pr*B.
1930  ------------------------------------------------------------*/
1931  if ( options->RowPerm != NO )
1932  {
1933  if ( notran )
1934  {
1935  b_col = B;
1936  for (j = 0; j < nrhs; ++j)
1937  {
1938  for (i = 0; i < m; ++i)
1939  {
1940  b_work[perm_r[i]] = b_col[i];
1941  }
1942  for (i = 0; i < m; ++i)
1943  {
1944  b_col[i] = b_work[i];
1945  }
1946  b_col += ldb;
1947  }
1948  }
1949  }
1950 
1951 
1952  /* ------------------------------------------------------------
1953  Permute the right-hand side to form Pc*B.
1954  ------------------------------------------------------------*/
1955  if ( notran )
1956  {
1957  b_col = B;
1958  for (j = 0; j < nrhs; ++j)
1959  {
1960  for (i = 0; i < m; ++i)
1961  {
1962  b_work[perm_c[i]] = b_col[i];
1963  }
1964  for (i = 0; i < m; ++i)
1965  {
1966  b_col[i] = b_work[i];
1967  }
1968  b_col += ldb;
1969  }
1970  }
1971 
1972 
1973  /* Save a copy of the right-hand side. */
1974  ldx = ldb;
1975  if ( !(X = doubleMalloc_dist(((size_t)ldx) * nrhs)) )
1976  {
1977  ABORT("Malloc fails for X[]");
1978  }
1979 
1980  x_col = X;
1981  b_col = B;
1982  for (j = 0; j < nrhs; ++j)
1983  {
1984  for (i = 0; i < ldb; ++i)
1985  {
1986  x_col[i] = b_col[i];
1987  }
1988  x_col += ldx;
1989  b_col += ldb;
1990  }
1991 
1992  /* ------------------------------------------------------------
1993  Solve the linear system.
1994  ------------------------------------------------------------*/
1995  pdgstrs_Bglobal(n, LUstruct, grid, X, ldb, nrhs, &stat, info);
1996  if (*info!=0)
1997  {
1998  printf("Trouble in pdgstrs_Bglobal. Info=%i\n",*info);
1999  printf("The %i-th argument had an illegal value.\n", *info);
2000  }
2001 
2002  /* ------------------------------------------------------------
2003  Use iterative refinement to improve the computed solution and
2004  compute error bounds and backward error estimates for it.
2005  ------------------------------------------------------------*/
2006  if ( options->IterRefine )
2007  {
2008  /* Improve the solution by iterative refinement. */
2009  t = SuperLU_timer_();
2010 
2011  /* Storage for backward error */
2012  if ( !(berr = doubleMalloc_dist(nrhs)) )
2013  {
2014  ABORT("Malloc fails for berr[].");
2015  }
2016 
2017  pdgsrfs_ABXglobal(n, AC, anorm, LUstruct, grid, B, ldb,
2018  X, ldx, nrhs, berr, &stat, info);
2019  if (*info!=0)
2020  {
2021  printf("Trouble in pdgsrfs_ABXglobal. Info=%i\n",*info);
2022  printf("The %i-th argument had an illegal value.\n", *info);
2023  }
2024  stat.utime[REFINE] = SuperLU_timer_() - t;
2025  }
2026 
2027  /* Print the statistics. */
2028  if ((doc==0) && (!iam))
2029  {
2030  printf("\nstats after solve....\n");
2031  PStatPrint(options, &stat, grid);
2032  }
2033 
2034  /* Permute the solution matrix X <= Pc'*X. */
2035  for (j = 0; j < nrhs; j++)
2036  {
2037  b_col = &B[j*ldb];
2038  x_col = &X[j*ldx];
2039  for (i = 0; i < n; ++i)
2040  {
2041  b_col[i] = x_col[perm_c[i]];
2042  }
2043  }
2044 
2045  /* Transform the solution matrix X to a solution of the original system
2046  before the equilibration. */
2047  if ( notran )
2048  {
2049  if ( colequ )
2050  {
2051  b_col = B;
2052  for (j = 0; j < nrhs; ++j)
2053  {
2054  for (i = 0; i < n; ++i)
2055  {
2056  b_col[i] *= C[i];
2057  }
2058  b_col += ldb;
2059  }
2060  }
2061  }
2062  else if ( rowequ )
2063  {
2064  b_col = B;
2065  for (j = 0; j < nrhs; ++j)
2066  {
2067  for (i = 0; i < n; ++i)
2068  {
2069  b_col[i] *= R[i];
2070  }
2071  b_col += ldb;
2072  }
2073  }
2074 
2075 
2076  /* Clean up memory */
2077  if ( options->IterRefine )
2078  {
2079  SUPERLU_FREE(berr);
2080  }
2081  SUPERLU_FREE(b_work);
2082  SUPERLU_FREE(X);
2083 
2084  } /* End of solve */
2085 
2086  /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2087  PERFORM CLEAN UP OF MEMORY
2088  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
2089  if (opt_flag==3)
2090  {
2091  /* Get pointer to the process grid */
2092  superlu_data = (superlu_dist_data *)*data;
2093  grid = superlu_data->grid;
2094 
2095  /* Bail out if I do not belong in the grid. */
2096  iam = grid->iam;
2097  if ( iam >= nprow * npcol ) goto out;
2098  if ((doc==0)&&(!iam))
2099  {
2100  printf("\nCleaning up memory allocated for SuperLU_DIST\n");
2101  }
2102 
2103  /* ------------------------------------------------------------
2104  Set pointers to the data structure.
2105  ------------------------------------------------------------*/
2106  A = superlu_data->A;
2107  AC = superlu_data->AC;
2108  options = superlu_data->options;
2109  ScalePermstruct = superlu_data->ScalePermstruct;
2110  LUstruct = superlu_data->LUstruct;
2111 
2112  /* -------------------------------
2113  Set the other pointers required
2114  -------------------------------*/
2115  R = ScalePermstruct->R;
2116  C = ScalePermstruct->C;
2117 
2118  /* Local control paramaters */
2119  Fact = options->Fact;
2120  factored = (Fact == FACTORED);
2121  Equil = (!factored && options->Equil == YES);
2122  rowequ = colequ = FALSE;
2123  if ( factored || (Fact == SamePattern_SameRowPerm && Equil) )
2124  {
2125  rowequ = (ScalePermstruct->DiagScale == ROW) ||
2126  (ScalePermstruct->DiagScale == BOTH);
2127  colequ = (ScalePermstruct->DiagScale == COL) ||
2128  (ScalePermstruct->DiagScale == BOTH);
2129  }
2130  else
2131  {
2132  rowequ = colequ = FALSE;
2133  }
2134 
2135  /* Deallocate storage. */
2136  if ( Equil && Fact != SamePattern_SameRowPerm )
2137  {
2138  switch ( ScalePermstruct->DiagScale )
2139  {
2140  case NOEQUIL:
2141  SUPERLU_FREE(R);
2142  SUPERLU_FREE(C);
2143  break;
2144  case ROW:
2145  SUPERLU_FREE(C);
2146  break;
2147  case COL:
2148  SUPERLU_FREE(R);
2149  break;
2150  }
2151  }
2152  if ( !factored || (factored && options->IterRefine) )
2153  {
2154  Destroy_CompCol_Permuted_dist(AC);
2155  }
2156 
2157  /* Free storage */
2158  ScalePermstructFree(ScalePermstruct);
2159  Destroy_LU(n, grid, LUstruct);
2160  LUstructFree(LUstruct);
2161  //Destroy_CompRowLoc_Matrix_dist(&A);
2162  // Only destroy the store part of the matrix
2163  Destroy_SuperMatrix_Store_dist(A);
2164 
2165  /* Deallocate memory */
2166  SUPERLU_FREE(A);
2167  SUPERLU_FREE(AC);
2168  SUPERLU_FREE(ScalePermstruct);
2169  SUPERLU_FREE(LUstruct);
2170  SUPERLU_FREE(options);
2171 
2172  /* Release the superlu process grid. */
2173  out:
2174  superlu_gridexit(grid);
2175 
2176  SUPERLU_FREE(grid);
2177  SUPERLU_FREE(superlu_data);
2178  }
2179 
2180  /* Free storage */
2181  PStatFree(&stat);
2182 
2183  return;
2184 }
2185 
2186 
void superlu_dist_global_matrix(int opt_flag, int allow_permutations, int n_in, int nnz_in, double *values, int *row_index, int *col_start, double *b, int nprow, int npcol, int doc, void **data, int *info, MPI_Comm comm)
SOLVEstruct_t * SOLVEstruct
Definition: superlu_dist.c:54
void superlu_cr_to_cc(int nrow, int ncol, int nnz, double *cr_values, int *cr_index, int *cr_start, double **cc_values, int **cc_index, int **cc_start)
Definition: superlu_dist.c:65
superlu_options_t * options
Definition: superlu_dist.c:55
cstr elem_len * i
Definition: cfortran.h:607
char t
Definition: cfortran.h:572
void superlu_dist_distributed_matrix(int opt_flag, int allow_permutations, int n, int nnz_local, int nrow_local, int first_row, double *values, int *col_index, int *row_start, double *b, int nprow, int npcol, int doc, void **data, int *info, MPI_Comm comm)
Definition: superlu_dist.c:106
LUstruct_t * LUstruct
Definition: superlu_dist.c:53
SuperMatrix * AC
Definition: superlu_dist.c:51
ScalePermstruct_t * ScalePermstruct
Definition: superlu_dist.c:52
SuperMatrix * A
Definition: superlu_dist.c:50
gridinfo_t * grid
Definition: superlu_dist.c:49
if(e >s)
Definition: cfortran.h:576