oomph-lib: linear_solver.cc Source File

Go to the documentation of this file.
 //LIC// ====================================================================
 //LIC// This file forms part of oomph-lib, the object-oriented, 
 //LIC// multi-physics finite-element library, available 
 //LIC// at http://www.oomph-lib.org.
 //LIC// 
 //LIC//    Version 1.0; svn revision $LastChangedRevision$
 //LIC//
 //LIC// $LastChangedDate$
 //LIC// 
 //LIC// Copyright (C) 2006-2016 Matthias Heil and Andrew Hazel
 //LIC// 
 //LIC// This library is free software; you can redistribute it and/or
 //LIC// modify it under the terms of the GNU Lesser General Public
 //LIC// License as published by the Free Software Foundation; either
 //LIC// version 2.1 of the License, or (at your option) any later version.
 //LIC// 
 //LIC// This library is distributed in the hope that it will be useful,
 //LIC// but WITHOUT ANY WARRANTY; without even the implied warranty of
 //LIC// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 //LIC// Lesser General Public License for more details.
 //LIC// 
 //LIC// You should have received a copy of the GNU Lesser General Public
 //LIC// License along with this library; if not, write to the Free Software
 //LIC// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 //LIC// 02110-1301  USA.
 //LIC// 
 //LIC// The authors may be contacted at oomph-lib@maths.man.ac.uk.
 //LIC// 
 //LIC//====================================================================
 //The actual solve functions for dense LU linear solvers.
 
 // Config header generated by autoconfig
 #ifdef HAVE_CONFIG_H
   #include <oomph-lib-config.h>
 #endif
 
 #ifdef OOMPH_HAS_MPI
 #include "mpi.h"
 #endif
 
 //oomph-lib includes
 #include "Vector.h"
 #include "linear_solver.h"
 #include "matrices.h"
 #include "problem.h"
 
 
 namespace oomph
 {
 
 
 //=============================================================================
 /// Solver: Takes pointer to problem and returns the results Vector
 /// which contains the solution of the linear system defined by
 /// the problem's fully assembled Jacobian and residual Vector.
 //=============================================================================
  void DenseLU::solve(Problem* const &problem_pt, DoubleVector &result)
  {
   //Initialise timer
   double t_start = TimingHelpers::timer();
   
   //Find # of degrees of freedom (variables)
   const unsigned n_dof = problem_pt->ndof();
 
   //Allocate storage for the residuals vector and the jacobian matrix
   DoubleVector residuals;
   DenseDoubleMatrix jacobian(n_dof);
   
   // initialise timer
   double t_start_jacobian = TimingHelpers::timer();
   
   //Get the full jacobian and residuals of the problem
   problem_pt->get_jacobian(residuals,jacobian);
   
   // compute jacobian setup time
   double t_end_jacobian = TimingHelpers::timer();
   Jacobian_setup_time = t_end_jacobian - t_start_jacobian;
 
   //Report the time
   if(Doc_time)
    {
     oomph_info << std::endl << "CPU for setup of Dense Jacobian [sec]: " 
                << Jacobian_setup_time << std::endl;
    }
 
   //Solve by dense LU decomposition VERY INEFFICIENT!
   solve(&jacobian,residuals,result);
   
   //Set the sign of the determinant of the jacobian
   problem_pt->sign_of_jacobian() = Sign_of_determinant_of_matrix;
   
   // Finalise/doc timings
   double t_end = TimingHelpers::timer();
   double total_time=t_end-t_start;
   if(Doc_time)
    {
     oomph_info << "CPU for DenseLU LinearSolver [sec]: " 
                << total_time << std::endl << std::endl;
    }
  }
 
 
 //=============================================================================
 /// Delete the storage that has been allocated for the LU factors, if
 /// the matrix data is not itself being overwritten.
 //=============================================================================
 void DenseLU::clean_up_memory()
 {
  // delete the Distribution_pt
  this->clear_distribution();
 
  //Clean up the LU factor storage, if it has been allocated
  //N.B. we don't need to check the index storage as well.
  if(LU_factors!=0)
   {
    //Delete the pointer to the LU factors
    delete[] LU_factors;
    //Null out the vector
    LU_factors = 0;
    //Delete the pointer to the Index
    delete[] Index;
    //Null out
    Index=0;
   }
 }
 
 //=============================================================================
 /// LU decompose the matrix.
 /// WARNING: this class does not perform any PARANOID checks on the vectors - 
 /// these are all performed in the solve(...) method.
 //=============================================================================
 void DenseLU::factorise(DoubleMatrixBase* const &matrix_pt)
 {
  //Set the number of unknowns
  const unsigned long n = matrix_pt->nrow();
  
  //Small constant
  const double small_number=1.0e-20;
 
  //Vector scaling stores the implicit scaling of each row
  Vector<double> scaling(n);
 
  //Integer to store the sign that must multiply the determinant as
  //a consequence of the row/column interchanges
  int signature = 1;
 
  //Loop over rows to get implicit scaling information
  for(unsigned long i=0;i<n;i++)
   {
    double largest_entry=0.0;
    for(unsigned long j=0;j<n;j++)
     {
      double tmp = std::fabs((*matrix_pt)(i,j));
      if(tmp > largest_entry) largest_entry = tmp;
     }
    if(largest_entry==0.0) 
     {
      throw OomphLibError("Singular Matrix",
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
     }
    //Save the scaling
    scaling[i] = 1.0/largest_entry;
   }
 
  //Firsly, we shall delete any previous LU storage.
  //If the user calls this function twice without changing the matrix
  //then it is their own inefficiency, not ours (this time).
  clean_up_memory();
 
  //Allocate storage for the LU factors, the index and store
  //the number of unknowns
  LU_factors = new double[n*n];
  Index = new long[n];
 
  //Now we know that memory has been allocated, copy over
  //the matrix values
  unsigned count=0;
  for(unsigned long i=0;i<n;i++)
   {
    for(unsigned long j=0;j<n;j++)
     {
      LU_factors[count] = (*matrix_pt)(i,j);
      ++count;
     }
   }
 
  //Loop over columns
  for(unsigned long j=0;j<n;j++)
   {
    //Initialise imax
    unsigned long imax=0;
 
    for(unsigned long i=0;i<j;i++)
     {
      double sum = LU_factors[n*i+j];
      for(unsigned long k=0;k<i;k++) 
       {
        sum -= LU_factors[n*i+k]*LU_factors[n*k+j];
       }
      LU_factors[n*i+j] = sum;
     }
 
    //Initialise search for largest pivot element
    double largest_entry=0.0;
    for(unsigned long i=j;i<n;i++)
     {
      double sum = LU_factors[n*i+j];
      for(unsigned long k=0;k<j;k++) 
       {
        sum -= LU_factors[n*i+k]*LU_factors[n*k+j];
       }
      LU_factors[n*i+j] = sum;
      //Set temporary
      double tmp = scaling[i]*std::fabs(sum);
      if(tmp >= largest_entry)
       {
        largest_entry = tmp;
        imax = i;
       }
     }
 
    //Test to see if we need to interchange rows
    if(j != imax)
     {
      for(unsigned long k=0;k<n;k++)
       {
        double tmp = LU_factors[n*imax+k];
        LU_factors[n*imax+k] = LU_factors[n*j+k];
        LU_factors[n*j+k] = tmp;
       }
      //Change the parity of signature
      signature = -signature;
 
      //Interchange scale factor
      scaling[imax] = scaling[j];
     }
    
    //Set the index
    Index[j] = imax;
    if(LU_factors[n*j+j] == 0.0) 
     {
      LU_factors[n*j+j] = small_number;
     }
    //Divide by pivot element
    if(j != n-1)
     {
      double tmp = 1.0/LU_factors[n*j+j];
      for(unsigned long i=j+1;i<n;i++) 
       {
        LU_factors[n*i+j] *= tmp;
       }
     }
   
   } //End of loop over columns
 
  
  //Now multiply all the diagonal terms together to get the determinant
  //Note that we need to use the mantissa, exponent formulation to
  //avoid underflow errors
  double determinant_mantissa=1.0;
  int determinant_exponent = 0, iexp;
  for(unsigned i=0; i<n; i++)
   {
    //Multiply by the next diagonal entry's mantissa
    //and return the exponent
    determinant_mantissa *= frexp(LU_factors[n*i+i], &iexp);
 
    //Add the new exponent to the current exponent
    determinant_exponent += iexp;
 
    // normalise
    determinant_mantissa = frexp(determinant_mantissa,&iexp);
    determinant_exponent += iexp;
   }
 
  //If paranoid issue a warning that the matrix is near singular
 // #ifdef PARANOID
 //  int tiny_exponent = -60;
 //  if(determinant_exponent < tiny_exponent)
 //   {
 //    std::ostringstream warning_stream;
 //    warning_stream << "The determinant of the matrix is very close to zero.\n"
 //                   << "It is " << determinant_mantissa << " x 2^" 
 //                   << determinant_exponent << "\n";
 //    warning_stream << "The results will depend on the exact details of the\n"
 //                   << "floating point implementation ... just to let you know\n";
 //    OomphLibWarning(warning_stream.str(),
 //                    "DenseLU::factorise()",
 //                    OOMPH_EXCEPTION_LOCATION);
 //   }
 // #endif
 
  //Integer to store the sign of the determinant
  int sign = 0;
 
  //Find the sign of the determinant
  if(determinant_mantissa > 0.0) {sign = 1;}
  if(determinant_mantissa < 0.0) {sign = -1;}
  
  //Multiply the sign by the signature
  sign *= signature;
  
  //Return the sign of the determinant
  Sign_of_determinant_of_matrix = sign;
  }
 
 //=============================================================================
 /// Do the backsubstitution for the DenseLU solver. 
 /// WARNING: this class does not perform any PARANOID checks on the vectors - 
 /// these are all performed in the solve(...) method.
 //=============================================================================
 void DenseLU::backsub(const DoubleVector &rhs,
                       DoubleVector &result)
 {
  // Get pointers to first entries 
  const double* rhs_pt = rhs.values_pt();
  double* result_pt = result.values_pt();
 
  //Copy the rhs vector into the result vector
  const unsigned long n = rhs.nrow();
  for(unsigned long i=0;i<n;++i) 
   {
    result_pt[i] = rhs_pt[i];
   }
  
  // Loop over all rows for forward substition
  unsigned long k=0;
  for(unsigned long i=0;i<n;i++)
   {
    unsigned long ip = Index[i];
    double sum = result_pt[ip];
    result_pt[ip] = result_pt[i];
    if(k != 0)
     {
      for(unsigned long j=k-1;j<i;j++) 
       {
        sum -= LU_factors[n*i+j]*result_pt[j];
       }
     }
    else if(sum != 0.0)
     {
      k = i+1;
     }
    result_pt[i] = sum;
   }
 
  //Now do the back substitution
  for (long i=long(n)-1;i>=0;i--)
   {
    double sum = result_pt[i];
    for(long j=i+1;j<long(n);j++) 
     {
      sum -= LU_factors[n*i+j]*result_pt[j];
     }
    result_pt[i] = sum/LU_factors[n*i+i];
   }
 }
 
 //=============================================================================
 /// Do the backsubstitution for the DenseLU solver.
 /// WARNING: this class does not perform any PARANOID checks on the vectors -
 /// these are all performed in the solve(...) method. So, if you call backsub
 /// directly, you have been warned...
 //=============================================================================
 void DenseLU::backsub(const Vector<double> &rhs,
                       Vector<double> &result)
 {
  //Copy the rhs vector into the result vector
  const unsigned long n = rhs.size();
  for(unsigned long i=0;i<n;++i)
   {
    result[i] = rhs[i];
   }
  
  // Loop over all rows for forward substition
  unsigned long k=0;
  for(unsigned long i=0;i<n;i++)
   {
    unsigned long ip = Index[i];
    double sum = result[ip];
    result[ip] = result[i];
    if(k != 0)
     {
      for(unsigned long j=k-1;j<i;j++)
       {
        sum -= LU_factors[n*i+j]*result[j];
       }
     }
    else if(sum != 0.0)
     {
      k = i+1;
     }
    result[i] = sum;
   }
  
   //Now do the back substitution
   for (long i=long(n)-1;i>=0;i--)
    {
     double sum = result[i];
     for(long j=i+1;j<long(n);j++)
      {
       sum -= LU_factors[n*i+j]*result[j];
      }
     result[i] = sum/LU_factors[n*i+i];
    }
 }
 
 
 //=============================================================================
  /// \short Linear-algebra-type solver: Takes pointer to a matrix and rhs 
  /// vector and returns the solution of the linear system. 
 //============================================================================
  void DenseLU::solve(DoubleMatrixBase* const &matrix_pt,
                      const DoubleVector &rhs,
                      DoubleVector &result)
 {
 #ifdef PARANOID
  // check that the rhs vector is not distributed
  if (rhs.distribution_pt()->distributed())
   {
    std::ostringstream error_message_stream;
    error_message_stream 
     << "The vectors rhs and result must not be distributed";
    throw OomphLibError(error_message_stream.str(),
                        OOMPH_CURRENT_FUNCTION,
                        OOMPH_EXCEPTION_LOCATION);
   }
 
  // check that the matrix is square
  if (matrix_pt->nrow() != matrix_pt->ncol())
   {
    std::ostringstream error_message_stream;
    error_message_stream 
     << "The matrix at matrix_pt must be square.";
    throw OomphLibError(error_message_stream.str(),
                        OOMPH_CURRENT_FUNCTION,
                        OOMPH_EXCEPTION_LOCATION);    
   }
  // check that the matrix and the rhs vector have the same nrow()
  if (matrix_pt->nrow() != rhs.nrow())
   {
    std::ostringstream error_message_stream;
    error_message_stream 
     << "The matrix and the rhs vector must have the same number of rows.";
    throw OomphLibError(error_message_stream.str(),
                        OOMPH_CURRENT_FUNCTION,
                        OOMPH_EXCEPTION_LOCATION);
   }
  
  // if the matrix is distributable then it too should have the same 
  // communicator as the rhs vector and should not be distributed
  DistributableLinearAlgebraObject* dist_matrix_pt = 
   dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt);
  if (dist_matrix_pt != 0)
   {
    if (dist_matrix_pt->distribution_pt()->communicator_pt()->nproc() > 1 && 
        dist_matrix_pt->distribution_pt()->distributed() == true)
     {
      throw OomphLibError(
       "Matrix must not be distributed or only one processor",
       OOMPH_CURRENT_FUNCTION,
       OOMPH_EXCEPTION_LOCATION);   
     }
    OomphCommunicator temp_comm(*rhs.distribution_pt()->communicator_pt());
    if (!(temp_comm == *dist_matrix_pt->distribution_pt()->communicator_pt()))
     {
      std::ostringstream error_message_stream;
      error_message_stream 
       << "The matrix matrix_pt must have the same communicator as the vectors"
       << " rhs and result must have the same communicator";
      throw OomphLibError(error_message_stream.str(),
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
     }
   }
  // if the result vector is setup then check it is not distributed and has 
  // the same communicator as the rhs vector
  if (result.distribution_built())
   {
    if (!(*result.distribution_pt() == *rhs.distribution_pt()))
     {
      std::ostringstream error_message_stream;
      error_message_stream 
       << "The result vector distribution has been setup; it must have the "
       << "same distribution as the rhs vector.";
      throw OomphLibError(error_message_stream.str(),
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
     }
   }   
 #endif
  
  if (!result.distribution_built())
   {
    result.build(rhs.distribution_pt(),0.0);
   }
  
  // set the distribution
  this->build_distribution(rhs.distribution_pt());
  
  // Time the solver 
  double t_start = TimingHelpers::timer();
  
  // factorise
  factorise(matrix_pt);
  
   // backsubstitute
  backsub(rhs,result);
  
  //Doc time for solver
  double t_end = TimingHelpers::timer();
  
   Solution_time = t_end-t_start;
   if(Doc_time)
    {
     oomph_info << std::endl << "CPU for solve with DenseLU   [sec]: " 
                << Solution_time << std::endl << std::endl;
    }
   
   //If we are not resolving then delete storage
   if(!Enable_resolve) {clean_up_memory();}
 }
  
 //=============================================================================
 /// \short Linear-algebra-type solver: Takes pointer to a matrix and rhs
 /// vector and returns the solution of the linear system.
 //=============================================================================
 void DenseLU::solve(DoubleMatrixBase* const &matrix_pt,
                     const Vector<double> &rhs,
                     Vector<double> &result)
 {
  // Time the solver
  clock_t t_start = clock();
 
  factorise(matrix_pt);
  backsub(rhs,result);
 
  //Doc time for solver
  clock_t t_end = clock();
 
  Solution_time = double(t_end-t_start)/CLOCKS_PER_SEC;
  if(Doc_time)
   {
    oomph_info << "CPU for solve with DenseLU   [sec]: "
               << Solution_time << std::endl;
   }
 
  //If we are not resolving then delete storage
  if(!Enable_resolve) {clean_up_memory();}
 }
 
 //==================================================================
 /// Solver: Takes pointer to problem and returns the results Vector
 /// which contains the solution of the linear system defined by
 /// the problem's residual Vector. (Jacobian assembled by FD).
 //==================================================================
 void FD_LU::solve(Problem* const &problem_pt, DoubleVector &result)
 {
  //Initialise timer
  clock_t t_start = clock();
  
 #ifdef PARANOID
  // if the result vector is setup then check it is not distributed and has 
  // the same communicator as the rhs vector
  if (result.built())
   {
    if (result.distributed())
     {
      std::ostringstream error_message_stream;
      error_message_stream 
       << "The result vector must not be distributed";
      throw OomphLibError(error_message_stream.str(),
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
     }
   }   
 #endif
 
  //Find # of degrees of freedom
  unsigned long n_dof = problem_pt->ndof();
 
  //Allocate storage for the residuals vector and the jacobian matrix
  DoubleVector residuals;
  DenseDoubleMatrix jacobian(n_dof);
 
  {
   // initialise timer
   clock_t t_start = clock();
   
   //Get the full jacobian by finite differencing)  VERY INEFFICIENT!
   problem_pt->get_fd_jacobian(residuals,jacobian);
   
   // compute jacobian setup time
   clock_t t_end = clock();
   Jacobian_setup_time = double(t_end-t_start)/CLOCKS_PER_SEC;
 
   //Report the time
   if(Doc_time)
    {
     oomph_info << std::endl << "CPU for setup of Dense Jacobian [sec]: " 
                << Jacobian_setup_time << std::endl << std::endl;
    }
  }
 
  //Solve by dense LU decomposition (not efficient)
   solve(&jacobian,residuals,result);
 
   //Set the sign of the determinant of the jacobian
   problem_pt->sign_of_jacobian() = Sign_of_determinant_of_matrix;
   
   // Finalise/doc timings
   clock_t t_end = clock();
   double total_time=double(t_end-t_start)/CLOCKS_PER_SEC;
   if(Doc_time)
    {
     oomph_info << "CPU for FD DenseLU LinearSolver [sec]: " 
                << total_time << std::endl << std::endl;
    }
 }
 
 
 //===================================================================
 // Interface to SuperLU wrapper: C-linkage declaration of the routine
 // superlu(...). The routine is only declared here; its definition is
 // not part of this file.
 // NOTE(review): the parameters are unnamed in this declaration --
 // consult the wrapper's C source for their order and meaning before
 // calling it.
 //===================================================================
 extern "C"
 {
  int superlu(int *, int *, int *, int *,
              double *, int *, int *,
              double *, int *,  int *, int *,
              void*, int *);
 }
 
 
 #ifdef OOMPH_HAS_MPI
 
 //===================================================================
 // Interface to SuperLU_DIST wrapper: C-linkage declarations of the
 // wrapper routines. They are only declared when OOMPH_HAS_MPI is
 // defined and their definitions are not part of this file.
 // NOTE(review): the meaning of the individual arguments is not
 // documented here and can only be guessed from their names -- confirm
 // against the wrapper's C source.
 //===================================================================
 extern "C"
 {

  // Interface to distributed SuperLU solver where each processor 
  // holds the entire matrix
  void superlu_dist_global_matrix(int opt_flag, int allow_permutations,
                                  int n, int nnz, double *values, 
                                  int *row_index, int *col_start, 
                                  double *b, int nprow, int npcol, 
                                  int doc, void **data, int *info,
                                  MPI_Comm comm);
  
  // Interface to distributed SuperLU solver where each processor 
  // holds part of the matrix
  void superlu_dist_distributed_matrix(int opt_flag, int allow_permutations,
                                       int n, int nnz_local,
                                       int nrow_local, int first_row, 
                                       double *values, int *col_index, 
                                       int *row_start, double *b,
                                       int nprow, int npcol, 
                                       int doc, void **data, int *info,
                                       MPI_Comm comm);

 // Helper function: converts the C-style arrays of a compressed-row (cr)
 // matrix into those of a compressed-column (cc) matrix; said to simply
 // call the SuperLU function dCompRow_to_CompCol.
 // NOTE(review): the cc_* arguments are pointers to pointers, presumably
 // because the converted arrays are allocated by the routine -- confirm
 // who is responsible for freeing them.
  void superlu_cr_to_cc(int nrow, int ncol, int nnz, double* cr_values,
                        int* cr_index, int* cr_start, double** cc_values,
                        int** cc_index, int** cc_start);

 }
 #endif
 
 
 //=============================================================================
 /// Solver: Takes pointer to problem and returns the results Vector
 /// which contains the solution of the linear system defined by
 /// the problem's fully assembled Jacobian and residual Vector.
 //=============================================================================
 void SuperLUSolver::solve(Problem* const &problem_pt, DoubleVector &result)
 {
  // wipe memory
  this->clean_up_memory();
 
 #ifdef OOMPH_HAS_MPI
  // USING SUPERLU DIST
  /////////////////////
  if (Solver_type == Distributed || 
      (Solver_type == Default && problem_pt->communicator_pt()->nproc() > 1))
   {
    // init the timers
    double t_start = TimingHelpers::timer();
 
    // number of dofs
    unsigned n_dof = problem_pt->ndof();
 
    // set the distribution
    LinearAlgebraDistribution dist(problem_pt->communicator_pt(),n_dof,
                                   !Dist_use_global_solver);
    this->build_distribution(dist);
 
    // Take a copy of Delete_matrix_data
    bool copy_of_Delete_matrix_data = Dist_delete_matrix_data;
  
    // Set Delete_matrix to true
    Dist_delete_matrix_data = true;
  
    // Use the distributed version of SuperLU_DIST?
    if (!Dist_use_global_solver)
     {
      // Initialise timer
      double t_start = TimingHelpers::timer();
      
      // Storage for the residuals vector
      DoubleVector residuals(this->distribution_pt(),0.0);
      
      // Get the sparse jacobian and residuals of the problem
      CRDoubleMatrix jacobian(this->distribution_pt());
      problem_pt->get_jacobian(residuals, jacobian);
      
      // Doc time for setup
      double t_end = TimingHelpers::timer();
      Jacobian_setup_time = t_end-t_start;
      if (Doc_time)
       {
        oomph_info << "Time to set up CRDoubleMatrix Jacobian [sec]        : "
                   << Jacobian_setup_time << std::endl;
       }
      
      //Now call the linear algebra solve, if desired
      if(!Suppress_solve) 
       {
        //If the distribution of the result has been build and 
        //does not match that of
        //the solver then redistribute before the solve and return
        //to the incoming distribution afterwards.
        if((result.built()) && 
           (!(*result.distribution_pt() == *this->distribution_pt())))
         {
          LinearAlgebraDistribution 
           temp_global_dist(result.distribution_pt());       
          result.build(this->distribution_pt(),0.0);
          solve(&jacobian,residuals,result);
          result.redistribute(&temp_global_dist);
         }
        else
         {
          solve(&jacobian,residuals,result);
         }
       }
     }
    //Otherwise its the global solve version
    else
     {
      // Storage for the residuals vector
      // A non-distriubted residuals vector
      LinearAlgebraDistribution dist(problem_pt->communicator_pt(),
                                     problem_pt->ndof(),
                                     false);
      DoubleVector residuals(&dist,0.0);
      CRDoubleMatrix jacobian(&dist);
      
      //Get the sparse jacobian and residuals of the problem
      problem_pt->get_jacobian(residuals, jacobian);
      
      // Doc time for setup
      double t_end = TimingHelpers::timer();
      Jacobian_setup_time = t_end-t_start;
      if (Doc_time)
       {
        oomph_info << "Time to set up CR Jacobian [sec]   : "
                   << Jacobian_setup_time << std::endl;
       }
      
      //Now call the linear algebra solve, if desired
      if(!Suppress_solve) 
       {
        //If the result distribution has been built and 
        //does not match the global distribution
        //the redistribute before the solve and then return to the 
        //distributed version afterwards
        if((result.built()) &&  (!(*result.distribution_pt() == dist)))
         {
          LinearAlgebraDistribution 
           temp_global_dist(result.distribution_pt());       
          result.build(&dist,0.0);
          solve(&jacobian,residuals,result);
          result.redistribute(&temp_global_dist);
         }
        else
         {
          solve(&jacobian,residuals,result);
         }
       }
     }
    // Set Delete_matrix back to original value
    Dist_delete_matrix_data = copy_of_Delete_matrix_data;
   }
    
  // OTHERWISE WE ARE USING SUPERLU (SERIAL)
  //////////////////////////////////////////
  else
 #endif
   {
  
    // set the solver distribution
    LinearAlgebraDistribution dist(problem_pt->communicator_pt(),
                                   problem_pt->ndof(),false);
    this->build_distribution(dist);
       
    //Allocate storage for the residuals vector
    DoubleVector residuals(dist,0.0);
    
    // Use the compressed row version?
    if(Serial_compressed_row_flag)
     {
      
      // Initialise timer
      double t_start = TimingHelpers::timer();
      
      //Get the sparse jacobian and residuals of the problem
      CRDoubleMatrix CR_jacobian(this->distribution_pt());
      problem_pt->get_jacobian(residuals,CR_jacobian);
 
      // If we want to compute the gradient for the globally convergent
      // Newton method, then do it here
      if(Compute_gradient)
       {
        // Compute it
        CR_jacobian.multiply_transpose(residuals,
                                       Gradient_for_glob_conv_newton_solve);
        // Set the flag
        Gradient_has_been_computed=true;
       }
 
      // Doc time for setup
      double t_end = TimingHelpers::timer();
      Jacobian_setup_time = t_end-t_start;
      if(Doc_time)
       {
        oomph_info << std::endl 
                   << "Time to set up CRDoubleMatrix Jacobian [sec]: " 
                   << Jacobian_setup_time << std::endl;
       }
      
      //Now call the linear algebra solve, if desired
      if(!Suppress_solve) 
       {
        //If the result vector is built and distributed
        //then need to redistribute into the same form as the
        //RHS (non-distributed)
        if((result.built()) &&
           (!(*result.distribution_pt() == *this->distribution_pt())))
         {
          LinearAlgebraDistribution 
           temp_global_dist(result.distribution_pt());       
          result.build(this->distribution_pt(),0.0);
          solve(&CR_jacobian,residuals,result);
          result.redistribute(&temp_global_dist);
         }
        //Otherwise just solve
        else
         {
          solve(&CR_jacobian,residuals,result);
         }
       }
     }
    //Otherwise its the compressed column version
    else
     {
      // Initialise timer
      double t_start = TimingHelpers::timer();
      
      //Get the sparse jacobian and residuals of the problem
      CCDoubleMatrix CC_jacobian;
      problem_pt->get_jacobian(residuals,CC_jacobian);
 
      // If we want to compute the gradient for the globally convergent
      // Newton method, then do it here
      if(Compute_gradient)
       {
        // Compute it
        CC_jacobian.multiply_transpose(residuals,
                                       Gradient_for_glob_conv_newton_solve);
        // Set the flag
        Gradient_has_been_computed=true;
       }
      
      // Doc time for setup
      double t_end = TimingHelpers::timer();
      Jacobian_setup_time=t_end-t_start;
      if(Doc_time)
       {
        oomph_info << "\nTime to set up CCDoubleMatrix Jacobian [sec]: " 
                   <<  Jacobian_setup_time << std::endl;
       }
      
      //Now call the linear algebra solve, if desired
      if(!Suppress_solve) 
       {
        //If the result vector is built and distributed
        //then need to redistribute into the same form as the
        //RHS
        if((result.built()) && 
           (!(*result.distribution_pt() == *this->distribution_pt())))
         {
          LinearAlgebraDistribution 
           temp_global_dist(result.distribution_pt());       
          result.build(this->distribution_pt(),0.0);
          solve(&CC_jacobian,residuals,result);
          result.redistribute(&temp_global_dist);
         }
        //Otherwise just solve
        else
         {
          solve(&CC_jacobian,residuals,result);
         }
       }
     }
    
    //Set the sign of the jacobian 
    //(this is computed in the LU decomposition phase)
    problem_pt->sign_of_jacobian() = Serial_sign_of_determinant_of_matrix;
   }
 }
 
 //=========================================================================
 /// Linear-algebra-type solver: Takes pointer to a matrix and rhs 
 /// vector and returns the solution of the linear system. Problem pointer 
 /// defaults to NULL and can be omitted. The function returns the global 
 /// result Vector.
 /// Note: if Delete_matrix_data is true the function 
 /// matrix_pt->clean_up_memory() will be used to wipe the matrix data.
 //=========================================================================
 void SuperLUSolver::solve(DoubleMatrixBase* const &matrix_pt,
                          const DoubleVector &rhs,
                          DoubleVector &result)
 {
  // Initialise timer
  double t_start = TimingHelpers::timer(); 
 
 #ifdef PARANOID
  // check that the rhs vector is setup
  if (!rhs.built())
   {
    std::ostringstream error_message_stream;
    error_message_stream 
     << "The vectors rhs must be setup";
    throw OomphLibError(error_message_stream.str(),
                        OOMPH_CURRENT_FUNCTION,
                        OOMPH_EXCEPTION_LOCATION);
   }
 
  // check that the matrix is square
  if (matrix_pt->nrow() != matrix_pt->ncol())
   {
    std::ostringstream error_message_stream;
    error_message_stream 
     << "The matrix at matrix_pt must be square.";
    throw OomphLibError(error_message_stream.str(),
                        OOMPH_CURRENT_FUNCTION,
                        OOMPH_EXCEPTION_LOCATION);    
   }
 
  // check that the matrix has some entries, and so has a values_pt that
  // makes sense (only for CR because CC is never used I think dense
  // matrices will be safe since they don't use a values pointer).
  CRDoubleMatrix* cr_pt = dynamic_cast<CRDoubleMatrix*>(matrix_pt);
  if (cr_pt != 0)
   {
    if (cr_pt->nnz() == 0)
     {
      std::ostringstream error_message_stream;
      error_message_stream
       << "Attempted to call SuperLu on a CRDoubleMatrix with no entries, "
       << "SuperLU would segfault (because the values array pt is "
       << "uninitialised or null).";
      throw OomphLibError(error_message_stream.str(),
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
     }
   }
 
  // check that the matrix and the rhs vector have the same nrow()
  if (matrix_pt->nrow() != rhs.nrow())
   {
    std::ostringstream error_message_stream;
    error_message_stream 
     << "The matrix and the rhs vector must have the same number of rows.";
    throw OomphLibError(error_message_stream.str(),
                        OOMPH_CURRENT_FUNCTION,
                        OOMPH_EXCEPTION_LOCATION);
   }
  
  // if the matrix is distributable then should have the same distribution
  // as the rhs vector
  DistributableLinearAlgebraObject* dist_matrix_pt = 
   dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt);
  if (dist_matrix_pt != 0)
   {
    if (!(*dist_matrix_pt->distribution_pt() == *rhs.distribution_pt()))
     {
      std::ostringstream error_message_stream;
      error_message_stream 
       << "The matrix matrix_pt must have the same distribution as the "
       << "rhs vector.";
      throw OomphLibError(error_message_stream.str(),
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
     }
   }
  // if the matrix is not distributable then it the rhs vector should not be
  // distributed
  else
   {
    if (rhs.distribution_pt()->distributed())
     {
      std::ostringstream error_message_stream;
      error_message_stream 
       << "The matrix (matrix_pt) is not distributable and therefore the rhs"
       << " vector must not be distributed";
      throw OomphLibError(error_message_stream.str(),
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
     }
   }
  // if the result vector is setup then check it has the same distribution
  // as the rhs
  if (result.built())
   {
    if (!(*result.distribution_pt() == *rhs.distribution_pt()))
     {
      std::ostringstream error_message_stream;
      error_message_stream 
       << "The result vector distribution has been setup; it must have the "
       << "same distribution as the rhs vector.";
      throw OomphLibError(error_message_stream.str(),
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
     }
   }   
 #endif
 
  // set the distribution
  if (dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt))
   {
    // the solver has the same distribution as the matrix if possible
    this->build_distribution(dynamic_cast<DistributableLinearAlgebraObject*>
                             (matrix_pt)->distribution_pt());
   }
  else
   {
    // the solver has the same distribution as the RHS
    this->build_distribution(rhs.distribution_pt());
   }
 
  //Factorise the matrix
  factorise(matrix_pt);
  
  //Now do the back solve
  backsub(rhs,result);
 
  // Doc time for solve
  double t_end = TimingHelpers::timer(); 
  Solution_time = t_end-t_start; 
  if (Doc_time)
   {
    oomph_info << "Time for SuperLUSolver solve [sec]       : "
               << t_end-t_start << std::endl;
   }
 
  // If we are not storing the solver data for resolves, delete it
  if (!Enable_resolve) 
   {
    clean_up_memory();
   }
 }
 
 //===============================================================
 /// Resolve the system for a given RHS
 //===============================================================
 void SuperLUSolver::resolve(const DoubleVector &rhs, 
                             DoubleVector &result)
 {
  // Store starting time for solve
  double t_start = TimingHelpers::timer();
  
  // backsub
  backsub(rhs,result);
  
  // Doc time for solve
  double t_end = TimingHelpers::timer();
  Solution_time = t_end-t_start;
  if (Doc_time)
   {
    oomph_info << "Time for SuperLUSolver solve [sec]: "
                 << t_end-t_start << std::endl;
   }
 }
 
 //===================================================================
 ///\short LU decompose the matrix addressed by matrix_pt by using
 /// the SuperLU solver. The resulting matrix factors are stored 
 /// internally.
 //===================================================================
 void SuperLUSolver::factorise(DoubleMatrixBase* const &matrix_pt)
 {
  // wipe memory
  this->clean_up_memory();
 
  // if we have mpi and the solver is distributed or default and nproc
  // gt 1
 #ifdef OOMPH_HAS_MPI
  DistributableLinearAlgebraObject* dist_matrix_pt = 
   dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt);
  unsigned nproc = 1;
  if (dist_matrix_pt != 0)
   {
    nproc = dist_matrix_pt->distribution_pt()->communicator_pt()->nproc();
   }
  if (Solver_type == Distributed || 
      (Solver_type == Default && nproc > 1 && 
       MPI_Helpers::mpi_has_been_initialised()))
   {
   
    // if the matrix is a distributed linear algebra object then use SuperLU
    // dist
    if (dist_matrix_pt != 0)
     {
      factorise_distributed(matrix_pt);
      Using_dist = true;
     }
    else
     {
      factorise_serial(matrix_pt);
      Using_dist = false;
     }
   }
  else
 #endif
   {
    factorise_serial(matrix_pt);
    Using_dist = false;
   }
 }
 
 #ifdef OOMPH_HAS_MPI
 //=============================================================================
 /// LU decompose the matrix addressed by matrix_pt using
 /// the SuperLU_DIST solver. The resulting matrix factors are stored 
 /// internally.
 //=============================================================================
 void SuperLUSolver::factorise_distributed(DoubleMatrixBase* const &matrix_pt)
 {
  //Check that we have a square matrix
 #ifdef PARANOID
  int m = matrix_pt->ncol();
  int n = matrix_pt->nrow();
  if(n != m)
   {
    std::ostringstream error_message_stream;
    error_message_stream << "Can only solve for square matrices\n" 
                         << "N, M " << n << " " << m << std::endl;
    
    throw OomphLibError(error_message_stream.str(),
                        OOMPH_CURRENT_FUNCTION,
                        OOMPH_EXCEPTION_LOCATION);
   }
 #endif
 
  // number of processors
  unsigned nproc = MPI_Helpers::communicator_pt()->nproc();
  if(dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt) != 0)
   {
    nproc = dynamic_cast<DistributableLinearAlgebraObject*>
     (matrix_pt)->distribution_pt()->communicator_pt()->nproc();
   }
 
  // Find number of rows and columns for the process grid
  // First guess at number of rows:
  int nprow=int(sqrt(double(nproc)));
 
  // Does this evenly divide the processor grid?
  while (nprow>1)
   {
    if (nproc%nprow==0) break;
    nprow-=1;
   }
    
  // Store Number of rows/columns for process grid
  Dist_nprow=nprow;
  Dist_npcol=nproc/Dist_nprow;
 
  // Make sure any existing factors are deleted
  clean_up_memory();
   
  // Doc (0/1) = (true/false)
  int doc = !Doc_stats;
  
  // Rset Info
  Dist_info=0;
  
  // Flag for row and column permutations
  int allow_permutations = Dist_allow_row_and_col_permutations;
 
  // Is it a DistributedCRDoubleMatrix?
  if(dynamic_cast<CRDoubleMatrix*>(matrix_pt) != 0)
   {
    // Get a cast pointer to the matrix
    CRDoubleMatrix* cr_matrix_pt = dynamic_cast<CRDoubleMatrix*>(matrix_pt);
 
    //Get the distribution from the matrix
    this->build_distribution(cr_matrix_pt->distribution_pt());
 
 #ifdef PARANOID
    // paranoid check that the matrix has been setup
    if (!cr_matrix_pt->built())
     {
      throw OomphLibError
       ("To apply SuperLUSolver to a CRDoubleMatrix - it must be built",
        OOMPH_CURRENT_FUNCTION,OOMPH_EXCEPTION_LOCATION);
     }
 #endif
    
    // if the matrix is distributed then setup setup superlu dist distributed
    if (cr_matrix_pt->distributed())
     {
      // Find the number of non-zero entries in the matrix
      const int nnz_local = int(cr_matrix_pt->nnz());
      
      // Set up the pointers to the matrix.
      // NOTE: these arrays (accessed via value_pt, index_pt and
      // start_pt) may be modified by the SuperLU_DIST routines, and so 
      // a copy must be taken if the matrix is to be preserved.
      
      // Copy values
      Dist_value_pt = new double[nnz_local];
      double* matrix_value_pt = cr_matrix_pt->value();
      for(int i=0;i<nnz_local;i++) 
       {
        Dist_value_pt[i] = matrix_value_pt[i];
        }
      
      // Copy column indices
      Dist_index_pt = new int[nnz_local];
      int* matrix_index_pt = cr_matrix_pt->column_index();
      for (int i=0; i<nnz_local; i++)
       {
        Dist_index_pt[i] = matrix_index_pt[i];
       }
      
      // Copy row starts
      int nrow_local = cr_matrix_pt->nrow_local();
      Dist_start_pt = new int[nrow_local+1];
      int* matrix_start_pt = cr_matrix_pt->row_start();
      for (int i=0; i<=nrow_local; i++)
       {
        Dist_start_pt[i] = matrix_start_pt[i];
       }
      
      // cache
      int ndof = cr_matrix_pt->distribution_pt()->nrow();
      int first_row = cr_matrix_pt->first_row();
 
      // Now delete the matrix if we are allowed
      if (Dist_delete_matrix_data==true)
       {
        cr_matrix_pt->clear();
       }
 
      // Factorize
      superlu_dist_distributed_matrix(1, allow_permutations,
                                      ndof, nnz_local, nrow_local, 
                                      first_row, Dist_value_pt, Dist_index_pt, 
                                      Dist_start_pt, 0, Dist_nprow, Dist_npcol, 
                                      doc,&Dist_solver_data_pt, &Dist_info,
                                      this->distribution_pt()->
                                      communicator_pt()->mpi_comm());
    
      // Record that data is stored
      Dist_distributed_solve_data_allocated=true;
     }
    // else the CRDoubleMatrix is not distributed
    else
     {
      // Find the number of non-zero entries in the matrix
      const int nnz = int(cr_matrix_pt->nnz());  
 
      // cache the number of rows
      int nrow = cr_matrix_pt->nrow();
      
      // Set up the pointers to the matrix.
      // NOTE: these arrays (accessed via value_pt, index_pt and
      // start_pt) may be modified by the SuperLU_DIST routines, and so 
      // a copy must be taken if the matrix is to be preserved.
 
      // create the corresponing cc matrix
      superlu_cr_to_cc(nrow,nrow,nnz,cr_matrix_pt->value(),
                       cr_matrix_pt->column_index(),
                       cr_matrix_pt->row_start(),
                       &Dist_value_pt,&Dist_index_pt,&Dist_start_pt);
 
      // Delete the matrix if we are allowed
      if (Dist_delete_matrix_data==true)
       {
        cr_matrix_pt->clear();
       }
      
      // do the factorization
      superlu_dist_global_matrix(1, allow_permutations,
                                 nrow, nnz, Dist_value_pt, Dist_index_pt, 
                                 Dist_start_pt, 
                                 0, Dist_nprow, Dist_npcol, doc, 
                                 &Dist_solver_data_pt, &Dist_info,
                                 this->distribution_pt()
                                 ->communicator_pt()->mpi_comm());
      
      // Record that data is stored
      Dist_global_solve_data_allocated=true;
     }
   }
 
  // Or is it a CCDoubleMatrix?
 else if(dynamic_cast<CCDoubleMatrix*>(matrix_pt))
   {
    // Get a cast pointer to the matrix
    CCDoubleMatrix* serial_matrix_pt = dynamic_cast<CCDoubleMatrix*>(matrix_pt);
    
    // Find the number of non-zero entries in the matrix
    const int nnz = int(serial_matrix_pt->nnz());  
 
    // Find # of degrees of freedom (variables)
    int ndof = int(serial_matrix_pt->nrow());
 
    // Find the local number of degrees of freedom in the linear system
    int ndof_local = ndof;
 
    // Set up the pointers to the matrix.
    // NOTE: these arrays (accessed via value_pt, index_pt and
    // start_pt) may be modified by the SuperLU_DIST routines, and so 
    // a copy must be taken if the matrix is to be preserved.
 
    // Copy values
    Dist_value_pt = new double[nnz];
    double* matrix_value_pt = serial_matrix_pt->value();
    for(int i=0;i<nnz;i++) 
     {
      Dist_value_pt[i] = matrix_value_pt[i];
     }
    
    // copy row indices
    Dist_index_pt = new int[nnz];
    int* matrix_index_pt = serial_matrix_pt->row_index();
    for (int i=0; i<nnz; i++)
     {
      Dist_index_pt[i] = matrix_index_pt[i];
     }
    
    // copy column starts
    Dist_start_pt = new int[ndof_local+1];
    int* matrix_start_pt = serial_matrix_pt->column_start();
    for (int i=0; i<=ndof_local; i++)
     {
      Dist_start_pt[i] = matrix_start_pt[i];
     }
 
    // Delete the matrix if we are allowed
    if (Dist_delete_matrix_data==true)
     {
      serial_matrix_pt->clean_up_memory();
     }
    
    // do the factorization
    superlu_dist_global_matrix(1, allow_permutations,
                               ndof, nnz, Dist_value_pt, Dist_index_pt, 
                               Dist_start_pt, 0, Dist_nprow, Dist_npcol, doc, 
                               &Dist_solver_data_pt, &Dist_info,
                               this->distribution_pt()
                               ->communicator_pt()->mpi_comm());
    
    // Record that data is stored
    Dist_global_solve_data_allocated=true;
   }
  // Otherwise throw an error
  else
   {
    std::ostringstream error_message_stream;
    error_message_stream << "SuperLUSolver implemented only for "
                         << " CCDoubleMatrix, CRDoubleMatrix\n"
                         << "and DistributedCRDoubleMatrix matrices\n";
    throw OomphLibError(error_message_stream.str(),
                        OOMPH_CURRENT_FUNCTION,
                        OOMPH_EXCEPTION_LOCATION);
   }
 
    // Throw an error if superLU returned an error status in info.
    if(Dist_info != 0)
     {
      std::ostringstream error_msg;
      error_msg << "SuperLU returned the error status code "
                << Dist_info
                << " . See the SuperLU documentation for what this means.";
      throw OomphLibError(error_msg.str(),
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
     }
 }
 #endif
 
 //===================================================================
 ///\short LU decompose the matrix addressed by matrix_pt by using
 /// the SuperLU solver. The resulting matrix factors are stored 
 /// internally.
 //===================================================================
 void SuperLUSolver::factorise_serial(DoubleMatrixBase* const &matrix_pt)
 {
 #ifdef PARANOID
  // PARANOID check that if the matrix is distributable then it should not be 
  // then it should not be distributed
  if (dynamic_cast<DistributableLinearAlgebraObject*>(matrix_pt) != 0)
   {
    if (dynamic_cast<DistributableLinearAlgebraObject*>
        (matrix_pt)->distributed())
     {
      std::ostringstream error_message_stream;                         
      error_message_stream                                        
       << "The matrix must not be distributed.";  
      throw OomphLibError(error_message_stream.str(),
                          OOMPH_CURRENT_FUNCTION,             
                          OOMPH_EXCEPTION_LOCATION);        
     }
   }
 #endif
 
  //Find # of degrees of freedom (variables)
  int n = matrix_pt->nrow();
  
  //Check that we have a square matrix
 #ifdef PARANOID
  int m = matrix_pt->ncol();
  if(n != m)
   {
    std::ostringstream error_message_stream;
    error_message_stream << "Can only solve for square matrices\n" 
                         << "N, M " << n << " " << m << std::endl;
    
    throw OomphLibError(error_message_stream.str(),
                        OOMPH_CURRENT_FUNCTION,
                        OOMPH_EXCEPTION_LOCATION);
   }
 #endif
  
  //Storage for the values, rows and column indices
  //required by SuplerLU
  double *value = 0;
  int *index=0, *start=0;
  
  //Integer used to represent compressed row or column format
  //Default compressed row
  int transpose = 0;
  
  //Number of non-zero entries in the matrix
  int nnz = 0;
  
  // Doc flag (convert to int for SuperLU)
  int doc = Doc_stats;
  
  //Is it a CR matrix
  if(dynamic_cast<CRDoubleMatrix*>(matrix_pt))
   {
    //Set the appropriate row flags
    Serial_compressed_row_flag=true;
    transpose = 1;
    //Get a cast pointer to the matrix
    CRDoubleMatrix* CR_matrix_pt = dynamic_cast<CRDoubleMatrix*>(matrix_pt);
    
    //Now set the pointers to the interanally stored values
    //and indices
    nnz = CR_matrix_pt->nnz();
    value = CR_matrix_pt->value();
    index = CR_matrix_pt->column_index();
    start = CR_matrix_pt->row_start();
   }
  //Otherwise is it the compressed column version?
  else if(dynamic_cast<CCDoubleMatrix*>(matrix_pt))
   {
    //Set the compressed row flag to false
    Serial_compressed_row_flag=false;
    //Get a cast pointer to the matrix
    CCDoubleMatrix* CC_matrix_pt = dynamic_cast<CCDoubleMatrix*>(matrix_pt);
    
    //Now set the pointers to the interanally stored values
    //and indices
    nnz = CC_matrix_pt->nnz();
    value = CC_matrix_pt->value();
    index = CC_matrix_pt->row_index();
    start = CC_matrix_pt->column_start();
   }
  //Otherwise throw and error
  else
   {
    throw OomphLibError("SuperLU only works with CR or CC Double matrices",
                        OOMPH_CURRENT_FUNCTION,
                        OOMPH_EXCEPTION_LOCATION);
   }
  
  // Clean up any previous storage so that if this is called twice with
  // the same matrix, we don't get a memory leak
  clean_up_memory();
  
  //Perform the lu decompose phase (i=1)
  int i=1;
  Serial_sign_of_determinant_of_matrix =  superlu(&i, &n, &nnz,  0,
                                                  value, index, start,
                                                  0, &n,  &transpose, &doc,
                                                  &Serial_f_factors, 
                                                  &Serial_info);
 
  // Throw an error if superLU returned an error status in info.
  if(Serial_info != 0)
   {
    std::ostringstream error_msg;
    error_msg << "SuperLU returned the error status code "
              << Serial_info
              << " . See the SuperLU documentation for what this means.";
    throw OomphLibError(error_msg.str(),
                        OOMPH_CURRENT_FUNCTION,
                        OOMPH_EXCEPTION_LOCATION);
   }
 
 
  //Set the number of degrees of freedom in the linear system
  Serial_n_dof = n;
 }
 
 //=============================================================================
 /// Do the backsubstitution for SuperLUSolver. 
 /// Note - this method performs no paranoid checks - these are all performed in
 /// solve(...) and resolve(...)
 //=============================================================================
 void SuperLUSolver::backsub(const DoubleVector &rhs,
                            DoubleVector &result)
 {
 #ifdef OOMPH_HAS_MPI
  if (Using_dist)
   {
    backsub_distributed(rhs,result);
   }
  else
 #endif
   {
    backsub_serial(rhs,result);
   }
 }
 
 #ifdef OOMPH_HAS_MPI
 //=========================================================================
 ///Static warning to suppress warnings about incorrect distribution of
 ///RHS vector. Default is false
 //=========================================================================
 bool SuperLUSolver::Suppress_incorrect_rhs_distribution_warning_in_resolve
                   =false;
 
 //=============================================================================
 /// Do the backsubstitution for SuperLU solver. 
 /// Note - this method performs no paranoid checks - these are all performed in
 /// solve(...) and resolve(...)
 //=============================================================================
 void SuperLUSolver::backsub_distributed(const DoubleVector &rhs,
                                         DoubleVector &result)
 {
 #ifdef PARANOID
  // check that the rhs vector is setup
  if (!rhs.distribution_pt()->built())
   {
    std::ostringstream error_message_stream;
    error_message_stream 
     << "The vectors rhs must be setup";
    throw OomphLibError(error_message_stream.str(),
                        OOMPH_CURRENT_FUNCTION,
                        OOMPH_EXCEPTION_LOCATION);
   }
 #endif
  // check that the rhs distribution is the same as the distribution as this 
  // solver. If not redistribute and issue a warning
  LinearAlgebraDistribution rhs_distribution(rhs.distribution_pt());
  if (!(*rhs.distribution_pt() == *this->distribution_pt()))
   {
    if(!Suppress_incorrect_rhs_distribution_warning_in_resolve)
     {
      std::ostringstream warning_stream;
      warning_stream 
       << "The distribution of rhs vector does not match that ofthe solver.\n";
      warning_stream
       << "The rhs will be redistributed, which is likely to  be inefficient\n";
      warning_stream
       << "To remove this warning you can either:\n"
       << "    i) Ensure that the rhs vector has the correct distribution\n"
     << "       before calling the resolve() function\n"
       << "or ii) Set the flag \n"
       << " SuperLUSolver::Suppress_incorrect_rhs_distribution_warning_in_resolve\n"
       << "       to be true\n\n";
      
      OomphLibWarning(warning_stream.str(),
                      "SuperLUSolver::resolve()",
                      OOMPH_EXCEPTION_LOCATION);
     }
 
    //Have to cast away const-ness (which tells us that we shouldn't really
    //be doing this!)
    const_cast<DoubleVector&>(rhs).redistribute(this->distribution_pt());
   }
  
 #ifdef PARANOID
  // if the result vector is setup then check it has the same distribution
  // as the rhs
  if (result.distribution_built())
   {
    if (!(*result.distribution_pt() == *rhs.distribution_pt()))
     {
      std::ostringstream error_message_stream;
      error_message_stream 
       << "The result vector distribution has been setup; it must have the "
       << "same distribution as the rhs vector.";
      throw OomphLibError(error_message_stream.str(),
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);
     }
   }   
 #endif
  // Doc (0/1) = (true/false)
  int doc = !Doc_stats;
  
  // Reset Info
  Dist_info=0;
 
  // number of DOFs
  int ndof = this->distribution_pt()->nrow();
 
  // Copy the rhs values to result
  result = rhs;
  
  // Do the backsubsitition phase
  if (Dist_distributed_solve_data_allocated)
   {
    // Call distributed solver
    superlu_dist_distributed_matrix(2, -1, ndof, 0, 0, 0, 0, 0, 0, 
                                    result.values_pt(), Dist_nprow, 
                                    Dist_npcol, doc, 
                                    &Dist_solver_data_pt, &Dist_info,
                                    this->distribution_pt()->
                                    communicator_pt()->mpi_comm());
   }
  else if (Dist_global_solve_data_allocated)
   {
    // Call global solver
    superlu_dist_global_matrix(2, -1, ndof, 0, 0, 0, 0, result.values_pt(),
                               Dist_nprow, Dist_npcol, doc, 
                               &Dist_solver_data_pt, &Dist_info,
                               this->distribution_pt()->communicator_pt()->mpi_comm());
   }
  else
   {
    throw OomphLibError("The matrix factors have not been stored",
                        OOMPH_CURRENT_FUNCTION,
                        OOMPH_EXCEPTION_LOCATION);
   }
 
  // Throw an error if superLU returned an error status in info.
  if(Dist_info != 0)
   {
    std::ostringstream error_msg;
    error_msg << "SuperLU returned the error status code "
              << Dist_info << " . See the SuperLU documentation for what this means.";
    throw OomphLibError(error_msg.str(),
                        OOMPH_CURRENT_FUNCTION,
                        OOMPH_EXCEPTION_LOCATION);
   }
 
  //Redistribute to original distribution
  //Have to cast away const-ness (which tells us that we shouldn't really
  //be doing this!)
  const_cast<DoubleVector&>(rhs).redistribute(&rhs_distribution);
 }
 #endif
 
 //================================================================
 /// Do the backsubstitution for SuperLU
 //================================================================
 void SuperLUSolver::backsub_serial(const DoubleVector &rhs,
                                    DoubleVector &result)
 {
  //Find the number of unknowns
  int n = rhs.nrow();
 
 #ifdef PARANOID
  // PARANOID check that this rhs distribution is setup
  if (!rhs.built())
   {
    std::ostringstream error_message_stream;                           
    error_message_stream                                        
     << "The rhs vector distribution must be setup.";               
    throw OomphLibError(error_message_stream.str(),
                        OOMPH_CURRENT_FUNCTION,
                        OOMPH_EXCEPTION_LOCATION);         
   }
  // PARANOID check that the rhs has the right number of global rows
  if(static_cast<int>(Serial_n_dof) != n)
   {
    throw OomphLibError(
     "RHS does not have the same dimension as the linear system",
     OOMPH_CURRENT_FUNCTION,
     OOMPH_EXCEPTION_LOCATION);
   }
  // PARANOID check that the rhs is not distributed
  if (rhs.distribution_pt()->distributed())
   {
    std::ostringstream error_message_stream;                           
    error_message_stream                                        
     << "The rhs vector must not be distributed.";               
    throw OomphLibError(error_message_stream.str(),
                        OOMPH_CURRENT_FUNCTION,
                        OOMPH_EXCEPTION_LOCATION);         
   }
  // PARANOID check that if the result is setup it matches the distribution
  // of the rhs
  if (result.built())
   {
    if (!(*rhs.distribution_pt() == *result.distribution_pt()))
     {
      std::ostringstream error_message_stream;                           
      error_message_stream                                        
       << "If the result distribution is setup then it must be the same as the "
       << "rhs distribution";               
      throw OomphLibError(error_message_stream.str(),
                          OOMPH_CURRENT_FUNCTION,
                          OOMPH_EXCEPTION_LOCATION);         
     }
   }
 #endif
 
  // copy result to rhs
  result=rhs;
  
  //Number of RHSs
  int nrhs=1;
 
  //Cast the boolean flags to ints for SuperLU
  int transpose = Serial_compressed_row_flag;
  int doc = Doc_stats;
 
  //Do the backsubsitition phase
  int i=2;
  superlu(&i, &n, 0,  &nrhs,
          0, 0, 0,
          result.values_pt(), &n,  &transpose, &doc,
          &Serial_f_factors, &Serial_info);
 
  // Throw an error if superLU returned an error status in info.
  if(Serial_info != 0)
    {
     std::ostringstream error_msg;
     error_msg << "SuperLU returned the error status code "
               << Serial_info
               << " . See the SuperLU documentation for what this means.";
     throw OomphLibError(error_msg.str(),
                         OOMPH_CURRENT_FUNCTION,
                         OOMPH_EXCEPTION_LOCATION);
    }
 }
 
 
 //=============================================================================
 /// Clean up the memory
 //=============================================================================
 void SuperLUSolver::clean_up_memory()
 {
  //If we have non-zero LU factors stored
  if(Serial_f_factors!=0)
   {
    //Clean up those factors
    int i=3; 
    int transpose = Serial_compressed_row_flag;
    superlu(&i, 0, 0,  0, 0, 0, 0,
            0, 0, &transpose, 0,
            &Serial_f_factors, &Serial_info);
 
    //Set the F_factors to zero
    Serial_f_factors=0;
    Serial_n_dof=0;
   }
 
 #ifdef OOMPH_HAS_MPI
  //If we have non-zero LU factors stored
  if(Dist_solver_data_pt!=0)
   {
    //Clean up any stored solver data
 
    // Doc (0/1) = (true/false)
    int doc = !Doc_stats;
    
    // Reset Info flag
    Dist_info=0;
 
    // number of DOFs
    int ndof = this->distribution_pt()->nrow();
    
    if (Dist_distributed_solve_data_allocated)
     {
      superlu_dist_distributed_matrix(3, -1, ndof, 0, 0, 0, 0, 0, 0, 0, 
                                      Dist_nprow, Dist_npcol, doc, 
                                      &Dist_solver_data_pt, 
                                      &Dist_info,
                                      this->distribution_pt()->communicator_pt()
                                      ->mpi_comm());
      Dist_distributed_solve_data_allocated = false;
     }
    if (Dist_global_solve_data_allocated)
     {
      superlu_dist_global_matrix(3, -1, ndof, 0, 0, 0, 0, 0,
                                 Dist_nprow, Dist_npcol, doc, 
                                 &Dist_solver_data_pt, &Dist_info,
                                 this->distribution_pt()->communicator_pt()
                                 ->mpi_comm());
      Dist_global_solve_data_allocated = false;
     }
    
    Dist_solver_data_pt=0;
  
    // Delete internal copy of the matrix
    delete[] Dist_value_pt;
    delete[] Dist_index_pt;
    delete[] Dist_start_pt;
    Dist_value_pt=0;
    Dist_index_pt=0;
    Dist_start_pt=0;
 
    // and the distribution
    this->clear_distribution();
   }
 #endif
 }
 
 } //end of oomph namespace