xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision e740cb95fda3a587e3fd58aed9abd930b75fc8ef)
1 /*$Id: superlu.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
2 
3 /*
4         Provides an interface to the SuperLU sparse solver
5           Modified for SuperLU 2.0 by Matthew Knepley
6 */
7 
8 #include "src/mat/impls/aij/seq/aij.h"
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #include "zsp_defs.h"
13 #else
14 #include "dsp_defs.h"
15 #endif
16 #include "util.h"
17 EXTERN_C_END
18 
19 typedef struct {
20   SuperMatrix  A,B,AC,L,U;
21   int          *perm_r,*perm_c,ispec,relax,panel_size;
22   double       pivot_threshold;
23   NCformat     *store;
24   MatStructure flg;
25   PetscTruth   SuperluMatOdering;
26 
27   /* A few function pointers for inheritance */
28   int (*MatView)(Mat,PetscViewer);
29   int (*MatAssemblyEnd)(Mat,MatAssemblyType);
30   int (*MatDestroy)(Mat);
31 
32   /* Flag to clean up (non-global) SuperLU objects during Destroy */
33   PetscTruth CleanUpSuperLU;
34 } Mat_SeqAIJ_SuperLU;
35 
36 
37 extern int MatDestroy_SeqAIJ(Mat);
38 
39 #undef __FUNCT__
40 #define __FUNCT__ "MatDestroy_SeqAIJ_SuperLU"
41 int MatDestroy_SeqAIJ_SuperLU(Mat A)
42 {
43   Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr;
44 
45   int                ierr,(*destroy)(Mat);
46 
47   PetscFunctionBegin;
48   /* It looks like this is decreasing the reference count a second time during MatDestroy?! */
49   if (--A->refct > 0)PetscFunctionReturn(0);
50   /* We have to free the global data or SuperLU crashes (sucky design)*/
51   /* Since we don't know if more solves on other matrices may be done
52      we cannot free the yucky SuperLU global data
53     StatFree();
54   */
55 
56   /* Free the SuperLU datastructures */
57   if (lu->CleanUpSuperLU) {
58     Destroy_CompCol_Permuted(&lu->AC);
59     Destroy_SuperNode_Matrix(&lu->L);
60     Destroy_CompCol_Matrix(&lu->U);
61     ierr = PetscFree(lu->B.Store);CHKERRQ(ierr);
62     ierr = PetscFree(lu->A.Store);CHKERRQ(ierr);
63     ierr = PetscFree(lu->perm_r);CHKERRQ(ierr);
64     ierr = PetscFree(lu->perm_c);CHKERRQ(ierr);
65   }
66 
67   destroy = lu->MatDestroy;
68   ierr = PetscFree(lu);CHKERRQ(ierr);
69   ierr = (*destroy)(A);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 
73 #undef __FUNCT__
74 #define __FUNCT__ "MatView_SeqAIJ_Spooles"
75 int MatView_SeqAIJ_SuperLU(Mat A,PetscViewer viewer)
76 {
77   int                   ierr;
78   PetscTruth            isascii;
79   PetscViewerFormat     format;
80   Mat_SeqAIJ_SuperLU   *lu=(Mat_SeqAIJ_SuperLU*)(A->spptr);
81 
82   PetscFunctionBegin;
83   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
84 
85   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
86   if (isascii) {
87     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
88     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
89       ierr = MatSeqAIJFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr);
90     }
91   }
92   PetscFunctionReturn(0);
93 }
94 
95 #undef __FUNCT__
96 #define __FUNCT__ "MatAssemblyEnd_SeqAIJ_SuperLU"
97 int MatAssemblyEnd_SeqAIJ_SuperLU(Mat A,MatAssemblyType mode) {
98   int                ierr;
99   Mat_SeqAIJ_SuperLU *lu=(Mat_SeqAIJ_SuperLU*)(A->spptr);
100 
101   PetscFunctionBegin;
102   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
103   ierr = MatUseSuperLU_SeqAIJ(A);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #include "src/mat/impls/dense/seq/dense.h"
108 #undef __FUNCT__
109 #define __FUNCT__ "MatCreateNull_SeqAIJ_SuperLU"
110 int MatCreateNull_SeqAIJ_SuperLU(Mat A,Mat *nullMat)
111 {
112   Mat_SeqAIJ_SuperLU  *lu = (Mat_SeqAIJ_SuperLU*)A->spptr;
113   int                 numRows = A->m,numCols = A->n;
114   SCformat            *Lstore;
115   int                 numNullCols,size;
116 #if defined(PETSC_USE_COMPLEX)
117   doublecomplex       *nullVals,*workVals;
118 #else
119   PetscScalar         *nullVals,*workVals;
120 #endif
121   int                 row,newRow,col,newCol,block,b,ierr;
122 
123   PetscFunctionBegin;
124   PetscValidHeaderSpecific(A,MAT_COOKIE);
125   PetscValidPointer(nullMat);
126   if (!A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Unfactored matrix");
127   numNullCols = numCols - numRows;
128   if (numNullCols < 0) SETERRQ(PETSC_ERR_ARG_WRONG,"Function only applies to underdetermined problems");
129   /* Create the null matrix */
130   ierr = MatCreateSeqDense(A->comm,numRows,numNullCols,PETSC_NULL,nullMat);CHKERRQ(ierr);
131   if (numNullCols == 0) {
132     ierr = MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
133     ierr = MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
134     PetscFunctionReturn(0);
135   }
136 #if defined(PETSC_USE_COMPLEX)
137   nullVals = (doublecomplex*)((Mat_SeqDense*)(*nullMat)->data)->v;
138 #else
139   nullVals = ((Mat_SeqDense*)(*nullMat)->data)->v;
140 #endif
141   /* Copy in the columns */
142   Lstore = (SCformat*)lu->L.Store;
143   for(block = 0; block <= Lstore->nsuper; block++) {
144     newRow = Lstore->sup_to_col[block];
145     size   = Lstore->sup_to_col[block+1] - Lstore->sup_to_col[block];
146     for(col = Lstore->rowind_colptr[newRow]; col < Lstore->rowind_colptr[newRow+1]; col++) {
147       newCol = Lstore->rowind[col];
148       if (newCol >= numRows) {
149         for(b = 0; b < size; b++)
150 #if defined(PETSC_USE_COMPLEX)
151           nullVals[(newCol-numRows)*numRows+newRow+b] = ((doublecomplex*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col];
152 #else
153           nullVals[(newCol-numRows)*numRows+newRow+b] = ((double*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col];
154 #endif
155       }
156     }
157   }
158   /* Permute rhs to form P^T_c B */
159   ierr = PetscMalloc(numRows*sizeof(double),&workVals);CHKERRQ(ierr);
160   for(b = 0; b < numNullCols; b++) {
161     for(row = 0; row < numRows; row++) workVals[lu->perm_c[row]] = nullVals[b*numRows+row];
162     for(row = 0; row < numRows; row++) nullVals[b*numRows+row]   = workVals[row];
163   }
164   /* Backward solve the upper triangle A x = b */
165   for(b = 0; b < numNullCols; b++) {
166 #if defined(PETSC_USE_COMPLEX)
167     sp_ztrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&ierr);
168 #else
169     sp_dtrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&ierr);
170 #endif
171     if (ierr < 0)
172       SETERRQ1(PETSC_ERR_ARG_WRONG,"The argument %d was invalid",-ierr);
173   }
174   ierr = PetscFree(workVals);CHKERRQ(ierr);
175 
176   ierr = MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
177   ierr = MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatSolve_SeqAIJ_SuperLU"
183 int MatSolve_SeqAIJ_SuperLU(Mat A,Vec b,Vec x)
184 {
185   Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr;
186   PetscScalar        *array;
187   int                m,ierr;
188 
189   PetscFunctionBegin;
190   ierr = VecGetLocalSize(b,&m);CHKERRQ(ierr);
191   ierr = VecCopy(b,x);CHKERRQ(ierr);
192   ierr = VecGetArray(x,&array);CHKERRQ(ierr);
193   /* Create the Rhs */
194   lu->B.Stype        = SLU_DN;
195   lu->B.Mtype        = SLU_GE;
196   lu->B.nrow         = m;
197   lu->B.ncol         = 1;
198   ((DNformat*)lu->B.Store)->lda   = m;
199   ((DNformat*)lu->B.Store)->nzval = array;
200 #if defined(PETSC_USE_COMPLEX)
201   lu->B.Dtype        = SLU_Z;
202   zgstrs("T",&lu->L,&lu->U,lu->perm_r,lu->perm_c,&lu->B,&ierr);
203 #else
204   lu->B.Dtype        = SLU_D;
205   dgstrs("T",&lu->L,&lu->U,lu->perm_r,lu->perm_c,&lu->B,&ierr);
206 #endif
207   if (ierr < 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr);
208   ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 static int StatInitCalled = 0;
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_SuperLU"
216 int MatLUFactorNumeric_SeqAIJ_SuperLU(Mat A,Mat *F)
217 {
218   Mat_SeqAIJ         *aa = (Mat_SeqAIJ*)(A)->data;
219   Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)(*F)->spptr;
220   int                *etree,i,ierr;
221   PetscTruth         flag;
222 
223   PetscFunctionBegin;
224   /* Create the SuperMatrix for A^T:
225        Since SuperLU only likes column-oriented matrices,we pass it the transpose,
226        and then solve A^T X = B in MatSolve().
227   */
228 
229   if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numerical factorization */
230     lu->A.Stype   = SLU_NC;
231 #if defined(PETSC_USE_COMPLEX)
232     lu->A.Dtype   = SLU_Z;
233 #else
234     lu->A.Dtype   = SLU_D;
235 #endif
236     lu->A.Mtype   = SLU_GE;
237     lu->A.nrow    = A->n;
238     lu->A.ncol    = A->m;
239 
240     ierr = PetscMalloc(sizeof(NCformat),&lu->store);CHKERRQ(ierr);
241     ierr = PetscMalloc(sizeof(DNformat),&lu->B.Store);CHKERRQ(ierr);
242   }
243   lu->store->nnz    = aa->nz;
244   lu->store->colptr = aa->i;
245   lu->store->rowind = aa->j;
246   lu->store->nzval  = aa->a;
247   lu->A.Store       = lu->store;
248 
249   /* Set SuperLU options */
250   lu->relax      = sp_ienv(2);
251   lu->panel_size = sp_ienv(1);
252   /* We have to initialize global data or SuperLU crashes (sucky design) */
253   if (!StatInitCalled) {
254     StatInit(lu->panel_size,lu->relax);
255   }
256   StatInitCalled++;
257 
258   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU Options","Mat");CHKERRQ(ierr);
259   /* use SuperLU mat ordeing */
260   ierr = PetscOptionsInt("-mat_superlu_ordering","SuperLU ordering type (one of 0, 1, 2, 3)\n   0: natural ordering;\n   1: MMD applied to A'*A;\n   2: MMD applied to A'+A;\n   3: COLAMD, approximate minimum degree column ordering","None",lu->ispec,&lu->ispec,&flag);CHKERRQ(ierr);
261   if (flag) {
262     get_perm_c(lu->ispec, &lu->A, lu->perm_c);
263     lu->SuperluMatOdering = PETSC_TRUE;
264   }
265   PetscOptionsEnd();
266 
267   /* Create the elimination tree */
268   ierr = PetscMalloc(A->n*sizeof(int),&etree);CHKERRQ(ierr);
269   sp_preorder("N",&lu->A,lu->perm_c,etree,&lu->AC);
270   /* Factor the matrix */
271 #if defined(PETSC_USE_COMPLEX)
272   zgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr);
273 #else
274   dgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr);
275 #endif
276   if (ierr < 0) {
277     SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr);
278   } else if (ierr > 0) {
279     if (ierr <= A->m) {
280       SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element %d of U is exactly zero",ierr);
281     } else {
282       SETERRQ1(PETSC_ERR_ARG_WRONG,"Memory allocation failure after %d bytes were allocated",ierr-A->m);
283     }
284   }
285 
286   /* Cleanup */
287   ierr = PetscFree(etree);CHKERRQ(ierr);
288 
289   lu->flg = SAME_NONZERO_PATTERN;
290   PetscFunctionReturn(0);
291 }
292 
293 /*
294    Note the r permutation is ignored
295 */
296 #undef __FUNCT__
297 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_SuperLU"
298 int MatLUFactorSymbolic_SeqAIJ_SuperLU(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
299 {
300   Mat                 B;
301   Mat_SeqAIJ_SuperLU  *lu;
302   int                 ierr,*ca;
303 
304   PetscFunctionBegin;
305 
306   ierr            = MatCreateSeqAIJ(A->comm,A->m,A->n,0,PETSC_NULL,F);CHKERRQ(ierr);
307   B               = *F;
308   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_SuperLU;
309   B->ops->solve           = MatSolve_SeqAIJ_SuperLU;
310   B->ops->destroy         = MatDestroy_SeqAIJ_SuperLU;
311   B->factor               = FACTOR_LU;
312   (*F)->assembled         = PETSC_TRUE;  /* required by -sles_view */
313 
314   ierr            = PetscNew(Mat_SeqAIJ_SuperLU,&lu);CHKERRQ(ierr);
315   B->spptr        = (void*)lu;
316   ierr = PetscObjectComposeFunction((PetscObject)B,"MatCreateNull","MatCreateNull_SeqAIJ_SuperLU",
317                                     (void(*)(void))MatCreateNull_SeqAIJ_SuperLU);CHKERRQ(ierr);
318 
319   /* Allocate the work arrays required by SuperLU (notice sizes are for the transpose) */
320   ierr = PetscMalloc(A->n*sizeof(int),&lu->perm_r);CHKERRQ(ierr);
321   ierr = PetscMalloc(A->m*sizeof(int),&lu->perm_c);CHKERRQ(ierr);
322 
323   /* use PETSc mat ordering */
324   ierr = ISGetIndices(c,&ca);CHKERRQ(ierr);
325   ierr = PetscMemcpy(lu->perm_c,ca,A->m*sizeof(int));CHKERRQ(ierr);
326   ierr = ISRestoreIndices(c,&ca);CHKERRQ(ierr);
327   lu->SuperluMatOdering = PETSC_FALSE;
328 
329   lu->pivot_threshold = info->dtcol;
330   PetscLogObjectMemory(B,(A->m+A->n)*sizeof(int)+sizeof(Mat_SeqAIJ_SuperLU));
331 
332   lu->flg            = DIFFERENT_NONZERO_PATTERN;
333   lu->CleanUpSuperLU = PETSC_TRUE;
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNCT__
338 #define __FUNCT__ "MatUseSuperLU_SeqAIJ"
339 int MatUseSuperLU_SeqAIJ(Mat A)
340 {
341   PetscTruth flg;
342   int        ierr;
343 
344   PetscFunctionBegin;
345   PetscValidHeaderSpecific(A,MAT_COOKIE);
346   ierr = PetscTypeCompare((PetscObject)A,MATSUPERLU,&flg);
347   if (!flg) PetscFunctionReturn(0);
348 
349   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ_SuperLU;
350 
351   PetscFunctionReturn(0);
352 }
353 
354 /* used by -sles_view */
355 #undef __FUNCT__
356 #define __FUNCT__ "MatSeqAIJFactorInfo_SuperLU"
357 int MatSeqAIJFactorInfo_SuperLU(Mat A,PetscViewer viewer)
358 {
359   Mat_SeqAIJ_SuperLU      *lu= (Mat_SeqAIJ_SuperLU*)A->spptr;
360   int                     ierr;
361   PetscFunctionBegin;
362   /* check if matrix is SuperLU type */
363   if (A->ops->solve != MatSolve_SeqAIJ_SuperLU) PetscFunctionReturn(0);
364 
365   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr);
366   if(lu->SuperluMatOdering) ierr = PetscViewerASCIIPrintf(viewer,"  SuperLU mat ordering: %d\n",lu->ispec);CHKERRQ(ierr);
367 
368   PetscFunctionReturn(0);
369 }
370 
371 EXTERN_C_BEGIN
372 #undef __FUNCT__
373 #define __FUNCT__ "MatCreate_SeqAIJ_SuperLU"
374 int MatCreate_SeqAIJ_SuperLU(Mat A) {
375   int                ierr;
376   Mat_SeqAIJ_SuperLU *lu;
377 
378   PetscFunctionBegin;
379   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
380   ierr = MatUseSuperLU_SeqAIJ(A);CHKERRQ(ierr);
381 
382   ierr                = PetscNew(Mat_SeqAIJ_SuperLU,&lu);CHKERRQ(ierr);
383   lu->MatView         = A->ops->view;
384   lu->MatAssemblyEnd  = A->ops->assemblyend;
385   lu->MatDestroy      = A->ops->destroy;
386   lu->CleanUpSuperLU  = PETSC_FALSE;
387   A->spptr            = (void*)lu;
388   A->ops->view        = MatView_SeqAIJ_SuperLU;
389   A->ops->assemblyend = MatAssemblyEnd_SeqAIJ_SuperLU;
390   A->ops->destroy     = MatDestroy_SeqAIJ_SuperLU;
391   PetscFunctionReturn(0);
392 }
393 EXTERN_C_END
394