xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision 6abc6512c105a871402f018420c4c4316bc1e68d)
1289bc588SBarry Smith 
2289bc588SBarry Smith 
3289bc588SBarry Smith #include "aij.h"
48c37ef55SBarry Smith #include "inline/spops.h"
/*
    Symbolic and numeric LU factorization, and the associated triangular
    solves, for the sequential AIJ sparse matrix format.  Row pointers and
    column indices in this file are stored one-based.
*/
8289bc588SBarry Smith 
9289bc588SBarry Smith int MatiAIJLUFactorSymbolic(Mat mat,IS isrow,IS iscol,Mat *fact)
10289bc588SBarry Smith {
11289bc588SBarry Smith   Matiaij *aij = (Matiaij *) mat->data, *aijnew;
12289bc588SBarry Smith   IS      isicol;
13*6abc6512SBarry Smith   int     *r,*ic, ierr, i, n = aij->m, *ai = aij->i, *aj = aij->j;
14*6abc6512SBarry Smith   int     *ainew,*ajnew, jmax,*fill, *ajtmp, nz;
15*6abc6512SBarry Smith   int     *idnew, idx, row,m,fm, nnz, nzi,len;
16289bc588SBarry Smith 
17289bc588SBarry Smith   if (n != aij->n) SETERR(1,"Mat must be square");
18289bc588SBarry Smith   if (!isrow) {SETERR(1,"Must have row permutation");}
19289bc588SBarry Smith   if (!iscol) {SETERR(1,"Must have column permutation");}
20289bc588SBarry Smith 
21*6abc6512SBarry Smith   if ((ierr = ISInvertPermutation(iscol,&isicol))) SETERR(ierr,0);
22289bc588SBarry Smith   ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic);
23289bc588SBarry Smith 
24289bc588SBarry Smith   /* get new row pointers */
25289bc588SBarry Smith   ainew = (int *) MALLOC( (n+1)*sizeof(int) ); CHKPTR(ainew);
26289bc588SBarry Smith   ainew[0] = 1;
27289bc588SBarry Smith   /* don't know how many column pointers are needed so estimate */
28289bc588SBarry Smith   jmax = 2*ai[n];
29289bc588SBarry Smith   ajnew = (int *) MALLOC( (jmax)*sizeof(int) ); CHKPTR(ajnew);
30289bc588SBarry Smith   /* fill is a linked list of nonzeros in active row */
31289bc588SBarry Smith   fill = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(fill);
32289bc588SBarry Smith   /* idnew is location of diagonal in factor */
33289bc588SBarry Smith   idnew = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(idnew);
34289bc588SBarry Smith   idnew[0] = 1;
35289bc588SBarry Smith 
36289bc588SBarry Smith   for ( i=0; i<n; i++ ) {
37289bc588SBarry Smith     /* first copy previous fill into linked list */
38289bc588SBarry Smith     nnz = nz    = ai[r[i]+1] - ai[r[i]];
39289bc588SBarry Smith     ajtmp = aj + ai[r[i]] - 1;
40289bc588SBarry Smith     fill[n] = n;
41289bc588SBarry Smith     while (nz--) {
42289bc588SBarry Smith       fm = n;
43289bc588SBarry Smith       idx = ic[*ajtmp++ - 1];
44289bc588SBarry Smith       do {
45289bc588SBarry Smith         m = fm;
46289bc588SBarry Smith         fm = fill[m];
47289bc588SBarry Smith       } while (fm < idx);
48289bc588SBarry Smith       fill[m] = idx;
49289bc588SBarry Smith       fill[idx] = fm;
50289bc588SBarry Smith     }
51289bc588SBarry Smith     row = fill[n];
52289bc588SBarry Smith     while ( row < i ) {
53289bc588SBarry Smith       ajtmp = ajnew + idnew[row] - 1;
54289bc588SBarry Smith       nz = ainew[row+1] - idnew[row];
55289bc588SBarry Smith       fm = row;
56289bc588SBarry Smith       while (nz--) {
57289bc588SBarry Smith         fm = n;
58289bc588SBarry Smith         idx = *ajtmp++ - 1;
59289bc588SBarry Smith         do {
60289bc588SBarry Smith           m = fm;
61289bc588SBarry Smith           fm = fill[m];
62289bc588SBarry Smith         } while (fm < idx);
63289bc588SBarry Smith         if (fm != idx) {
64289bc588SBarry Smith           fill[m] = idx;
65289bc588SBarry Smith           fill[idx] = fm;
66289bc588SBarry Smith           fm = idx;
67289bc588SBarry Smith           nnz++;
68289bc588SBarry Smith         }
69289bc588SBarry Smith       }
70289bc588SBarry Smith       row = fill[row];
71289bc588SBarry Smith     }
72289bc588SBarry Smith     /* copy new filled row into permanent storage */
73289bc588SBarry Smith     ainew[i+1] = ainew[i] + nnz;
74289bc588SBarry Smith     if (ainew[i+1] > jmax+1) {
75289bc588SBarry Smith       /* allocate a longer ajnew */
76289bc588SBarry Smith       jmax += nnz*(n-i);
77289bc588SBarry Smith       ajtmp = (int *) MALLOC( jmax*sizeof(int) );CHKPTR(ajtmp);
78289bc588SBarry Smith       MEMCPY(ajtmp,ajnew,(ainew[i]-1)*sizeof(int));
79289bc588SBarry Smith       FREE(ajnew);
80289bc588SBarry Smith       ajnew = ajtmp;
81289bc588SBarry Smith     }
82289bc588SBarry Smith     ajtmp = ajnew + ainew[i] - 1;
83289bc588SBarry Smith     fm = fill[n];
84289bc588SBarry Smith     nzi = 0;
85289bc588SBarry Smith     while (nnz--) {
86289bc588SBarry Smith       if (fm < i) nzi++;
87289bc588SBarry Smith       *ajtmp++ = fm + 1;
88289bc588SBarry Smith       fm = fill[fm];
89289bc588SBarry Smith     }
90289bc588SBarry Smith     idnew[i] = ainew[i] + nzi;
91289bc588SBarry Smith   }
92289bc588SBarry Smith 
93289bc588SBarry Smith   ISDestroy(isicol); FREE(fill);
94289bc588SBarry Smith 
95289bc588SBarry Smith   /* put together the new matrix */
9611d228e4SBarry Smith   ierr = MatCreateSequentialAIJ(n, n, 0, 0, fact); CHKERR(ierr);
97289bc588SBarry Smith   aijnew = (Matiaij *) (*fact)->data;
98289bc588SBarry Smith   FREE(aijnew->imax);
99289bc588SBarry Smith   aijnew->singlemalloc = 0;
100f0479e8cSBarry Smith   len = (ainew[n] - 1)*sizeof(Scalar);
101e8d4e0b9SBarry Smith   /* the next line frees the default space generated by the Create() */
102e8d4e0b9SBarry Smith   FREE(aijnew->a); FREE(aijnew->ilen);
103289bc588SBarry Smith   aijnew->a         = (Scalar *) MALLOC( len ); CHKPTR(aijnew->a);
104289bc588SBarry Smith   aijnew->j         = ajnew;
105289bc588SBarry Smith   aijnew->i         = ainew;
1068c37ef55SBarry Smith   aijnew->diag      = idnew;
107e8d4e0b9SBarry Smith   aijnew->ilen      = 0;
10820563c6bSBarry Smith   aijnew->imax      = 0;
109289bc588SBarry Smith   (*fact)->row      = isrow;
110289bc588SBarry Smith   (*fact)->col      = iscol;
111289bc588SBarry Smith   (*fact)->factor   = FACTOR_LU;
112289bc588SBarry Smith   return 0;
113289bc588SBarry Smith }
114289bc588SBarry Smith 
11520563c6bSBarry Smith int MatiAIJLUFactorNumeric(Mat mat,Mat *infact)
116289bc588SBarry Smith {
11720563c6bSBarry Smith   Mat     fact = *infact;
118289bc588SBarry Smith   Matiaij *aij = (Matiaij *) mat->data, *aijnew = (Matiaij *)fact->data;
119289bc588SBarry Smith   IS      iscol = fact->col, isrow = fact->row, isicol;
120289bc588SBarry Smith   int     *r,*ic, ierr, i, j, n = aij->m, *ai = aijnew->i, *aj = aijnew->j;
121*6abc6512SBarry Smith   int     *ajtmpold, *ajtmp, nz, row,*pj;
122*6abc6512SBarry Smith   Scalar  *rtmp,*v, *pv, *pc, multiplier;
123289bc588SBarry Smith 
124*6abc6512SBarry Smith   if ((ierr = ISInvertPermutation(iscol,&isicol))) SETERR(ierr,0);
1258c37ef55SBarry Smith   ierr = ISGetIndices(isrow,&r); CHKERR(ierr);
1268c37ef55SBarry Smith   ierr = ISGetIndices(isicol,&ic); CHKERR(ierr);
127289bc588SBarry Smith   rtmp = (Scalar *) MALLOC( (n+1)*sizeof(Scalar) ); CHKPTR(rtmp);
128289bc588SBarry Smith 
129289bc588SBarry Smith   for ( i=0; i<n; i++ ) {
130289bc588SBarry Smith     nz = ai[i+1] - ai[i];
131289bc588SBarry Smith     ajtmp = aj + ai[i] - 1;
132289bc588SBarry Smith     for  ( j=0; j<nz; j++ ) rtmp[ajtmp[j]-1] = 0.0;
133289bc588SBarry Smith 
134289bc588SBarry Smith     /* load in initial (unfactored row) */
1358c37ef55SBarry Smith     nz = aij->i[r[i]+1] - aij->i[r[i]];
1368c37ef55SBarry Smith     ajtmpold = aij->j + aij->i[r[i]] - 1;
1378c37ef55SBarry Smith     v  = aij->a + aij->i[r[i]] - 1;
1388c37ef55SBarry Smith     for ( j=0; j<nz; j++ ) rtmp[ic[ajtmpold[j]-1]] =  v[j];
139289bc588SBarry Smith 
1408c37ef55SBarry Smith     row = *ajtmp++ - 1;
141289bc588SBarry Smith     while (row < i) {
1428c37ef55SBarry Smith       pc = rtmp + row;
1438c37ef55SBarry Smith       if (*pc != 0.0) {
1448c37ef55SBarry Smith         nz = aijnew->diag[row] - ai[row];
1458c37ef55SBarry Smith         pv = aijnew->a + aijnew->diag[row] - 1;
1468c37ef55SBarry Smith         pj = aijnew->j + aijnew->diag[row];
1478c37ef55SBarry Smith         multiplier = *pc * *pv++;
1488c37ef55SBarry Smith         *pc = multiplier;
1498c37ef55SBarry Smith         nz = ai[row+1] - ai[row] - 1 - nz;
1508c37ef55SBarry Smith         while (nz-->0) rtmp[*pj++ - 1] -= multiplier* *pv++;
151289bc588SBarry Smith       }
1528c37ef55SBarry Smith       row = *ajtmp++ - 1;
153289bc588SBarry Smith     }
1548c37ef55SBarry Smith     /* finished row so stick it into aijnew->a */
1558c37ef55SBarry Smith     pv = aijnew->a + ai[i] - 1;
1568c37ef55SBarry Smith     pj = aijnew->j + ai[i] - 1;
1578c37ef55SBarry Smith     nz = ai[i+1] - ai[i];
1588c37ef55SBarry Smith     rtmp[i] = 1.0/rtmp[i];
1598c37ef55SBarry Smith     for ( j=0; j<nz; j++ ) {pv[j] = rtmp[pj[j]-1];}
1608c37ef55SBarry Smith   }
1618c37ef55SBarry Smith   FREE(rtmp);
162f0479e8cSBarry Smith   ierr = ISRestoreIndices(isicol,&ic); CHKERR(ierr);
163f0479e8cSBarry Smith   ierr = ISRestoreIndices(isrow,&r); CHKERR(ierr);
1648c37ef55SBarry Smith   ierr = ISDestroy(isicol); CHKERR(ierr);
1658c37ef55SBarry Smith   fact->factor = FACTOR_LU;
166289bc588SBarry Smith 
167289bc588SBarry Smith   return 0;
168289bc588SBarry Smith }
169da3a660dSBarry Smith int MatiAIJLUFactor(Mat matin,IS row,IS col)
170da3a660dSBarry Smith {
171da3a660dSBarry Smith   Matiaij *mat = (Matiaij *) matin->data;
172*6abc6512SBarry Smith   int     ierr;
173da3a660dSBarry Smith   Mat     fact;
174da3a660dSBarry Smith   ierr = MatiAIJLUFactorSymbolic(matin,row,col,&fact); CHKERR(ierr);
175da3a660dSBarry Smith   ierr = MatiAIJLUFactorNumeric(matin,&fact); CHKERR(ierr);
176da3a660dSBarry Smith 
177da3a660dSBarry Smith   /* free all the data structures from mat */
178da3a660dSBarry Smith   FREE(mat->a);
179da3a660dSBarry Smith   if (!mat->singlemalloc) {FREE(mat->i); FREE(mat->j);}
180da3a660dSBarry Smith   if (mat->diag) FREE(mat->diag);
181da3a660dSBarry Smith   if (mat->ilen) FREE(mat->ilen);
182da3a660dSBarry Smith   if (mat->imax) FREE(mat->imax);
183da3a660dSBarry Smith   if (matin->row && matin->col && matin->row != matin->col) {
184da3a660dSBarry Smith     ISDestroy(matin->row);
185da3a660dSBarry Smith   }
186da3a660dSBarry Smith   if (matin->col) ISDestroy(matin->col);
187da3a660dSBarry Smith   FREE(mat);
188da3a660dSBarry Smith 
189da3a660dSBarry Smith   MEMCPY(matin,fact,sizeof(struct _Mat));
190da3a660dSBarry Smith   FREE(fact);
191da3a660dSBarry Smith   return 0;
192da3a660dSBarry Smith }
193da3a660dSBarry Smith 
1948c37ef55SBarry Smith int MatiAIJSolve(Mat mat,Vec bb, Vec xx)
1958c37ef55SBarry Smith {
1968c37ef55SBarry Smith   Matiaij *aij = (Matiaij *) mat->data;
1978c37ef55SBarry Smith   IS      iscol = mat->col, isrow = mat->row;
198*6abc6512SBarry Smith   int     *r,*c, ierr, i,  n = aij->m, *vi, *ai = aij->i, *aj = aij->j;
1998c37ef55SBarry Smith   int     nz;
2008c37ef55SBarry Smith   Scalar  *x,*b,*tmp, *aa = aij->a, sum, *v;
2018c37ef55SBarry Smith 
202*6abc6512SBarry Smith   if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0);
203*6abc6512SBarry Smith   if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0);
2048c37ef55SBarry Smith   tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp);
2058c37ef55SBarry Smith 
206*6abc6512SBarry Smith   if ((ierr = ISGetIndices(isrow,&r))) SETERR(ierr,0);
207*6abc6512SBarry Smith   if ((ierr = ISGetIndices(iscol,&c))) SETERR(ierr,0); c = c + (n-1);
2088c37ef55SBarry Smith 
2098c37ef55SBarry Smith   /* forward solve the lower triangular */
2108c37ef55SBarry Smith   tmp[0] = b[*r++];
2118c37ef55SBarry Smith   for ( i=1; i<n; i++ ) {
2128c37ef55SBarry Smith     v   = aa + ai[i] - 1;
2138c37ef55SBarry Smith     vi  = aj + ai[i] - 1;
2148c37ef55SBarry Smith     nz  = aij->diag[i] - ai[i];
2158c37ef55SBarry Smith     sum = b[*r++];
2168c37ef55SBarry Smith     while (nz--) sum -= *v++ * tmp[*vi++ - 1];
2178c37ef55SBarry Smith     tmp[i] = sum;
2188c37ef55SBarry Smith   }
2198c37ef55SBarry Smith 
2208c37ef55SBarry Smith   /* backward solve the upper triangular */
2218c37ef55SBarry Smith   for ( i=n-1; i>=0; i-- ){
2228c37ef55SBarry Smith     v   = aa + aij->diag[i];
2238c37ef55SBarry Smith     vi  = aj + aij->diag[i];
2248c37ef55SBarry Smith     nz  = ai[i+1] - aij->diag[i] - 1;
2258c37ef55SBarry Smith     sum = tmp[i];
2268c37ef55SBarry Smith     while (nz--) sum -= *v++ * tmp[*vi++ - 1];
2278c37ef55SBarry Smith     x[*c--] = tmp[i] = sum*aa[aij->diag[i]-1];
2288c37ef55SBarry Smith   }
2298c37ef55SBarry Smith 
2308c37ef55SBarry Smith   FREE(tmp);
2318c37ef55SBarry Smith   return 0;
2328c37ef55SBarry Smith }
233da3a660dSBarry Smith int MatiAIJSolveAdd(Mat mat,Vec bb, Vec yy, Vec xx)
234da3a660dSBarry Smith {
235da3a660dSBarry Smith   Matiaij *aij = (Matiaij *) mat->data;
236da3a660dSBarry Smith   IS      iscol = mat->col, isrow = mat->row;
237*6abc6512SBarry Smith   int     *r,*c, ierr, i,  n = aij->m, *vi, *ai = aij->i, *aj = aij->j;
238da3a660dSBarry Smith   int     nz;
239da3a660dSBarry Smith   Scalar  *x,*b,*tmp, *aa = aij->a, sum, *v;
240da3a660dSBarry Smith 
241da3a660dSBarry Smith   if (yy != xx) {ierr = VecCopy(yy,xx); CHKERR(ierr);}
242da3a660dSBarry Smith 
243*6abc6512SBarry Smith   if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0);
244*6abc6512SBarry Smith   if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0);
245da3a660dSBarry Smith   tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp);
246da3a660dSBarry Smith 
247*6abc6512SBarry Smith   if ((ierr = ISGetIndices(isrow,&r))) SETERR(ierr,0);
248*6abc6512SBarry Smith   if ((ierr = ISGetIndices(iscol,&c))) SETERR(ierr,0); c = c + (n-1);
249da3a660dSBarry Smith 
250da3a660dSBarry Smith   /* forward solve the lower triangular */
251da3a660dSBarry Smith   tmp[0] = b[*r++];
252da3a660dSBarry Smith   for ( i=1; i<n; i++ ) {
253da3a660dSBarry Smith     v   = aa + ai[i] - 1;
254da3a660dSBarry Smith     vi  = aj + ai[i] - 1;
255da3a660dSBarry Smith     nz  = aij->diag[i] - ai[i];
256da3a660dSBarry Smith     sum = b[*r++];
257da3a660dSBarry Smith     while (nz--) sum -= *v++ * tmp[*vi++ - 1];
258da3a660dSBarry Smith     tmp[i] = sum;
259da3a660dSBarry Smith   }
260da3a660dSBarry Smith 
261da3a660dSBarry Smith   /* backward solve the upper triangular */
262da3a660dSBarry Smith   for ( i=n-1; i>=0; i-- ){
263da3a660dSBarry Smith     v   = aa + aij->diag[i];
264da3a660dSBarry Smith     vi  = aj + aij->diag[i];
265da3a660dSBarry Smith     nz  = ai[i+1] - aij->diag[i] - 1;
266da3a660dSBarry Smith     sum = tmp[i];
267da3a660dSBarry Smith     while (nz--) sum -= *v++ * tmp[*vi++ - 1];
268da3a660dSBarry Smith     tmp[i] = sum*aa[aij->diag[i]-1];
269da3a660dSBarry Smith     x[*c--] += tmp[i];
270da3a660dSBarry Smith   }
271da3a660dSBarry Smith 
272da3a660dSBarry Smith   FREE(tmp);
273da3a660dSBarry Smith   return 0;
274da3a660dSBarry Smith }
275da3a660dSBarry Smith /* -------------------------------------------------------------------*/
276da3a660dSBarry Smith int MatiAIJSolveTrans(Mat mat,Vec bb, Vec xx)
277da3a660dSBarry Smith {
278da3a660dSBarry Smith   Matiaij *aij = (Matiaij *) mat->data;
279da3a660dSBarry Smith   IS      iscol = mat->col, isrow = mat->row, invisrow,inviscol;
280*6abc6512SBarry Smith   int     *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j;
281da3a660dSBarry Smith   int     nz;
282*6abc6512SBarry Smith   Scalar  *x,*b,*tmp, *aa = aij->a, *v;
283da3a660dSBarry Smith 
284*6abc6512SBarry Smith   if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0);
285*6abc6512SBarry Smith   if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0);
286da3a660dSBarry Smith   tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp);
287da3a660dSBarry Smith 
288da3a660dSBarry Smith   /* invert the permutations */
289da3a660dSBarry Smith   ierr = ISInvertPermutation(isrow,&invisrow); CHKERR(ierr);
290da3a660dSBarry Smith   ierr = ISInvertPermutation(iscol,&inviscol); CHKERR(ierr);
291da3a660dSBarry Smith 
292da3a660dSBarry Smith 
293*6abc6512SBarry Smith   if ((ierr = ISGetIndices(invisrow,&r))) SETERR(ierr,0);
294*6abc6512SBarry Smith   if ((ierr = ISGetIndices(inviscol,&c))) SETERR(ierr,0);
295da3a660dSBarry Smith 
296da3a660dSBarry Smith   /* copy the b into temp work space according to permutation */
297da3a660dSBarry Smith   for ( i=0; i<n; i++ ) tmp[c[i]] = b[i];
298da3a660dSBarry Smith 
299da3a660dSBarry Smith   /* forward solve the U^T */
300da3a660dSBarry Smith   for ( i=0; i<n; i++ ) {
301da3a660dSBarry Smith     v   = aa + aij->diag[i] - 1;
302da3a660dSBarry Smith     vi  = aj + aij->diag[i];
303da3a660dSBarry Smith     nz  = ai[i+1] - aij->diag[i] - 1;
304da3a660dSBarry Smith     tmp[i] *= *v++;
305da3a660dSBarry Smith     while (nz--) {
306da3a660dSBarry Smith       tmp[*vi++ - 1] -= (*v++)*tmp[i];
307da3a660dSBarry Smith     }
308da3a660dSBarry Smith   }
309da3a660dSBarry Smith 
310da3a660dSBarry Smith   /* backward solve the L^T */
311da3a660dSBarry Smith   for ( i=n-1; i>=0; i-- ){
312da3a660dSBarry Smith     v   = aa + aij->diag[i] - 2;
313da3a660dSBarry Smith     vi  = aj + aij->diag[i] - 2;
314da3a660dSBarry Smith     nz  = aij->diag[i] - ai[i];
315da3a660dSBarry Smith     while (nz--) {
316da3a660dSBarry Smith       tmp[*vi-- - 1] -= (*v--)*tmp[i];
317da3a660dSBarry Smith     }
318da3a660dSBarry Smith   }
319da3a660dSBarry Smith 
320da3a660dSBarry Smith   /* copy tmp into x according to permutation */
321da3a660dSBarry Smith   for ( i=0; i<n; i++ ) x[r[i]] = tmp[i];
322da3a660dSBarry Smith 
323da3a660dSBarry Smith   ISDestroy(invisrow); ISDestroy(inviscol);
324da3a660dSBarry Smith 
325da3a660dSBarry Smith   FREE(tmp);
326da3a660dSBarry Smith   return 0;
327da3a660dSBarry Smith }
328da3a660dSBarry Smith 
329*6abc6512SBarry Smith int MatiAIJSolveTransAdd(Mat mat,Vec bb, Vec zz,Vec xx)
330da3a660dSBarry Smith {
331da3a660dSBarry Smith   Matiaij *aij = (Matiaij *) mat->data;
332*6abc6512SBarry Smith   IS      iscol = mat->col, isrow = mat->row, invisrow,inviscol;
333*6abc6512SBarry Smith   int     *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j;
334*6abc6512SBarry Smith   int     nz;
335*6abc6512SBarry Smith   Scalar  *x,*b,*tmp, *aa = aij->a, *v;
336*6abc6512SBarry Smith 
337*6abc6512SBarry Smith   if (zz != xx) VecCopy(zz,xx);
338*6abc6512SBarry Smith 
339*6abc6512SBarry Smith   if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0);
340*6abc6512SBarry Smith   if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0);
341*6abc6512SBarry Smith   tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp);
342*6abc6512SBarry Smith 
343*6abc6512SBarry Smith   /* invert the permutations */
344*6abc6512SBarry Smith   ierr = ISInvertPermutation(isrow,&invisrow); CHKERR(ierr);
345*6abc6512SBarry Smith   ierr = ISInvertPermutation(iscol,&inviscol); CHKERR(ierr);
346*6abc6512SBarry Smith 
347*6abc6512SBarry Smith 
348*6abc6512SBarry Smith   if ((ierr = ISGetIndices(invisrow,&r))) SETERR(ierr,0);
349*6abc6512SBarry Smith   if ((ierr = ISGetIndices(inviscol,&c))) SETERR(ierr,0);
350*6abc6512SBarry Smith 
351*6abc6512SBarry Smith   /* copy the b into temp work space according to permutation */
352*6abc6512SBarry Smith   for ( i=0; i<n; i++ ) tmp[c[i]] = b[i];
353*6abc6512SBarry Smith 
354*6abc6512SBarry Smith   /* forward solve the U^T */
355*6abc6512SBarry Smith   for ( i=0; i<n; i++ ) {
356*6abc6512SBarry Smith     v   = aa + aij->diag[i] - 1;
357*6abc6512SBarry Smith     vi  = aj + aij->diag[i];
358*6abc6512SBarry Smith     nz  = ai[i+1] - aij->diag[i] - 1;
359*6abc6512SBarry Smith     tmp[i] *= *v++;
360*6abc6512SBarry Smith     while (nz--) {
361*6abc6512SBarry Smith       tmp[*vi++ - 1] -= (*v++)*tmp[i];
362*6abc6512SBarry Smith     }
363*6abc6512SBarry Smith   }
364*6abc6512SBarry Smith 
365*6abc6512SBarry Smith   /* backward solve the L^T */
366*6abc6512SBarry Smith   for ( i=n-1; i>=0; i-- ){
367*6abc6512SBarry Smith     v   = aa + aij->diag[i] - 2;
368*6abc6512SBarry Smith     vi  = aj + aij->diag[i] - 2;
369*6abc6512SBarry Smith     nz  = aij->diag[i] - ai[i];
370*6abc6512SBarry Smith     while (nz--) {
371*6abc6512SBarry Smith       tmp[*vi-- - 1] -= (*v--)*tmp[i];
372*6abc6512SBarry Smith     }
373*6abc6512SBarry Smith   }
374*6abc6512SBarry Smith 
375*6abc6512SBarry Smith   /* copy tmp into x according to permutation */
376*6abc6512SBarry Smith   for ( i=0; i<n; i++ ) x[r[i]] += tmp[i];
377*6abc6512SBarry Smith 
378*6abc6512SBarry Smith   ISDestroy(invisrow); ISDestroy(inviscol);
379*6abc6512SBarry Smith 
380*6abc6512SBarry Smith   FREE(tmp);
381*6abc6512SBarry Smith   return 0;
382*6abc6512SBarry Smith 
383da3a660dSBarry Smith }
384