1 #ifndef lint 2 static char vcid[] = "$Id: $"; 3 #endif 4 5 6 #include "aij.h" 7 #include "inline/spops.h" 8 /* 9 Factorization code for AIJ format. 10 */ 11 12 int MatiAIJLUFactorSymbolic(Mat mat,IS isrow,IS iscol,Mat *fact) 13 { 14 Matiaij *aij = (Matiaij *) mat->data, *aijnew; 15 IS isicol; 16 int *r,*ic, ierr, i, n = aij->m, *ai = aij->i, *aj = aij->j; 17 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz; 18 int *idnew, idx, row,m,fm, nnz, nzi,len; 19 20 if (n != aij->n) SETERR(1,"Mat must be square"); 21 if (!isrow) {SETERR(1,"Must have row permutation");} 22 if (!iscol) {SETERR(1,"Must have column permutation");} 23 24 if ((ierr = ISInvertPermutation(iscol,&isicol))) SETERR(ierr,0); 25 ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic); 26 27 /* get new row pointers */ 28 ainew = (int *) MALLOC( (n+1)*sizeof(int) ); CHKPTR(ainew); 29 ainew[0] = 1; 30 /* don't know how many column pointers are needed so estimate */ 31 jmax = 2*ai[n]; 32 ajnew = (int *) MALLOC( (jmax)*sizeof(int) ); CHKPTR(ajnew); 33 /* fill is a linked list of nonzeros in active row */ 34 fill = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(fill); 35 /* idnew is location of diagonal in factor */ 36 idnew = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(idnew); 37 idnew[0] = 1; 38 39 for ( i=0; i<n; i++ ) { 40 /* first copy previous fill into linked list */ 41 nnz = nz = ai[r[i]+1] - ai[r[i]]; 42 ajtmp = aj + ai[r[i]] - 1; 43 fill[n] = n; 44 while (nz--) { 45 fm = n; 46 idx = ic[*ajtmp++ - 1]; 47 do { 48 m = fm; 49 fm = fill[m]; 50 } while (fm < idx); 51 fill[m] = idx; 52 fill[idx] = fm; 53 } 54 row = fill[n]; 55 while ( row < i ) { 56 ajtmp = ajnew + idnew[row] - 1; 57 nz = ainew[row+1] - idnew[row]; 58 fm = row; 59 while (nz--) { 60 fm = n; 61 idx = *ajtmp++ - 1; 62 do { 63 m = fm; 64 fm = fill[m]; 65 } while (fm < idx); 66 if (fm != idx) { 67 fill[m] = idx; 68 fill[idx] = fm; 69 fm = idx; 70 nnz++; 71 } 72 } 73 row = fill[row]; 74 } 75 /* copy new filled row into permanent storage */ 76 ainew[i+1] = ainew[i] + nnz; 77 if (ainew[i+1] > jmax+1) { 78 /* allocate a longer ajnew */ 79 jmax += nnz*(n-i); 80 ajtmp = (int *) MALLOC( jmax*sizeof(int) );CHKPTR(ajtmp); 81 MEMCPY(ajtmp,ajnew,(ainew[i]-1)*sizeof(int)); 82 FREE(ajnew); 83 ajnew = ajtmp; 84 } 85 ajtmp = ajnew + ainew[i] - 1; 86 fm = fill[n]; 87 nzi = 0; 88 while (nnz--) { 89 if (fm < i) nzi++; 90 *ajtmp++ = fm + 1; 91 fm = fill[fm]; 92 } 93 idnew[i] = ainew[i] + nzi; 94 } 95 96 ISDestroy(isicol); FREE(fill); 97 98 /* put together the new matrix */ 99 ierr = MatCreateSequentialAIJ(n, n, 0, 0, fact); CHKERR(ierr); 100 aijnew = (Matiaij *) (*fact)->data; 101 FREE(aijnew->imax); 102 aijnew->singlemalloc = 0; 103 len = (ainew[n] - 1)*sizeof(Scalar); 104 /* the next line frees the default space generated by the Create() */ 105 FREE(aijnew->a); FREE(aijnew->ilen); 106 aijnew->a = (Scalar *) MALLOC( len ); CHKPTR(aijnew->a); 107 aijnew->j = ajnew; 108 aijnew->i = ainew; 109 aijnew->diag = idnew; 110 aijnew->ilen = 0; 111 aijnew->imax = 0; 112 (*fact)->row = isrow; 113 (*fact)->col = iscol; 114 (*fact)->factor = FACTOR_LU; 115 return 0; 116 } 117 118 int MatiAIJLUFactorNumeric(Mat mat,Mat *infact) 119 { 120 Mat fact = *infact; 121 Matiaij *aij = (Matiaij *) mat->data, *aijnew = (Matiaij *)fact->data; 122 IS iscol = fact->col, isrow = fact->row, isicol; 123 int *r,*ic, ierr, i, j, n = aij->m, *ai = aijnew->i, *aj = aijnew->j; 124 int *ajtmpold, *ajtmp, nz, row,*pj; 125 Scalar *rtmp,*v, *pv, *pc, multiplier; 126 127 if ((ierr = ISInvertPermutation(iscol,&isicol))) SETERR(ierr,0); 128 ierr = ISGetIndices(isrow,&r); CHKERR(ierr); 129 ierr = ISGetIndices(isicol,&ic); CHKERR(ierr); 130 rtmp = (Scalar *) MALLOC( (n+1)*sizeof(Scalar) ); CHKPTR(rtmp); 131 132 for ( i=0; i<n; i++ ) { 133 nz = ai[i+1] - ai[i]; 134 ajtmp = aj + ai[i] - 1; 135 for ( j=0; j<nz; j++ ) rtmp[ajtmp[j]-1] = 0.0; 136 137 /* load in initial (unfactored row) */ 138 nz = aij->i[r[i]+1] - aij->i[r[i]]; 139 ajtmpold = aij->j + aij->i[r[i]] - 1; 140 v = aij->a + aij->i[r[i]] - 1; 141 for ( j=0; j<nz; j++ ) rtmp[ic[ajtmpold[j]-1]] = v[j]; 142 143 row = *ajtmp++ - 1; 144 while (row < i) { 145 pc = rtmp + row; 146 if (*pc != 0.0) { 147 nz = aijnew->diag[row] - ai[row]; 148 pv = aijnew->a + aijnew->diag[row] - 1; 149 pj = aijnew->j + aijnew->diag[row]; 150 multiplier = *pc * *pv++; 151 *pc = multiplier; 152 nz = ai[row+1] - ai[row] - 1 - nz; 153 while (nz-->0) rtmp[*pj++ - 1] -= multiplier* *pv++; 154 } 155 row = *ajtmp++ - 1; 156 } 157 /* finished row so stick it into aijnew->a */ 158 pv = aijnew->a + ai[i] - 1; 159 pj = aijnew->j + ai[i] - 1; 160 nz = ai[i+1] - ai[i]; 161 rtmp[i] = 1.0/rtmp[i]; 162 for ( j=0; j<nz; j++ ) {pv[j] = rtmp[pj[j]-1];} 163 } 164 FREE(rtmp); 165 ierr = ISRestoreIndices(isicol,&ic); CHKERR(ierr); 166 ierr = ISRestoreIndices(isrow,&r); CHKERR(ierr); 167 ierr = ISDestroy(isicol); CHKERR(ierr); 168 fact->factor = FACTOR_LU; 169 170 return 0; 171 } 172 int MatiAIJLUFactor(Mat matin,IS row,IS col) 173 { 174 Matiaij *mat = (Matiaij *) matin->data; 175 int ierr; 176 Mat fact; 177 ierr = MatiAIJLUFactorSymbolic(matin,row,col,&fact); CHKERR(ierr); 178 ierr = MatiAIJLUFactorNumeric(matin,&fact); CHKERR(ierr); 179 180 /* free all the data structures from mat */ 181 FREE(mat->a); 182 if (!mat->singlemalloc) {FREE(mat->i); FREE(mat->j);} 183 if (mat->diag) FREE(mat->diag); 184 if (mat->ilen) FREE(mat->ilen); 185 if (mat->imax) FREE(mat->imax); 186 if (matin->row && matin->col && matin->row != matin->col) { 187 ISDestroy(matin->row); 188 } 189 if (matin->col) ISDestroy(matin->col); 190 FREE(mat); 191 192 MEMCPY(matin,fact,sizeof(struct _Mat)); 193 FREE(fact); 194 return 0; 195 } 196 197 int MatiAIJSolve(Mat mat,Vec bb, Vec xx) 198 { 199 Matiaij *aij = (Matiaij *) mat->data; 200 IS iscol = mat->col, isrow = mat->row; 201 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 202 int nz; 203 Scalar *x,*b,*tmp, *aa = aij->a, sum, *v; 204 205 if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0); 206 if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0); 207 tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp); 208 209 if ((ierr = ISGetIndices(isrow,&r))) SETERR(ierr,0); 210 if ((ierr = ISGetIndices(iscol,&c))) SETERR(ierr,0); c = c + (n-1); 211 212 /* forward solve the lower triangular */ 213 tmp[0] = b[*r++]; 214 for ( i=1; i<n; i++ ) { 215 v = aa + ai[i] - 1; 216 vi = aj + ai[i] - 1; 217 nz = aij->diag[i] - ai[i]; 218 sum = b[*r++]; 219 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 220 tmp[i] = sum; 221 } 222 223 /* backward solve the upper triangular */ 224 for ( i=n-1; i>=0; i-- ){ 225 v = aa + aij->diag[i]; 226 vi = aj + aij->diag[i]; 227 nz = ai[i+1] - aij->diag[i] - 1; 228 sum = tmp[i]; 229 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 230 x[*c--] = tmp[i] = sum*aa[aij->diag[i]-1]; 231 } 232 233 FREE(tmp); 234 return 0; 235 } 236 int MatiAIJSolveAdd(Mat mat,Vec bb, Vec yy, Vec xx) 237 { 238 Matiaij *aij = (Matiaij *) mat->data; 239 IS iscol = mat->col, isrow = mat->row; 240 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 241 int nz; 242 Scalar *x,*b,*tmp, *aa = aij->a, sum, *v; 243 244 if (yy != xx) {ierr = VecCopy(yy,xx); CHKERR(ierr);} 245 246 if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0); 247 if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0); 248 tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp); 249 250 if ((ierr = ISGetIndices(isrow,&r))) SETERR(ierr,0); 251 if ((ierr = ISGetIndices(iscol,&c))) SETERR(ierr,0); c = c + (n-1); 252 253 /* forward solve the lower triangular */ 254 tmp[0] = b[*r++]; 255 for ( i=1; i<n; i++ ) { 256 v = aa + ai[i] - 1; 257 vi = aj + ai[i] - 1; 258 nz = aij->diag[i] - ai[i]; 259 sum = b[*r++]; 260 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 261 tmp[i] = sum; 262 } 263 264 /* backward solve the upper triangular */ 265 for ( i=n-1; i>=0; i-- ){ 266 v = aa + aij->diag[i]; 267 vi = aj + aij->diag[i]; 268 nz = ai[i+1] - aij->diag[i] - 1; 269 sum = tmp[i]; 270 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 271 tmp[i] = sum*aa[aij->diag[i]-1]; 272 x[*c--] += tmp[i]; 273 } 274 275 FREE(tmp); 276 return 0; 277 } 278 /* -------------------------------------------------------------------*/ 279 int MatiAIJSolveTrans(Mat mat,Vec bb, Vec xx) 280 { 281 Matiaij *aij = (Matiaij *) mat->data; 282 IS iscol = mat->col, isrow = mat->row, invisrow,inviscol; 283 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 284 int nz; 285 Scalar *x,*b,*tmp, *aa = aij->a, *v; 286 287 if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0); 288 if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0); 289 tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp); 290 291 /* invert the permutations */ 292 ierr = ISInvertPermutation(isrow,&invisrow); CHKERR(ierr); 293 ierr = ISInvertPermutation(iscol,&inviscol); CHKERR(ierr); 294 295 296 if ((ierr = ISGetIndices(invisrow,&r))) SETERR(ierr,0); 297 if ((ierr = ISGetIndices(inviscol,&c))) SETERR(ierr,0); 298 299 /* copy the b into temp work space according to permutation */ 300 for ( i=0; i<n; i++ ) tmp[c[i]] = b[i]; 301 302 /* forward solve the U^T */ 303 for ( i=0; i<n; i++ ) { 304 v = aa + aij->diag[i] - 1; 305 vi = aj + aij->diag[i]; 306 nz = ai[i+1] - aij->diag[i] - 1; 307 tmp[i] *= *v++; 308 while (nz--) { 309 tmp[*vi++ - 1] -= (*v++)*tmp[i]; 310 } 311 } 312 313 /* backward solve the L^T */ 314 for ( i=n-1; i>=0; i-- ){ 315 v = aa + aij->diag[i] - 2; 316 vi = aj + aij->diag[i] - 2; 317 nz = aij->diag[i] - ai[i]; 318 while (nz--) { 319 tmp[*vi-- - 1] -= (*v--)*tmp[i]; 320 } 321 } 322 323 /* copy tmp into x according to permutation */ 324 for ( i=0; i<n; i++ ) x[r[i]] = tmp[i]; 325 326 ISDestroy(invisrow); ISDestroy(inviscol); 327 328 FREE(tmp); 329 return 0; 330 } 331 332 int MatiAIJSolveTransAdd(Mat mat,Vec bb, Vec zz,Vec xx) 333 { 334 Matiaij *aij = (Matiaij *) mat->data; 335 IS iscol = mat->col, isrow = mat->row, invisrow,inviscol; 336 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 337 int nz; 338 Scalar *x,*b,*tmp, *aa = aij->a, *v; 339 340 if (zz != xx) VecCopy(zz,xx); 341 342 if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0); 343 if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0); 344 tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp); 345 346 /* invert the permutations */ 347 ierr = ISInvertPermutation(isrow,&invisrow); CHKERR(ierr); 348 ierr = ISInvertPermutation(iscol,&inviscol); CHKERR(ierr); 349 350 351 if ((ierr = ISGetIndices(invisrow,&r))) SETERR(ierr,0); 352 if ((ierr = ISGetIndices(inviscol,&c))) SETERR(ierr,0); 353 354 /* copy the b into temp work space according to permutation */ 355 for ( i=0; i<n; i++ ) tmp[c[i]] = b[i]; 356 357 /* forward solve the U^T */ 358 for ( i=0; i<n; i++ ) { 359 v = aa + aij->diag[i] - 1; 360 vi = aj + aij->diag[i]; 361 nz = ai[i+1] - aij->diag[i] - 1; 362 tmp[i] *= *v++; 363 while (nz--) { 364 tmp[*vi++ - 1] -= (*v++)*tmp[i]; 365 } 366 } 367 368 /* backward solve the L^T */ 369 for ( i=n-1; i>=0; i-- ){ 370 v = aa + aij->diag[i] - 2; 371 vi = aj + aij->diag[i] - 2; 372 nz = aij->diag[i] - ai[i]; 373 while (nz--) { 374 tmp[*vi-- - 1] -= (*v--)*tmp[i]; 375 } 376 } 377 378 /* copy tmp into x according to permutation */ 379 for ( i=0; i<n; i++ ) x[r[i]] += tmp[i]; 380 381 ISDestroy(invisrow); ISDestroy(inviscol); 382 383 FREE(tmp); 384 return 0; 385 386 } 387