1 #ifndef lint 2 static char vcid[] = "$Id: aijfact.c,v 1.14 1995/04/15 03:28:04 bsmith Exp bsmith $"; 3 #endif 4 5 6 #include "aij.h" 7 #include "inline/spops.h" 8 /* 9 Factorization code for AIJ format. 10 */ 11 12 int MatLUFactorSymbolic_AIJ(Mat mat,IS isrow,IS iscol,Mat *fact) 13 { 14 Mat_AIJ *aij = (Mat_AIJ *) mat->data, *aijnew; 15 IS isicol; 16 int *r,*ic, ierr, i, n = aij->m, *ai = aij->i, *aj = aij->j; 17 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz; 18 int *idnew, idx, row,m,fm, nnz, nzi,len; 19 20 if (n != aij->n) SETERR(1,"Mat must be square"); 21 if (!isrow) {SETERR(1,"Must have row permutation");} 22 if (!iscol) {SETERR(1,"Must have column permutation");} 23 24 if ((ierr = ISInvertPermutation(iscol,&isicol))) SETERR(ierr,0); 25 ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic); 26 27 /* get new row pointers */ 28 ainew = (int *) MALLOC( (n+1)*sizeof(int) ); CHKPTR(ainew); 29 ainew[0] = 1; 30 /* don't know how many column pointers are needed so estimate */ 31 jmax = 2*ai[n]; 32 ajnew = (int *) MALLOC( (jmax)*sizeof(int) ); CHKPTR(ajnew); 33 /* fill is a linked list of nonzeros in active row */ 34 fill = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(fill); 35 /* idnew is location of diagonal in factor */ 36 idnew = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(idnew); 37 idnew[0] = 1; 38 39 for ( i=0; i<n; i++ ) { 40 /* first copy previous fill into linked list */ 41 nnz = nz = ai[r[i]+1] - ai[r[i]]; 42 ajtmp = aj + ai[r[i]] - 1; 43 fill[n] = n; 44 while (nz--) { 45 fm = n; 46 idx = ic[*ajtmp++ - 1]; 47 do { 48 m = fm; 49 fm = fill[m]; 50 } while (fm < idx); 51 fill[m] = idx; 52 fill[idx] = fm; 53 } 54 row = fill[n]; 55 while ( row < i ) { 56 ajtmp = ajnew + idnew[row] - 1; 57 nz = ainew[row+1] - idnew[row]; 58 fm = row; 59 while (nz--) { 60 fm = n; 61 idx = *ajtmp++ - 1; 62 do { 63 m = fm; 64 fm = fill[m]; 65 } while (fm < idx); 66 if (fm != idx) { 67 fill[m] = idx; 68 fill[idx] = fm; 69 fm = idx; 70 nnz++; 71 } 72 } 73 row = fill[row]; 74 } 75 /* copy new filled row into permanent storage */ 76 ainew[i+1] = ainew[i] + nnz; 77 if (ainew[i+1] > jmax+1) { 78 /* allocate a longer ajnew */ 79 jmax += nnz*(n-i); 80 ajtmp = (int *) MALLOC( jmax*sizeof(int) );CHKPTR(ajtmp); 81 MEMCPY(ajtmp,ajnew,(ainew[i]-1)*sizeof(int)); 82 FREE(ajnew); 83 ajnew = ajtmp; 84 } 85 ajtmp = ajnew + ainew[i] - 1; 86 fm = fill[n]; 87 nzi = 0; 88 while (nnz--) { 89 if (fm < i) nzi++; 90 *ajtmp++ = fm + 1; 91 fm = fill[fm]; 92 } 93 idnew[i] = ainew[i] + nzi; 94 } 95 96 ISDestroy(isicol); FREE(fill); 97 98 /* put together the new matrix */ 99 ierr = MatCreateSequentialAIJ(mat->comm,n, n, 0, 0, fact); CHKERR(ierr); 100 aijnew = (Mat_AIJ *) (*fact)->data; 101 FREE(aijnew->imax); 102 aijnew->singlemalloc = 0; 103 len = (ainew[n] - 1)*sizeof(Scalar); 104 /* the next line frees the default space generated by the Create() */ 105 FREE(aijnew->a); FREE(aijnew->ilen); 106 aijnew->a = (Scalar *) MALLOC( len ); CHKPTR(aijnew->a); 107 aijnew->j = ajnew; 108 aijnew->i = ainew; 109 aijnew->diag = idnew; 110 aijnew->ilen = 0; 111 aijnew->imax = 0; 112 aijnew->row = isrow; 113 aijnew->col = iscol; 114 (*fact)->factor = FACTOR_LU; 115 /* Cannot do this here because child is destroyed before parent created 116 PLogObjectParent(*fact,isicol); */ 117 return 0; 118 } 119 120 int MatLUFactorNumeric_AIJ(Mat mat,Mat *infact) 121 { 122 Mat fact = *infact; 123 Mat_AIJ *aij = (Mat_AIJ *) mat->data, *aijnew = (Mat_AIJ *)fact->data; 124 IS iscol = aijnew->col, isrow = aijnew->row, isicol; 125 int *r,*ic, ierr, i, j, n = aij->m, *ai = aijnew->i, *aj = aijnew->j; 126 int *ajtmpold, *ajtmp, nz, row,*pj; 127 Scalar *rtmp,*v, *pv, *pc, multiplier; 128 129 if ((ierr = ISInvertPermutation(iscol,&isicol))) SETERR(ierr,0); 130 PLogObjectParent(*infact,isicol); 131 ierr = ISGetIndices(isrow,&r); CHKERR(ierr); 132 ierr = ISGetIndices(isicol,&ic); CHKERR(ierr); 133 rtmp = (Scalar *) MALLOC( (n+1)*sizeof(Scalar) ); CHKPTR(rtmp); 134 135 for ( i=0; i<n; i++ ) { 136 nz = ai[i+1] - ai[i]; 137 ajtmp = aj + ai[i] - 1; 138 for ( j=0; j<nz; j++ ) rtmp[ajtmp[j]-1] = 0.0; 139 140 /* load in initial (unfactored row) */ 141 nz = aij->i[r[i]+1] - aij->i[r[i]]; 142 ajtmpold = aij->j + aij->i[r[i]] - 1; 143 v = aij->a + aij->i[r[i]] - 1; 144 for ( j=0; j<nz; j++ ) rtmp[ic[ajtmpold[j]-1]] = v[j]; 145 146 row = *ajtmp++ - 1; 147 while (row < i) { 148 pc = rtmp + row; 149 if (*pc != 0.0) { 150 nz = aijnew->diag[row] - ai[row]; 151 pv = aijnew->a + aijnew->diag[row] - 1; 152 pj = aijnew->j + aijnew->diag[row]; 153 multiplier = *pc * *pv++; 154 *pc = multiplier; 155 nz = ai[row+1] - ai[row] - 1 - nz; 156 while (nz-->0) rtmp[*pj++ - 1] -= multiplier* *pv++; 157 } 158 row = *ajtmp++ - 1; 159 } 160 /* finished row so stick it into aijnew->a */ 161 pv = aijnew->a + ai[i] - 1; 162 pj = aijnew->j + ai[i] - 1; 163 nz = ai[i+1] - ai[i]; 164 rtmp[i] = 1.0/rtmp[i]; 165 for ( j=0; j<nz; j++ ) {pv[j] = rtmp[pj[j]-1];} 166 } 167 FREE(rtmp); 168 ierr = ISRestoreIndices(isicol,&ic); CHKERR(ierr); 169 ierr = ISRestoreIndices(isrow,&r); CHKERR(ierr); 170 ierr = ISDestroy(isicol); CHKERR(ierr); 171 fact->factor = FACTOR_LU; 172 aijnew->assembled = 1; 173 return 0; 174 } 175 int MatLUFactor_AIJ(Mat matin,IS row,IS col) 176 { 177 Mat_AIJ *mat = (Mat_AIJ *) matin->data; 178 int ierr; 179 Mat fact; 180 ierr = MatLUFactorSymbolic_AIJ(matin,row,col,&fact); CHKERR(ierr); 181 ierr = MatLUFactorNumeric_AIJ(matin,&fact); CHKERR(ierr); 182 183 /* free all the data structures from mat */ 184 FREE(mat->a); 185 if (!mat->singlemalloc) {FREE(mat->i); FREE(mat->j);} 186 if (mat->diag) FREE(mat->diag); 187 if (mat->ilen) FREE(mat->ilen); 188 if (mat->imax) FREE(mat->imax); 189 if (mat->row && mat->col && mat->row != mat->col) { 190 ISDestroy(mat->row); 191 } 192 if (mat->col) ISDestroy(mat->col); 193 FREE(mat); 194 195 MEMCPY(matin,fact,sizeof(struct _Mat)); 196 FREE(fact); 197 return 0; 198 } 199 200 int MatSolve_AIJ(Mat mat,Vec bb, Vec xx) 201 { 202 Mat_AIJ *aij = (Mat_AIJ *) mat->data; 203 IS iscol = aij->col, isrow = aij->row; 204 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 205 int nz; 206 Scalar *x,*b,*tmp, *aa = aij->a, sum, *v; 207 208 if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor"); 209 210 if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0); 211 if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0); 212 tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp); 213 214 if ((ierr = ISGetIndices(isrow,&r))) SETERR(ierr,0); 215 if ((ierr = ISGetIndices(iscol,&c))) SETERR(ierr,0); c = c + (n-1); 216 217 /* forward solve the lower triangular */ 218 tmp[0] = b[*r++]; 219 for ( i=1; i<n; i++ ) { 220 v = aa + ai[i] - 1; 221 vi = aj + ai[i] - 1; 222 nz = aij->diag[i] - ai[i]; 223 sum = b[*r++]; 224 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 225 tmp[i] = sum; 226 } 227 228 /* backward solve the upper triangular */ 229 for ( i=n-1; i>=0; i-- ){ 230 v = aa + aij->diag[i]; 231 vi = aj + aij->diag[i]; 232 nz = ai[i+1] - aij->diag[i] - 1; 233 sum = tmp[i]; 234 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 235 x[*c--] = tmp[i] = sum*aa[aij->diag[i]-1]; 236 } 237 238 FREE(tmp); 239 return 0; 240 } 241 int MatSolveAdd_AIJ(Mat mat,Vec bb, Vec yy, Vec xx) 242 { 243 Mat_AIJ *aij = (Mat_AIJ *) mat->data; 244 IS iscol = aij->col, isrow = aij->row; 245 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 246 int nz; 247 Scalar *x,*b,*tmp, *aa = aij->a, sum, *v; 248 249 if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor"); 250 if (yy != xx) {ierr = VecCopy(yy,xx); CHKERR(ierr);} 251 252 if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0); 253 if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0); 254 tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp); 255 256 if ((ierr = ISGetIndices(isrow,&r))) SETERR(ierr,0); 257 if ((ierr = ISGetIndices(iscol,&c))) SETERR(ierr,0); c = c + (n-1); 258 259 /* forward solve the lower triangular */ 260 tmp[0] = b[*r++]; 261 for ( i=1; i<n; i++ ) { 262 v = aa + ai[i] - 1; 263 vi = aj + ai[i] - 1; 264 nz = aij->diag[i] - ai[i]; 265 sum = b[*r++]; 266 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 267 tmp[i] = sum; 268 } 269 270 /* backward solve the upper triangular */ 271 for ( i=n-1; i>=0; i-- ){ 272 v = aa + aij->diag[i]; 273 vi = aj + aij->diag[i]; 274 nz = ai[i+1] - aij->diag[i] - 1; 275 sum = tmp[i]; 276 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 277 tmp[i] = sum*aa[aij->diag[i]-1]; 278 x[*c--] += tmp[i]; 279 } 280 281 FREE(tmp); 282 return 0; 283 } 284 /* -------------------------------------------------------------------*/ 285 int MatSolveTrans_AIJ(Mat mat,Vec bb, Vec xx) 286 { 287 Mat_AIJ *aij = (Mat_AIJ *) mat->data; 288 IS iscol = aij->col, isrow = aij->row, invisrow,inviscol; 289 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 290 int nz; 291 Scalar *x,*b,*tmp, *aa = aij->a, *v; 292 293 if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor"); 294 if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0); 295 if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0); 296 tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp); 297 298 /* invert the permutations */ 299 ierr = ISInvertPermutation(isrow,&invisrow); CHKERR(ierr); 300 ierr = ISInvertPermutation(iscol,&inviscol); CHKERR(ierr); 301 302 303 if ((ierr = ISGetIndices(invisrow,&r))) SETERR(ierr,0); 304 if ((ierr = ISGetIndices(inviscol,&c))) SETERR(ierr,0); 305 306 /* copy the b into temp work space according to permutation */ 307 for ( i=0; i<n; i++ ) tmp[c[i]] = b[i]; 308 309 /* forward solve the U^T */ 310 for ( i=0; i<n; i++ ) { 311 v = aa + aij->diag[i] - 1; 312 vi = aj + aij->diag[i]; 313 nz = ai[i+1] - aij->diag[i] - 1; 314 tmp[i] *= *v++; 315 while (nz--) { 316 tmp[*vi++ - 1] -= (*v++)*tmp[i]; 317 } 318 } 319 320 /* backward solve the L^T */ 321 for ( i=n-1; i>=0; i-- ){ 322 v = aa + aij->diag[i] - 2; 323 vi = aj + aij->diag[i] - 2; 324 nz = aij->diag[i] - ai[i]; 325 while (nz--) { 326 tmp[*vi-- - 1] -= (*v--)*tmp[i]; 327 } 328 } 329 330 /* copy tmp into x according to permutation */ 331 for ( i=0; i<n; i++ ) x[r[i]] = tmp[i]; 332 333 ISDestroy(invisrow); ISDestroy(inviscol); 334 335 FREE(tmp); 336 return 0; 337 } 338 339 int MatSolveTransAdd_AIJ(Mat mat,Vec bb, Vec zz,Vec xx) 340 { 341 Mat_AIJ *aij = (Mat_AIJ *) mat->data; 342 IS iscol = aij->col, isrow = aij->row, invisrow,inviscol; 343 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 344 int nz; 345 Scalar *x,*b,*tmp, *aa = aij->a, *v; 346 347 if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor"); 348 if (zz != xx) VecCopy(zz,xx); 349 350 if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0); 351 if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0); 352 tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp); 353 354 /* invert the permutations */ 355 ierr = ISInvertPermutation(isrow,&invisrow); CHKERR(ierr); 356 ierr = ISInvertPermutation(iscol,&inviscol); CHKERR(ierr); 357 358 359 if ((ierr = ISGetIndices(invisrow,&r))) SETERR(ierr,0); 360 if ((ierr = ISGetIndices(inviscol,&c))) SETERR(ierr,0); 361 362 /* copy the b into temp work space according to permutation */ 363 for ( i=0; i<n; i++ ) tmp[c[i]] = b[i]; 364 365 /* forward solve the U^T */ 366 for ( i=0; i<n; i++ ) { 367 v = aa + aij->diag[i] - 1; 368 vi = aj + aij->diag[i]; 369 nz = ai[i+1] - aij->diag[i] - 1; 370 tmp[i] *= *v++; 371 while (nz--) { 372 tmp[*vi++ - 1] -= (*v++)*tmp[i]; 373 } 374 } 375 376 /* backward solve the L^T */ 377 for ( i=n-1; i>=0; i-- ){ 378 v = aa + aij->diag[i] - 2; 379 vi = aj + aij->diag[i] - 2; 380 nz = aij->diag[i] - ai[i]; 381 while (nz--) { 382 tmp[*vi-- - 1] -= (*v--)*tmp[i]; 383 } 384 } 385 386 /* copy tmp into x according to permutation */ 387 for ( i=0; i<n; i++ ) x[r[i]] += tmp[i]; 388 389 ISDestroy(invisrow); ISDestroy(inviscol); 390 391 FREE(tmp); 392 return 0; 393 394 } 395 /* ----------------------------------------------------------------*/ 396 int MatILU_AIJ(Mat mat,IS isrow,IS iscol,int levels,Mat *fact) 397 { 398 Mat_AIJ *aij = (Mat_AIJ *) mat->data, *aijnew; 399 IS isicol; 400 int *r,*ic, ierr, i, n = aij->m, *ai = aij->i, *aj = aij->j; 401 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz, *lfill,*ajfill,*ajtmpf; 402 int *idnew, idx, row,m,fm, nnz, nzi,len; 403 404 if (n != aij->n) SETERR(1,"Mat must be square"); 405 if (!isrow) {SETERR(1,"Must have row permutation");} 406 if (!iscol) {SETERR(1,"Must have column permutation");} 407 408 if ((ierr = ISInvertPermutation(iscol,&isicol))) SETERR(ierr,0); 409 ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic); 410 411 /* get new row pointers */ 412 ainew = (int *) MALLOC( (n+1)*sizeof(int) ); CHKPTR(ainew); 413 ainew[0] = 1; 414 /* don't know how many column pointers are needed so estimate */ 415 jmax = 2*ai[n]; 416 ajnew = (int *) MALLOC( (jmax)*sizeof(int) ); CHKPTR(ajnew); 417 /* ajfill is level of fill for each fill entry */ 418 ajfill = (int *) MALLOC( (jmax)*sizeof(int) ); CHKPTR(ajfill); 419 /* fill is a linked list of nonzeros in active row */ 420 fill = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(fill); 421 /* lfill is level for each filled value */ 422 lfill = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(lfill); 423 /* idnew is location of diagonal in factor */ 424 idnew = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(idnew); 425 idnew[0] = 1; 426 427 for ( i=0; i<n; i++ ) { 428 /* first copy previous fill into linked list */ 429 nnz = nz = ai[r[i]+1] - ai[r[i]]; 430 ajtmp = aj + ai[r[i]] - 1; 431 fill[n] = n; 432 while (nz--) { 433 fm = n; 434 idx = ic[*ajtmp++ - 1]; 435 do { 436 m = fm; 437 fm = fill[m]; 438 } while (fm < idx); 439 fill[m] = idx; 440 fill[idx] = fm; 441 lfill[idx] = -1; 442 } 443 row = fill[n]; 444 while ( row < i ) { 445 ajtmp = ajnew + idnew[row] - 1; 446 ajtmpf = ajfill + idnew[row] - 1; 447 nz = ainew[row+1] - idnew[row]; 448 fm = row; 449 while (nz--) { 450 fm = n; 451 idx = *ajtmp++ - 1; 452 do { 453 m = fm; 454 fm = fill[m]; 455 } while (fm < idx); 456 if (fm != idx) { 457 lfill[idx] = *ajtmpf + 1; 458 if (lfill[idx] < levels) { 459 fill[m] = idx; 460 fill[idx] = fm; 461 fm = idx; 462 nnz++; 463 } 464 } 465 ajtmpf++; 466 } 467 row = fill[row]; 468 } 469 /* copy new filled row into permanent storage */ 470 ainew[i+1] = ainew[i] + nnz; 471 if (ainew[i+1] > jmax+1) { 472 /* allocate a longer ajnew */ 473 jmax += nnz*(n-i); 474 ajtmp = (int *) MALLOC( jmax*sizeof(int) );CHKPTR(ajtmp); 475 MEMCPY(ajtmp,ajnew,(ainew[i]-1)*sizeof(int)); 476 FREE(ajnew); 477 ajnew = ajtmp; 478 /* allocate a longer ajfill */ 479 ajtmp = (int *) MALLOC( jmax*sizeof(int) );CHKPTR(ajtmp); 480 MEMCPY(ajtmp,ajfill,(ainew[i]-1)*sizeof(int)); 481 FREE(ajfill); 482 ajfill = ajtmp; 483 } 484 ajtmp = ajnew + ainew[i] - 1; 485 ajtmpf = ajfill + ainew[i] - 1; 486 fm = fill[n]; 487 nzi = 0; 488 while (nnz--) { 489 if (fm < i) nzi++; 490 *ajtmp++ = fm + 1; 491 *ajtmpf++ = lfill[fm]; 492 fm = fill[fm]; 493 } 494 idnew[i] = ainew[i] + nzi; 495 } 496 FREE(ajfill); 497 ISDestroy(isicol); FREE(fill); FREE(lfill); 498 499 /* put together the new matrix */ 500 ierr = MatCreateSequentialAIJ(mat->comm,n, n, 0, 0, fact); CHKERR(ierr); 501 aijnew = (Mat_AIJ *) (*fact)->data; 502 FREE(aijnew->imax); 503 aijnew->singlemalloc = 0; 504 len = (ainew[n] - 1)*sizeof(Scalar); 505 /* the next line frees the default space generated by the Create() */ 506 FREE(aijnew->a); FREE(aijnew->ilen); 507 aijnew->a = (Scalar *) MALLOC( len ); CHKPTR(aijnew->a); 508 aijnew->j = ajnew; 509 aijnew->i = ainew; 510 aijnew->diag = idnew; 511 aijnew->ilen = 0; 512 aijnew->imax = 0; 513 aijnew->row = isrow; 514 aijnew->col = iscol; 515 (*fact)->factor = FACTOR_LU; 516 return 0; 517 } 518