1 #ifndef lint 2 static char vcid[] = "$Id: aijfact.c,v 1.15 1995/04/25 18:03:03 bsmith Exp bsmith $"; 3 #endif 4 5 6 #include "aij.h" 7 #include "inline/spops.h" 8 /* 9 Factorization code for AIJ format. 10 */ 11 12 int MatLUFactorSymbolic_AIJ(Mat mat,IS isrow,IS iscol,Mat *fact) 13 { 14 Mat_AIJ *aij = (Mat_AIJ *) mat->data, *aijnew; 15 IS isicol; 16 int *r,*ic, ierr, i, n = aij->m, *ai = aij->i, *aj = aij->j; 17 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz; 18 int *idnew, idx, row,m,fm, nnz, nzi,len; 19 20 if (n != aij->n) SETERR(1,"Mat must be square"); 21 if (!isrow) {SETERR(1,"Must have row permutation");} 22 if (!iscol) {SETERR(1,"Must have column permutation");} 23 24 ierr = ISInvertPermutation(iscol,&isicol); CHKERR(ierr); 25 ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic); 26 27 /* get new row pointers */ 28 ainew = (int *) MALLOC( (n+1)*sizeof(int) ); CHKPTR(ainew); 29 ainew[0] = 1; 30 /* don't know how many column pointers are needed so estimate */ 31 jmax = 2*ai[n]; 32 ajnew = (int *) MALLOC( (jmax)*sizeof(int) ); CHKPTR(ajnew); 33 /* fill is a linked list of nonzeros in active row */ 34 fill = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(fill); 35 /* idnew is location of diagonal in factor */ 36 idnew = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(idnew); 37 idnew[0] = 1; 38 39 for ( i=0; i<n; i++ ) { 40 /* first copy previous fill into linked list */ 41 nnz = nz = ai[r[i]+1] - ai[r[i]]; 42 ajtmp = aj + ai[r[i]] - 1; 43 fill[n] = n; 44 while (nz--) { 45 fm = n; 46 idx = ic[*ajtmp++ - 1]; 47 do { 48 m = fm; 49 fm = fill[m]; 50 } while (fm < idx); 51 fill[m] = idx; 52 fill[idx] = fm; 53 } 54 row = fill[n]; 55 while ( row < i ) { 56 ajtmp = ajnew + idnew[row] - 1; 57 nz = ainew[row+1] - idnew[row]; 58 fm = row; 59 while (nz--) { 60 fm = n; 61 idx = *ajtmp++ - 1; 62 do { 63 m = fm; 64 fm = fill[m]; 65 } while (fm < idx); 66 if (fm != idx) { 67 fill[m] = idx; 68 fill[idx] = fm; 69 fm = idx; 70 nnz++; 71 } 72 } 73 row = fill[row]; 74 } 75 /* copy new filled row into permanent storage */ 76 ainew[i+1] = ainew[i] + nnz; 77 if (ainew[i+1] > jmax+1) { 78 /* allocate a longer ajnew */ 79 jmax += nnz*(n-i); 80 ajtmp = (int *) MALLOC( jmax*sizeof(int) );CHKPTR(ajtmp); 81 MEMCPY(ajtmp,ajnew,(ainew[i]-1)*sizeof(int)); 82 FREE(ajnew); 83 ajnew = ajtmp; 84 } 85 ajtmp = ajnew + ainew[i] - 1; 86 fm = fill[n]; 87 nzi = 0; 88 while (nnz--) { 89 if (fm < i) nzi++; 90 *ajtmp++ = fm + 1; 91 fm = fill[fm]; 92 } 93 idnew[i] = ainew[i] + nzi; 94 } 95 96 ISDestroy(isicol); FREE(fill); 97 98 /* put together the new matrix */ 99 ierr = MatCreateSequentialAIJ(mat->comm,n, n, 0, 0, fact); CHKERR(ierr); 100 aijnew = (Mat_AIJ *) (*fact)->data; 101 FREE(aijnew->imax); 102 aijnew->singlemalloc = 0; 103 len = (ainew[n] - 1)*sizeof(Scalar); 104 /* the next line frees the default space generated by the Create() */ 105 FREE(aijnew->a); FREE(aijnew->ilen); 106 aijnew->a = (Scalar *) MALLOC( len ); CHKPTR(aijnew->a); 107 aijnew->j = ajnew; 108 aijnew->i = ainew; 109 aijnew->diag = idnew; 110 aijnew->ilen = 0; 111 aijnew->imax = 0; 112 aijnew->row = isrow; 113 aijnew->col = iscol; 114 (*fact)->factor = FACTOR_LU; 115 aijnew->solve_work = (Scalar *) MALLOC( n*sizeof(Scalar)); 116 CHKPTR(aijnew->solve_work); 117 /* Cannot do this here because child is destroyed before parent created 118 PLogObjectParent(*fact,isicol); */ 119 return 0; 120 } 121 122 int MatLUFactorNumeric_AIJ(Mat mat,Mat *infact) 123 { 124 Mat fact = *infact; 125 Mat_AIJ *aij = (Mat_AIJ *) mat->data, *aijnew = (Mat_AIJ *)fact->data; 126 IS iscol = aijnew->col, isrow = aijnew->row, isicol; 127 int *r,*ic, ierr, i, j, n = aij->m, *ai = aijnew->i, *aj = aijnew->j; 128 int *ajtmpold, *ajtmp, nz, row,*pj; 129 Scalar *rtmp,*v, *pv, *pc, multiplier; 130 131 ierr = ISInvertPermutation(iscol,&isicol); CHKERR(ierr); 132 PLogObjectParent(*infact,isicol); 133 ierr = ISGetIndices(isrow,&r); CHKERR(ierr); 134 ierr = ISGetIndices(isicol,&ic); CHKERR(ierr); 135 rtmp = (Scalar *) MALLOC( (n+1)*sizeof(Scalar) ); CHKPTR(rtmp); 136 137 for ( i=0; i<n; i++ ) { 138 nz = ai[i+1] - ai[i]; 139 ajtmp = aj + ai[i] - 1; 140 for ( j=0; j<nz; j++ ) rtmp[ajtmp[j]-1] = 0.0; 141 142 /* load in initial (unfactored row) */ 143 nz = aij->i[r[i]+1] - aij->i[r[i]]; 144 ajtmpold = aij->j + aij->i[r[i]] - 1; 145 v = aij->a + aij->i[r[i]] - 1; 146 for ( j=0; j<nz; j++ ) rtmp[ic[ajtmpold[j]-1]] = v[j]; 147 148 row = *ajtmp++ - 1; 149 while (row < i) { 150 pc = rtmp + row; 151 if (*pc != 0.0) { 152 nz = aijnew->diag[row] - ai[row]; 153 pv = aijnew->a + aijnew->diag[row] - 1; 154 pj = aijnew->j + aijnew->diag[row]; 155 multiplier = *pc * *pv++; 156 *pc = multiplier; 157 nz = ai[row+1] - ai[row] - 1 - nz; 158 PLogFlops(2*nz); 159 while (nz-->0) rtmp[*pj++ - 1] -= multiplier* *pv++; 160 } 161 row = *ajtmp++ - 1; 162 } 163 /* finished row so stick it into aijnew->a */ 164 pv = aijnew->a + ai[i] - 1; 165 pj = aijnew->j + ai[i] - 1; 166 nz = ai[i+1] - ai[i]; 167 rtmp[i] = 1.0/rtmp[i]; 168 for ( j=0; j<nz; j++ ) {pv[j] = rtmp[pj[j]-1];} 169 } 170 FREE(rtmp); 171 ierr = ISRestoreIndices(isicol,&ic); CHKERR(ierr); 172 ierr = ISRestoreIndices(isrow,&r); CHKERR(ierr); 173 ierr = ISDestroy(isicol); CHKERR(ierr); 174 fact->factor = FACTOR_LU; 175 aijnew->assembled = 1; 176 aijnew->nz = ai[aijnew->n]; 177 PLogFlops(aijnew->n); 178 return 0; 179 } 180 int MatLUFactor_AIJ(Mat matin,IS row,IS col) 181 { 182 Mat_AIJ *mat = (Mat_AIJ *) matin->data; 183 int ierr; 184 Mat fact; 185 ierr = MatLUFactorSymbolic_AIJ(matin,row,col,&fact); CHKERR(ierr); 186 ierr = MatLUFactorNumeric_AIJ(matin,&fact); CHKERR(ierr); 187 188 /* free all the data structures from mat */ 189 FREE(mat->a); 190 if (!mat->singlemalloc) {FREE(mat->i); FREE(mat->j);} 191 if (mat->diag) FREE(mat->diag); 192 if (mat->ilen) FREE(mat->ilen); 193 if (mat->imax) FREE(mat->imax); 194 if (mat->row && mat->col && mat->row != mat->col) { 195 ISDestroy(mat->row); 196 } 197 if (mat->col) ISDestroy(mat->col); 198 FREE(mat); 199 200 MEMCPY(matin,fact,sizeof(struct _Mat)); 201 FREE(fact); 202 return 0; 203 } 204 205 int MatSolve_AIJ(Mat mat,Vec bb, Vec xx) 206 { 207 Mat_AIJ *aij = (Mat_AIJ *) mat->data; 208 IS iscol = aij->col, isrow = aij->row; 209 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 210 int nz; 211 Scalar *x,*b,*tmp, *aa = aij->a, sum, *v; 212 213 if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor"); 214 215 ierr = VecGetArray(bb,&b); CHKERR(ierr); 216 ierr = VecGetArray(xx,&x); CHKERR(ierr); 217 tmp = aij->solve_work; 218 219 ierr = ISGetIndices(isrow,&r);CHKERR(ierr); 220 ierr = ISGetIndices(iscol,&c);CHKERR(ierr); c = c + (n-1); 221 222 /* forward solve the lower triangular */ 223 tmp[0] = b[*r++]; 224 for ( i=1; i<n; i++ ) { 225 v = aa + ai[i] - 1; 226 vi = aj + ai[i] - 1; 227 nz = aij->diag[i] - ai[i]; 228 sum = b[*r++]; 229 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 230 tmp[i] = sum; 231 } 232 233 /* backward solve the upper triangular */ 234 for ( i=n-1; i>=0; i-- ){ 235 v = aa + aij->diag[i]; 236 vi = aj + aij->diag[i]; 237 nz = ai[i+1] - aij->diag[i] - 1; 238 sum = tmp[i]; 239 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 240 x[*c--] = tmp[i] = sum*aa[aij->diag[i]-1]; 241 } 242 243 PLogFlops(2*aij->nz - aij->n); 244 return 0; 245 } 246 int MatSolveAdd_AIJ(Mat mat,Vec bb, Vec yy, Vec xx) 247 { 248 Mat_AIJ *aij = (Mat_AIJ *) mat->data; 249 IS iscol = aij->col, isrow = aij->row; 250 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 251 int nz; 252 Scalar *x,*b,*tmp, *aa = aij->a, sum, *v; 253 254 if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor"); 255 if (yy != xx) {ierr = VecCopy(yy,xx); CHKERR(ierr);} 256 257 ierr = VecGetArray(bb,&b); CHKERR(ierr); 258 ierr = VecGetArray(xx,&x); CHKERR(ierr); 259 tmp = aij->solve_work; 260 261 ierr = ISGetIndices(isrow,&r); CHKERR(ierr); 262 ierr = ISGetIndices(iscol,&c); CHKERR(ierr); c = c + (n-1); 263 264 /* forward solve the lower triangular */ 265 tmp[0] = b[*r++]; 266 for ( i=1; i<n; i++ ) { 267 v = aa + ai[i] - 1; 268 vi = aj + ai[i] - 1; 269 nz = aij->diag[i] - ai[i]; 270 sum = b[*r++]; 271 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 272 tmp[i] = sum; 273 } 274 275 /* backward solve the upper triangular */ 276 for ( i=n-1; i>=0; i-- ){ 277 v = aa + aij->diag[i]; 278 vi = aj + aij->diag[i]; 279 nz = ai[i+1] - aij->diag[i] - 1; 280 sum = tmp[i]; 281 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 282 tmp[i] = sum*aa[aij->diag[i]-1]; 283 x[*c--] += tmp[i]; 284 } 285 286 PLogFlops(2*aij->nz); 287 return 0; 288 } 289 /* -------------------------------------------------------------------*/ 290 int MatSolveTrans_AIJ(Mat mat,Vec bb, Vec xx) 291 { 292 Mat_AIJ *aij = (Mat_AIJ *) mat->data; 293 IS iscol = aij->col, isrow = aij->row, invisrow,inviscol; 294 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 295 int nz; 296 Scalar *x,*b,*tmp, *aa = aij->a, *v; 297 298 if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor"); 299 ierr = VecGetArray(bb,&b); CHKERR(ierr); 300 ierr = VecGetArray(xx,&x); CHKERR(ierr); 301 tmp = aij->solve_work; 302 303 /* invert the permutations */ 304 ierr = ISInvertPermutation(isrow,&invisrow); CHKERR(ierr); 305 ierr = ISInvertPermutation(iscol,&inviscol); CHKERR(ierr); 306 307 308 ierr = ISGetIndices(invisrow,&r); CHKERR(ierr); 309 ierr = ISGetIndices(inviscol,&c); CHKERR(ierr); 310 311 /* copy the b into temp work space according to permutation */ 312 for ( i=0; i<n; i++ ) tmp[c[i]] = b[i]; 313 314 /* forward solve the U^T */ 315 for ( i=0; i<n; i++ ) { 316 v = aa + aij->diag[i] - 1; 317 vi = aj + aij->diag[i]; 318 nz = ai[i+1] - aij->diag[i] - 1; 319 tmp[i] *= *v++; 320 while (nz--) { 321 tmp[*vi++ - 1] -= (*v++)*tmp[i]; 322 } 323 } 324 325 /* backward solve the L^T */ 326 for ( i=n-1; i>=0; i-- ){ 327 v = aa + aij->diag[i] - 2; 328 vi = aj + aij->diag[i] - 2; 329 nz = aij->diag[i] - ai[i]; 330 while (nz--) { 331 tmp[*vi-- - 1] -= (*v--)*tmp[i]; 332 } 333 } 334 335 /* copy tmp into x according to permutation */ 336 for ( i=0; i<n; i++ ) x[r[i]] = tmp[i]; 337 338 ISDestroy(invisrow); ISDestroy(inviscol); 339 340 PLogFlops(2*aij->nz-aij->n); 341 return 0; 342 } 343 344 int MatSolveTransAdd_AIJ(Mat mat,Vec bb, Vec zz,Vec xx) 345 { 346 Mat_AIJ *aij = (Mat_AIJ *) mat->data; 347 IS iscol = aij->col, isrow = aij->row, invisrow,inviscol; 348 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 349 int nz; 350 Scalar *x,*b,*tmp, *aa = aij->a, *v; 351 352 if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor"); 353 if (zz != xx) VecCopy(zz,xx); 354 355 ierr = VecGetArray(bb,&b); CHKERR(ierr); 356 ierr = VecGetArray(xx,&x); CHKERR(ierr); 357 tmp = aij->solve_work; 358 359 /* invert the permutations */ 360 ierr = ISInvertPermutation(isrow,&invisrow); CHKERR(ierr); 361 ierr = ISInvertPermutation(iscol,&inviscol); CHKERR(ierr); 362 363 364 ierr = ISGetIndices(invisrow,&r); CHKERR(ierr); 365 ierr = ISGetIndices(inviscol,&c); CHKERR(ierr); 366 367 /* copy the b into temp work space according to permutation */ 368 for ( i=0; i<n; i++ ) tmp[c[i]] = b[i]; 369 370 /* forward solve the U^T */ 371 for ( i=0; i<n; i++ ) { 372 v = aa + aij->diag[i] - 1; 373 vi = aj + aij->diag[i]; 374 nz = ai[i+1] - aij->diag[i] - 1; 375 tmp[i] *= *v++; 376 while (nz--) { 377 tmp[*vi++ - 1] -= (*v++)*tmp[i]; 378 } 379 } 380 381 /* backward solve the L^T */ 382 for ( i=n-1; i>=0; i-- ){ 383 v = aa + aij->diag[i] - 2; 384 vi = aj + aij->diag[i] - 2; 385 nz = aij->diag[i] - ai[i]; 386 while (nz--) { 387 tmp[*vi-- - 1] -= (*v--)*tmp[i]; 388 } 389 } 390 391 /* copy tmp into x according to permutation */ 392 for ( i=0; i<n; i++ ) x[r[i]] += tmp[i]; 393 394 ISDestroy(invisrow); ISDestroy(inviscol); 395 396 PLogFlops(2*aij->nz); 397 return 0; 398 } 399 /* ----------------------------------------------------------------*/ 400 int MatILU_AIJ(Mat mat,IS isrow,IS iscol,int levels,Mat *fact) 401 { 402 Mat_AIJ *aij = (Mat_AIJ *) mat->data, *aijnew; 403 IS isicol; 404 int *r,*ic, ierr, i, n = aij->m, *ai = aij->i, *aj = aij->j; 405 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz, *lfill,*ajfill,*ajtmpf; 406 int *idnew, idx, row,m,fm, nnz, nzi,len; 407 408 if (n != aij->n) SETERR(1,"Mat must be square"); 409 if (!isrow) {SETERR(1,"Must have row permutation");} 410 if (!iscol) {SETERR(1,"Must have column permutation");} 411 412 ierr = ISInvertPermutation(iscol,&isicol); CHKERR(ierr); 413 ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic); 414 415 /* get new row pointers */ 416 ainew = (int *) MALLOC( (n+1)*sizeof(int) ); CHKPTR(ainew); 417 ainew[0] = 1; 418 /* don't know how many column pointers are needed so estimate */ 419 jmax = 2*ai[n]; 420 ajnew = (int *) MALLOC( (jmax)*sizeof(int) ); CHKPTR(ajnew); 421 /* ajfill is level of fill for each fill entry */ 422 ajfill = (int *) MALLOC( (jmax)*sizeof(int) ); CHKPTR(ajfill); 423 /* fill is a linked list of nonzeros in active row */ 424 fill = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(fill); 425 /* lfill is level for each filled value */ 426 lfill = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(lfill); 427 /* idnew is location of diagonal in factor */ 428 idnew = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(idnew); 429 idnew[0] = 1; 430 431 for ( i=0; i<n; i++ ) { 432 /* first copy previous fill into linked list */ 433 nnz = nz = ai[r[i]+1] - ai[r[i]]; 434 ajtmp = aj + ai[r[i]] - 1; 435 fill[n] = n; 436 while (nz--) { 437 fm = n; 438 idx = ic[*ajtmp++ - 1]; 439 do { 440 m = fm; 441 fm = fill[m]; 442 } while (fm < idx); 443 fill[m] = idx; 444 fill[idx] = fm; 445 lfill[idx] = -1; 446 } 447 row = fill[n]; 448 while ( row < i ) { 449 ajtmp = ajnew + idnew[row] - 1; 450 ajtmpf = ajfill + idnew[row] - 1; 451 nz = ainew[row+1] - idnew[row]; 452 fm = row; 453 while (nz--) { 454 fm = n; 455 idx = *ajtmp++ - 1; 456 do { 457 m = fm; 458 fm = fill[m]; 459 } while (fm < idx); 460 if (fm != idx) { 461 lfill[idx] = *ajtmpf + 1; 462 if (lfill[idx] < levels) { 463 fill[m] = idx; 464 fill[idx] = fm; 465 fm = idx; 466 nnz++; 467 } 468 } 469 ajtmpf++; 470 } 471 row = fill[row]; 472 } 473 /* copy new filled row into permanent storage */ 474 ainew[i+1] = ainew[i] + nnz; 475 if (ainew[i+1] > jmax+1) { 476 /* allocate a longer ajnew */ 477 jmax += nnz*(n-i); 478 ajtmp = (int *) MALLOC( jmax*sizeof(int) );CHKPTR(ajtmp); 479 MEMCPY(ajtmp,ajnew,(ainew[i]-1)*sizeof(int)); 480 FREE(ajnew); 481 ajnew = ajtmp; 482 /* allocate a longer ajfill */ 483 ajtmp = (int *) MALLOC( jmax*sizeof(int) );CHKPTR(ajtmp); 484 MEMCPY(ajtmp,ajfill,(ainew[i]-1)*sizeof(int)); 485 FREE(ajfill); 486 ajfill = ajtmp; 487 } 488 ajtmp = ajnew + ainew[i] - 1; 489 ajtmpf = ajfill + ainew[i] - 1; 490 fm = fill[n]; 491 nzi = 0; 492 while (nnz--) { 493 if (fm < i) nzi++; 494 *ajtmp++ = fm + 1; 495 *ajtmpf++ = lfill[fm]; 496 fm = fill[fm]; 497 } 498 idnew[i] = ainew[i] + nzi; 499 } 500 FREE(ajfill); 501 ISDestroy(isicol); FREE(fill); FREE(lfill); 502 503 /* put together the new matrix */ 504 ierr = MatCreateSequentialAIJ(mat->comm,n, n, 0, 0, fact); CHKERR(ierr); 505 aijnew = (Mat_AIJ *) (*fact)->data; 506 FREE(aijnew->imax); 507 aijnew->singlemalloc = 0; 508 len = (ainew[n] - 1)*sizeof(Scalar); 509 /* the next line frees the default space generated by the Create() */ 510 FREE(aijnew->a); FREE(aijnew->ilen); 511 aijnew->a = (Scalar *) MALLOC( len ); CHKPTR(aijnew->a); 512 aijnew->j = ajnew; 513 aijnew->i = ainew; 514 aijnew->diag = idnew; 515 aijnew->ilen = 0; 516 aijnew->imax = 0; 517 aijnew->row = isrow; 518 aijnew->col = iscol; 519 aijnew->solve_work = (Scalar *) MALLOC( n*sizeof(Scalar)); 520 CHKPTR(aijnew->solve_work); 521 (*fact)->factor = FACTOR_LU; 522 return 0; 523 } 524