1 #ifndef lint 2 static char vcid[] = "$Id: aijfact.c,v 1.17 1995/05/21 14:04:20 curfman Exp curfman $"; 3 #endif 4 5 6 #include "aij.h" 7 #include "inline/spops.h" 8 /* 9 Factorization code for AIJ format. 10 */ 11 12 int MatLUFactorSymbolic_AIJ(Mat mat,IS isrow,IS iscol,Mat *fact) 13 { 14 Mat_AIJ *aij = (Mat_AIJ *) mat->data, *aijnew; 15 IS isicol; 16 int *r,*ic, ierr, i, n = aij->m, *ai = aij->i, *aj = aij->j; 17 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz; 18 int *idnew, idx, row,m,fm, nnz, nzi,len; 19 20 if (n != aij->n) SETERR(1,"Mat must be square"); 21 if (!isrow) {SETERR(1,"Must have row permutation");} 22 if (!iscol) {SETERR(1,"Must have column permutation");} 23 24 ierr = ISInvertPermutation(iscol,&isicol); CHKERR(ierr); 25 ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic); 26 27 /* get new row pointers */ 28 ainew = (int *) MALLOC( (n+1)*sizeof(int) ); CHKPTR(ainew); 29 ainew[0] = 1; 30 /* don't know how many column pointers are needed so estimate */ 31 jmax = 2*ai[n]; 32 ajnew = (int *) MALLOC( (jmax)*sizeof(int) ); CHKPTR(ajnew); 33 /* fill is a linked list of nonzeros in active row */ 34 fill = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(fill); 35 /* idnew is location of diagonal in factor */ 36 idnew = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(idnew); 37 idnew[0] = 1; 38 39 for ( i=0; i<n; i++ ) { 40 /* first copy previous fill into linked list */ 41 nnz = nz = ai[r[i]+1] - ai[r[i]]; 42 ajtmp = aj + ai[r[i]] - 1; 43 fill[n] = n; 44 while (nz--) { 45 fm = n; 46 idx = ic[*ajtmp++ - 1]; 47 do { 48 m = fm; 49 fm = fill[m]; 50 } while (fm < idx); 51 fill[m] = idx; 52 fill[idx] = fm; 53 } 54 row = fill[n]; 55 while ( row < i ) { 56 ajtmp = ajnew + idnew[row] - 1; 57 nz = ainew[row+1] - idnew[row]; 58 fm = row; 59 while (nz--) { 60 fm = n; 61 idx = *ajtmp++ - 1; 62 do { 63 m = fm; 64 fm = fill[m]; 65 } while (fm < idx); 66 if (fm != idx) { 67 fill[m] = idx; 68 fill[idx] = fm; 69 fm = idx; 70 nnz++; 71 } 72 } 73 row = fill[row]; 74 } 75 /* copy new filled row into permanent storage */ 76 ainew[i+1] = ainew[i] + nnz; 77 if (ainew[i+1] > jmax+1) { 78 /* allocate a longer ajnew */ 79 jmax += nnz*(n-i); 80 ajtmp = (int *) MALLOC( jmax*sizeof(int) );CHKPTR(ajtmp); 81 MEMCPY(ajtmp,ajnew,(ainew[i]-1)*sizeof(int)); 82 FREE(ajnew); 83 ajnew = ajtmp; 84 } 85 ajtmp = ajnew + ainew[i] - 1; 86 fm = fill[n]; 87 nzi = 0; 88 while (nnz--) { 89 if (fm < i) nzi++; 90 *ajtmp++ = fm + 1; 91 fm = fill[fm]; 92 } 93 idnew[i] = ainew[i] + nzi; 94 } 95 96 ISDestroy(isicol); FREE(fill); 97 98 /* put together the new matrix */ 99 ierr = MatCreateSequentialAIJ(mat->comm,n, n, 0, 0, fact); CHKERR(ierr); 100 aijnew = (Mat_AIJ *) (*fact)->data; 101 FREE(aijnew->imax); 102 aijnew->singlemalloc = 0; 103 len = (ainew[n] - 1)*sizeof(Scalar); 104 /* the next line frees the default space generated by the Create() */ 105 FREE(aijnew->a); FREE(aijnew->ilen); 106 aijnew->a = (Scalar *) MALLOC( len ); CHKPTR(aijnew->a); 107 aijnew->j = ajnew; 108 aijnew->i = ainew; 109 aijnew->diag = idnew; 110 aijnew->ilen = 0; 111 aijnew->imax = 0; 112 aijnew->row = isrow; 113 aijnew->col = iscol; 114 /* aijnew->mem += something */ 115 (*fact)->factor = FACTOR_LU; 116 aijnew->solve_work = (Scalar *) MALLOC( n*sizeof(Scalar)); 117 CHKPTR(aijnew->solve_work); 118 /* Cannot do this here because child is destroyed before parent created 119 PLogObjectParent(*fact,isicol); */ 120 return 0; 121 } 122 123 int MatLUFactorNumeric_AIJ(Mat mat,Mat *infact) 124 { 125 Mat fact = *infact; 126 Mat_AIJ *aij = (Mat_AIJ *) mat->data, *aijnew = (Mat_AIJ *)fact->data; 127 IS iscol = aijnew->col, isrow = aijnew->row, isicol; 128 int *r,*ic, ierr, i, j, n = aij->m, *ai = aijnew->i, *aj = aijnew->j; 129 int *ajtmpold, *ajtmp, nz, row,*pj; 130 Scalar *rtmp,*v, *pv, *pc, multiplier; 131 132 ierr = ISInvertPermutation(iscol,&isicol); CHKERR(ierr); 133 PLogObjectParent(*infact,isicol); 134 ierr = ISGetIndices(isrow,&r); CHKERR(ierr); 135 ierr = ISGetIndices(isicol,&ic); CHKERR(ierr); 136 rtmp = (Scalar *) MALLOC( (n+1)*sizeof(Scalar) ); CHKPTR(rtmp); 137 138 for ( i=0; i<n; i++ ) { 139 nz = ai[i+1] - ai[i]; 140 ajtmp = aj + ai[i] - 1; 141 for ( j=0; j<nz; j++ ) rtmp[ajtmp[j]-1] = 0.0; 142 143 /* load in initial (unfactored row) */ 144 nz = aij->i[r[i]+1] - aij->i[r[i]]; 145 ajtmpold = aij->j + aij->i[r[i]] - 1; 146 v = aij->a + aij->i[r[i]] - 1; 147 for ( j=0; j<nz; j++ ) rtmp[ic[ajtmpold[j]-1]] = v[j]; 148 149 row = *ajtmp++ - 1; 150 while (row < i) { 151 pc = rtmp + row; 152 if (*pc != 0.0) { 153 nz = aijnew->diag[row] - ai[row]; 154 pv = aijnew->a + aijnew->diag[row] - 1; 155 pj = aijnew->j + aijnew->diag[row]; 156 multiplier = *pc * *pv++; 157 *pc = multiplier; 158 nz = ai[row+1] - ai[row] - 1 - nz; 159 PLogFlops(2*nz); 160 while (nz-->0) rtmp[*pj++ - 1] -= multiplier* *pv++; 161 } 162 row = *ajtmp++ - 1; 163 } 164 /* finished row so stick it into aijnew->a */ 165 pv = aijnew->a + ai[i] - 1; 166 pj = aijnew->j + ai[i] - 1; 167 nz = ai[i+1] - ai[i]; 168 rtmp[i] = 1.0/rtmp[i]; 169 for ( j=0; j<nz; j++ ) {pv[j] = rtmp[pj[j]-1];} 170 } 171 FREE(rtmp); 172 ierr = ISRestoreIndices(isicol,&ic); CHKERR(ierr); 173 ierr = ISRestoreIndices(isrow,&r); CHKERR(ierr); 174 ierr = ISDestroy(isicol); CHKERR(ierr); 175 fact->factor = FACTOR_LU; 176 aijnew->assembled = 1; 177 aijnew->nz = ai[aijnew->n]; 178 PLogFlops(aijnew->n); 179 return 0; 180 } 181 int MatLUFactor_AIJ(Mat matin,IS row,IS col) 182 { 183 Mat_AIJ *mat = (Mat_AIJ *) matin->data; 184 int ierr; 185 Mat fact; 186 ierr = MatLUFactorSymbolic_AIJ(matin,row,col,&fact); CHKERR(ierr); 187 ierr = MatLUFactorNumeric_AIJ(matin,&fact); CHKERR(ierr); 188 189 /* free all the data structures from mat */ 190 FREE(mat->a); 191 if (!mat->singlemalloc) {FREE(mat->i); FREE(mat->j);} 192 if (mat->diag) FREE(mat->diag); 193 if (mat->ilen) FREE(mat->ilen); 194 if (mat->imax) FREE(mat->imax); 195 if (mat->row && mat->col && mat->row != mat->col) { 196 ISDestroy(mat->row); 197 } 198 if (mat->col) ISDestroy(mat->col); 199 FREE(mat); 200 201 MEMCPY(matin,fact,sizeof(struct _Mat)); 202 FREE(fact); 203 return 0; 204 } 205 206 int MatSolve_AIJ(Mat mat,Vec bb, Vec xx) 207 { 208 Mat_AIJ *aij = (Mat_AIJ *) mat->data; 209 IS iscol = aij->col, isrow = aij->row; 210 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 211 int nz; 212 Scalar *x,*b,*tmp, *aa = aij->a, sum, *v; 213 214 if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor"); 215 216 ierr = VecGetArray(bb,&b); CHKERR(ierr); 217 ierr = VecGetArray(xx,&x); CHKERR(ierr); 218 tmp = aij->solve_work; 219 220 ierr = ISGetIndices(isrow,&r);CHKERR(ierr); 221 ierr = ISGetIndices(iscol,&c);CHKERR(ierr); c = c + (n-1); 222 223 /* forward solve the lower triangular */ 224 tmp[0] = b[*r++]; 225 for ( i=1; i<n; i++ ) { 226 v = aa + ai[i] - 1; 227 vi = aj + ai[i] - 1; 228 nz = aij->diag[i] - ai[i]; 229 sum = b[*r++]; 230 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 231 tmp[i] = sum; 232 } 233 234 /* backward solve the upper triangular */ 235 for ( i=n-1; i>=0; i-- ){ 236 v = aa + aij->diag[i]; 237 vi = aj + aij->diag[i]; 238 nz = ai[i+1] - aij->diag[i] - 1; 239 sum = tmp[i]; 240 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 241 x[*c--] = tmp[i] = sum*aa[aij->diag[i]-1]; 242 } 243 244 PLogFlops(2*aij->nz - aij->n); 245 return 0; 246 } 247 int MatSolveAdd_AIJ(Mat mat,Vec bb, Vec yy, Vec xx) 248 { 249 Mat_AIJ *aij = (Mat_AIJ *) mat->data; 250 IS iscol = aij->col, isrow = aij->row; 251 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 252 int nz; 253 Scalar *x,*b,*tmp, *aa = aij->a, sum, *v; 254 255 if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor"); 256 if (yy != xx) {ierr = VecCopy(yy,xx); CHKERR(ierr);} 257 258 ierr = VecGetArray(bb,&b); CHKERR(ierr); 259 ierr = VecGetArray(xx,&x); CHKERR(ierr); 260 tmp = aij->solve_work; 261 262 ierr = ISGetIndices(isrow,&r); CHKERR(ierr); 263 ierr = ISGetIndices(iscol,&c); CHKERR(ierr); c = c + (n-1); 264 265 /* forward solve the lower triangular */ 266 tmp[0] = b[*r++]; 267 for ( i=1; i<n; i++ ) { 268 v = aa + ai[i] - 1; 269 vi = aj + ai[i] - 1; 270 nz = aij->diag[i] - ai[i]; 271 sum = b[*r++]; 272 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 273 tmp[i] = sum; 274 } 275 276 /* backward solve the upper triangular */ 277 for ( i=n-1; i>=0; i-- ){ 278 v = aa + aij->diag[i]; 279 vi = aj + aij->diag[i]; 280 nz = ai[i+1] - aij->diag[i] - 1; 281 sum = tmp[i]; 282 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 283 tmp[i] = sum*aa[aij->diag[i]-1]; 284 x[*c--] += tmp[i]; 285 } 286 287 PLogFlops(2*aij->nz); 288 return 0; 289 } 290 /* -------------------------------------------------------------------*/ 291 int MatSolveTrans_AIJ(Mat mat,Vec bb, Vec xx) 292 { 293 Mat_AIJ *aij = (Mat_AIJ *) mat->data; 294 IS iscol = aij->col, isrow = aij->row, invisrow,inviscol; 295 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 296 int nz; 297 Scalar *x,*b,*tmp, *aa = aij->a, *v; 298 299 if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor"); 300 ierr = VecGetArray(bb,&b); CHKERR(ierr); 301 ierr = VecGetArray(xx,&x); CHKERR(ierr); 302 tmp = aij->solve_work; 303 304 /* invert the permutations */ 305 ierr = ISInvertPermutation(isrow,&invisrow); CHKERR(ierr); 306 ierr = ISInvertPermutation(iscol,&inviscol); CHKERR(ierr); 307 308 309 ierr = ISGetIndices(invisrow,&r); CHKERR(ierr); 310 ierr = ISGetIndices(inviscol,&c); CHKERR(ierr); 311 312 /* copy the b into temp work space according to permutation */ 313 for ( i=0; i<n; i++ ) tmp[c[i]] = b[i]; 314 315 /* forward solve the U^T */ 316 for ( i=0; i<n; i++ ) { 317 v = aa + aij->diag[i] - 1; 318 vi = aj + aij->diag[i]; 319 nz = ai[i+1] - aij->diag[i] - 1; 320 tmp[i] *= *v++; 321 while (nz--) { 322 tmp[*vi++ - 1] -= (*v++)*tmp[i]; 323 } 324 } 325 326 /* backward solve the L^T */ 327 for ( i=n-1; i>=0; i-- ){ 328 v = aa + aij->diag[i] - 2; 329 vi = aj + aij->diag[i] - 2; 330 nz = aij->diag[i] - ai[i]; 331 while (nz--) { 332 tmp[*vi-- - 1] -= (*v--)*tmp[i]; 333 } 334 } 335 336 /* copy tmp into x according to permutation */ 337 for ( i=0; i<n; i++ ) x[r[i]] = tmp[i]; 338 339 ISDestroy(invisrow); ISDestroy(inviscol); 340 341 PLogFlops(2*aij->nz-aij->n); 342 return 0; 343 } 344 345 int MatSolveTransAdd_AIJ(Mat mat,Vec bb, Vec zz,Vec xx) 346 { 347 Mat_AIJ *aij = (Mat_AIJ *) mat->data; 348 IS iscol = aij->col, isrow = aij->row, invisrow,inviscol; 349 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 350 int nz; 351 Scalar *x,*b,*tmp, *aa = aij->a, *v; 352 353 if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor"); 354 if (zz != xx) VecCopy(zz,xx); 355 356 ierr = VecGetArray(bb,&b); CHKERR(ierr); 357 ierr = VecGetArray(xx,&x); CHKERR(ierr); 358 tmp = aij->solve_work; 359 360 /* invert the permutations */ 361 ierr = ISInvertPermutation(isrow,&invisrow); CHKERR(ierr); 362 ierr = ISInvertPermutation(iscol,&inviscol); CHKERR(ierr); 363 364 365 ierr = ISGetIndices(invisrow,&r); CHKERR(ierr); 366 ierr = ISGetIndices(inviscol,&c); CHKERR(ierr); 367 368 /* copy the b into temp work space according to permutation */ 369 for ( i=0; i<n; i++ ) tmp[c[i]] = b[i]; 370 371 /* forward solve the U^T */ 372 for ( i=0; i<n; i++ ) { 373 v = aa + aij->diag[i] - 1; 374 vi = aj + aij->diag[i]; 375 nz = ai[i+1] - aij->diag[i] - 1; 376 tmp[i] *= *v++; 377 while (nz--) { 378 tmp[*vi++ - 1] -= (*v++)*tmp[i]; 379 } 380 } 381 382 /* backward solve the L^T */ 383 for ( i=n-1; i>=0; i-- ){ 384 v = aa + aij->diag[i] - 2; 385 vi = aj + aij->diag[i] - 2; 386 nz = aij->diag[i] - ai[i]; 387 while (nz--) { 388 tmp[*vi-- - 1] -= (*v--)*tmp[i]; 389 } 390 } 391 392 /* copy tmp into x according to permutation */ 393 for ( i=0; i<n; i++ ) x[r[i]] += tmp[i]; 394 395 ISDestroy(invisrow); ISDestroy(inviscol); 396 397 PLogFlops(2*aij->nz); 398 return 0; 399 } 400 /* ----------------------------------------------------------------*/ 401 int MatILUFactorSymbolic_AIJ(Mat mat,IS isrow,IS iscol,int levels,Mat *fact) 402 { 403 Mat_AIJ *aij = (Mat_AIJ *) mat->data, *aijnew; 404 IS isicol; 405 int *r,*ic, ierr, i, n = aij->m, *ai = aij->i, *aj = aij->j; 406 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz, *lfill,*ajfill,*ajtmpf; 407 int *idnew, idx, row,m,fm, nnz, nzi,len; 408 409 if (n != aij->n) SETERR(1,"Mat must be square"); 410 if (!isrow) {SETERR(1,"Must have row permutation");} 411 if (!iscol) {SETERR(1,"Must have column permutation");} 412 413 ierr = ISInvertPermutation(iscol,&isicol); CHKERR(ierr); 414 ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic); 415 416 /* get new row pointers */ 417 ainew = (int *) MALLOC( (n+1)*sizeof(int) ); CHKPTR(ainew); 418 ainew[0] = 1; 419 /* don't know how many column pointers are needed so estimate */ 420 jmax = 2*ai[n]; 421 ajnew = (int *) MALLOC( (jmax)*sizeof(int) ); CHKPTR(ajnew); 422 /* ajfill is level of fill for each fill entry */ 423 ajfill = (int *) MALLOC( (jmax)*sizeof(int) ); CHKPTR(ajfill); 424 /* fill is a linked list of nonzeros in active row */ 425 fill = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(fill); 426 /* lfill is level for each filled value */ 427 lfill = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(lfill); 428 /* idnew is location of diagonal in factor */ 429 idnew = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(idnew); 430 idnew[0] = 1; 431 432 for ( i=0; i<n; i++ ) { 433 /* first copy previous fill into linked list */ 434 nnz = nz = ai[r[i]+1] - ai[r[i]]; 435 ajtmp = aj + ai[r[i]] - 1; 436 fill[n] = n; 437 while (nz--) { 438 fm = n; 439 idx = ic[*ajtmp++ - 1]; 440 do { 441 m = fm; 442 fm = fill[m]; 443 } while (fm < idx); 444 fill[m] = idx; 445 fill[idx] = fm; 446 lfill[idx] = -1; 447 } 448 row = fill[n]; 449 while ( row < i ) { 450 ajtmp = ajnew + idnew[row] - 1; 451 ajtmpf = ajfill + idnew[row] - 1; 452 nz = ainew[row+1] - idnew[row]; 453 fm = row; 454 while (nz--) { 455 fm = n; 456 idx = *ajtmp++ - 1; 457 do { 458 m = fm; 459 fm = fill[m]; 460 } while (fm < idx); 461 if (fm != idx) { 462 lfill[idx] = *ajtmpf + 1; 463 if (lfill[idx] < levels) { 464 fill[m] = idx; 465 fill[idx] = fm; 466 fm = idx; 467 nnz++; 468 } 469 } 470 ajtmpf++; 471 } 472 row = fill[row]; 473 } 474 /* copy new filled row into permanent storage */ 475 ainew[i+1] = ainew[i] + nnz; 476 if (ainew[i+1] > jmax+1) { 477 /* allocate a longer ajnew */ 478 jmax += nnz*(n-i); 479 ajtmp = (int *) MALLOC( jmax*sizeof(int) );CHKPTR(ajtmp); 480 MEMCPY(ajtmp,ajnew,(ainew[i]-1)*sizeof(int)); 481 FREE(ajnew); 482 ajnew = ajtmp; 483 /* allocate a longer ajfill */ 484 ajtmp = (int *) MALLOC( jmax*sizeof(int) );CHKPTR(ajtmp); 485 MEMCPY(ajtmp,ajfill,(ainew[i]-1)*sizeof(int)); 486 FREE(ajfill); 487 ajfill = ajtmp; 488 } 489 ajtmp = ajnew + ainew[i] - 1; 490 ajtmpf = ajfill + ainew[i] - 1; 491 fm = fill[n]; 492 nzi = 0; 493 while (nnz--) { 494 if (fm < i) nzi++; 495 *ajtmp++ = fm + 1; 496 *ajtmpf++ = lfill[fm]; 497 fm = fill[fm]; 498 } 499 idnew[i] = ainew[i] + nzi; 500 } 501 FREE(ajfill); 502 ISDestroy(isicol); FREE(fill); FREE(lfill); 503 504 /* put together the new matrix */ 505 ierr = MatCreateSequentialAIJ(mat->comm,n, n, 0, 0, fact); CHKERR(ierr); 506 aijnew = (Mat_AIJ *) (*fact)->data; 507 FREE(aijnew->imax); 508 aijnew->singlemalloc = 0; 509 len = (ainew[n] - 1)*sizeof(Scalar); 510 /* the next line frees the default space generated by the Create() */ 511 FREE(aijnew->a); FREE(aijnew->ilen); 512 aijnew->a = (Scalar *) MALLOC( len ); CHKPTR(aijnew->a); 513 aijnew->j = ajnew; 514 aijnew->i = ainew; 515 aijnew->diag = idnew; 516 aijnew->ilen = 0; 517 aijnew->imax = 0; 518 aijnew->row = isrow; 519 aijnew->col = iscol; 520 aijnew->solve_work = (Scalar *) MALLOC( n*sizeof(Scalar)); 521 CHKPTR(aijnew->solve_work); 522 (*fact)->factor = FACTOR_LU; 523 return 0; 524 } 525