1 #ifndef lint 2 static char vcid[] = "$Id: aijfact.c,v 1.10 1995/03/17 04:56:47 bsmith Exp curfman $"; 3 #endif 4 5 6 #include "aij.h" 7 #include "inline/spops.h" 8 /* 9 Factorization code for AIJ format. 10 */ 11 12 int MatLUFactorSymbolic_AIJ(Mat mat,IS isrow,IS iscol,Mat *fact) 13 { 14 Mat_AIJ *aij = (Mat_AIJ *) mat->data, *aijnew; 15 IS isicol; 16 int *r,*ic, ierr, i, n = aij->m, *ai = aij->i, *aj = aij->j; 17 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz; 18 int *idnew, idx, row,m,fm, nnz, nzi,len; 19 20 if (n != aij->n) SETERR(1,"Mat must be square"); 21 if (!isrow) {SETERR(1,"Must have row permutation");} 22 if (!iscol) {SETERR(1,"Must have column permutation");} 23 24 if ((ierr = ISInvertPermutation(iscol,&isicol))) SETERR(ierr,0); 25 ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic); 26 27 /* get new row pointers */ 28 ainew = (int *) MALLOC( (n+1)*sizeof(int) ); CHKPTR(ainew); 29 ainew[0] = 1; 30 /* don't know how many column pointers are needed so estimate */ 31 jmax = 2*ai[n]; 32 ajnew = (int *) MALLOC( (jmax)*sizeof(int) ); CHKPTR(ajnew); 33 /* fill is a linked list of nonzeros in active row */ 34 fill = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(fill); 35 /* idnew is location of diagonal in factor */ 36 idnew = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(idnew); 37 idnew[0] = 1; 38 39 for ( i=0; i<n; i++ ) { 40 /* first copy previous fill into linked list */ 41 nnz = nz = ai[r[i]+1] - ai[r[i]]; 42 ajtmp = aj + ai[r[i]] - 1; 43 fill[n] = n; 44 while (nz--) { 45 fm = n; 46 idx = ic[*ajtmp++ - 1]; 47 do { 48 m = fm; 49 fm = fill[m]; 50 } while (fm < idx); 51 fill[m] = idx; 52 fill[idx] = fm; 53 } 54 row = fill[n]; 55 while ( row < i ) { 56 ajtmp = ajnew + idnew[row] - 1; 57 nz = ainew[row+1] - idnew[row]; 58 fm = row; 59 while (nz--) { 60 fm = n; 61 idx = *ajtmp++ - 1; 62 do { 63 m = fm; 64 fm = fill[m]; 65 } while (fm < idx); 66 if (fm != idx) { 67 fill[m] = idx; 68 fill[idx] = fm; 69 fm = idx; 70 nnz++; 71 } 72 } 73 row = fill[row]; 74 } 75 /* copy new filled row into permanent storage */ 76 ainew[i+1] = ainew[i] + nnz; 77 if (ainew[i+1] > jmax+1) { 78 /* allocate a longer ajnew */ 79 jmax += nnz*(n-i); 80 ajtmp = (int *) MALLOC( jmax*sizeof(int) );CHKPTR(ajtmp); 81 MEMCPY(ajtmp,ajnew,(ainew[i]-1)*sizeof(int)); 82 FREE(ajnew); 83 ajnew = ajtmp; 84 } 85 ajtmp = ajnew + ainew[i] - 1; 86 fm = fill[n]; 87 nzi = 0; 88 while (nnz--) { 89 if (fm < i) nzi++; 90 *ajtmp++ = fm + 1; 91 fm = fill[fm]; 92 } 93 idnew[i] = ainew[i] + nzi; 94 } 95 96 ISDestroy(isicol); FREE(fill); 97 98 /* put together the new matrix */ 99 ierr = MatCreateSequentialAIJ(n, n, 0, 0, fact); CHKERR(ierr); 100 aijnew = (Mat_AIJ *) (*fact)->data; 101 FREE(aijnew->imax); 102 aijnew->singlemalloc = 0; 103 len = (ainew[n] - 1)*sizeof(Scalar); 104 /* the next line frees the default space generated by the Create() */ 105 FREE(aijnew->a); FREE(aijnew->ilen); 106 aijnew->a = (Scalar *) MALLOC( len ); CHKPTR(aijnew->a); 107 aijnew->j = ajnew; 108 aijnew->i = ainew; 109 aijnew->diag = idnew; 110 aijnew->ilen = 0; 111 aijnew->imax = 0; 112 (*fact)->row = isrow; 113 (*fact)->col = iscol; 114 (*fact)->factor = FACTOR_LU; 115 return 0; 116 } 117 118 int MatLUFactorNumeric_AIJ(Mat mat,Mat *infact) 119 { 120 Mat fact = *infact; 121 Mat_AIJ *aij = (Mat_AIJ *) mat->data, *aijnew = (Mat_AIJ *)fact->data; 122 IS iscol = fact->col, isrow = fact->row, isicol; 123 int *r,*ic, ierr, i, j, n = aij->m, *ai = aijnew->i, *aj = aijnew->j; 124 int *ajtmpold, *ajtmp, nz, row,*pj; 125 Scalar *rtmp,*v, *pv, *pc, multiplier; 126 127 if ((ierr = ISInvertPermutation(iscol,&isicol))) SETERR(ierr,0); 128 ierr = ISGetIndices(isrow,&r); CHKERR(ierr); 129 ierr = ISGetIndices(isicol,&ic); CHKERR(ierr); 130 rtmp = (Scalar *) MALLOC( (n+1)*sizeof(Scalar) ); CHKPTR(rtmp); 131 132 for ( i=0; i<n; i++ ) { 133 nz = ai[i+1] - ai[i]; 134 ajtmp = aj + ai[i] - 1; 135 for ( j=0; j<nz; j++ ) rtmp[ajtmp[j]-1] = 0.0; 136 137 /* load in initial (unfactored row) */ 138 nz = aij->i[r[i]+1] - aij->i[r[i]]; 139 ajtmpold = aij->j + aij->i[r[i]] - 1; 140 v = aij->a + aij->i[r[i]] - 1; 141 for ( j=0; j<nz; j++ ) rtmp[ic[ajtmpold[j]-1]] = v[j]; 142 143 row = *ajtmp++ - 1; 144 while (row < i) { 145 pc = rtmp + row; 146 if (*pc != 0.0) { 147 nz = aijnew->diag[row] - ai[row]; 148 pv = aijnew->a + aijnew->diag[row] - 1; 149 pj = aijnew->j + aijnew->diag[row]; 150 multiplier = *pc * *pv++; 151 *pc = multiplier; 152 nz = ai[row+1] - ai[row] - 1 - nz; 153 while (nz-->0) rtmp[*pj++ - 1] -= multiplier* *pv++; 154 } 155 row = *ajtmp++ - 1; 156 } 157 /* finished row so stick it into aijnew->a */ 158 pv = aijnew->a + ai[i] - 1; 159 pj = aijnew->j + ai[i] - 1; 160 nz = ai[i+1] - ai[i]; 161 rtmp[i] = 1.0/rtmp[i]; 162 for ( j=0; j<nz; j++ ) {pv[j] = rtmp[pj[j]-1];} 163 } 164 FREE(rtmp); 165 ierr = ISRestoreIndices(isicol,&ic); CHKERR(ierr); 166 ierr = ISRestoreIndices(isrow,&r); CHKERR(ierr); 167 ierr = ISDestroy(isicol); CHKERR(ierr); 168 fact->factor = FACTOR_LU; 169 aijnew->assembled = 1; 170 return 0; 171 } 172 int MatLUFactor_AIJ(Mat matin,IS row,IS col) 173 { 174 Mat_AIJ *mat = (Mat_AIJ *) matin->data; 175 int ierr; 176 Mat fact; 177 ierr = MatLUFactorSymbolic_AIJ(matin,row,col,&fact); CHKERR(ierr); 178 ierr = MatLUFactorNumeric_AIJ(matin,&fact); CHKERR(ierr); 179 180 /* free all the data structures from mat */ 181 FREE(mat->a); 182 if (!mat->singlemalloc) {FREE(mat->i); FREE(mat->j);} 183 if (mat->diag) FREE(mat->diag); 184 if (mat->ilen) FREE(mat->ilen); 185 if (mat->imax) FREE(mat->imax); 186 if (matin->row && matin->col && matin->row != matin->col) { 187 ISDestroy(matin->row); 188 } 189 if (matin->col) ISDestroy(matin->col); 190 FREE(mat); 191 192 MEMCPY(matin,fact,sizeof(struct _Mat)); 193 FREE(fact); 194 return 0; 195 } 196 197 int MatSolve_AIJ(Mat mat,Vec bb, Vec xx) 198 { 199 Mat_AIJ *aij = (Mat_AIJ *) mat->data; 200 IS iscol = mat->col, isrow = mat->row; 201 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 202 int nz; 203 Scalar *x,*b,*tmp, *aa = aij->a, sum, *v; 204 205 if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor"); 206 207 if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0); 208 if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0); 209 tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp); 210 211 if ((ierr = ISGetIndices(isrow,&r))) SETERR(ierr,0); 212 if ((ierr = ISGetIndices(iscol,&c))) SETERR(ierr,0); c = c + (n-1); 213 214 /* forward solve the lower triangular */ 215 tmp[0] = b[*r++]; 216 for ( i=1; i<n; i++ ) { 217 v = aa + ai[i] - 1; 218 vi = aj + ai[i] - 1; 219 nz = aij->diag[i] - ai[i]; 220 sum = b[*r++]; 221 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 222 tmp[i] = sum; 223 } 224 225 /* backward solve the upper triangular */ 226 for ( i=n-1; i>=0; i-- ){ 227 v = aa + aij->diag[i]; 228 vi = aj + aij->diag[i]; 229 nz = ai[i+1] - aij->diag[i] - 1; 230 sum = tmp[i]; 231 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 232 x[*c--] = tmp[i] = sum*aa[aij->diag[i]-1]; 233 } 234 235 FREE(tmp); 236 return 0; 237 } 238 int MatSolveAdd_AIJ(Mat mat,Vec bb, Vec yy, Vec xx) 239 { 240 Mat_AIJ *aij = (Mat_AIJ *) mat->data; 241 IS iscol = mat->col, isrow = mat->row; 242 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 243 int nz; 244 Scalar *x,*b,*tmp, *aa = aij->a, sum, *v; 245 246 if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor"); 247 if (yy != xx) {ierr = VecCopy(yy,xx); CHKERR(ierr);} 248 249 if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0); 250 if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0); 251 tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp); 252 253 if ((ierr = ISGetIndices(isrow,&r))) SETERR(ierr,0); 254 if ((ierr = ISGetIndices(iscol,&c))) SETERR(ierr,0); c = c + (n-1); 255 256 /* forward solve the lower triangular */ 257 tmp[0] = b[*r++]; 258 for ( i=1; i<n; i++ ) { 259 v = aa + ai[i] - 1; 260 vi = aj + ai[i] - 1; 261 nz = aij->diag[i] - ai[i]; 262 sum = b[*r++]; 263 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 264 tmp[i] = sum; 265 } 266 267 /* backward solve the upper triangular */ 268 for ( i=n-1; i>=0; i-- ){ 269 v = aa + aij->diag[i]; 270 vi = aj + aij->diag[i]; 271 nz = ai[i+1] - aij->diag[i] - 1; 272 sum = tmp[i]; 273 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 274 tmp[i] = sum*aa[aij->diag[i]-1]; 275 x[*c--] += tmp[i]; 276 } 277 278 FREE(tmp); 279 return 0; 280 } 281 /* -------------------------------------------------------------------*/ 282 int MatSolveTrans_AIJ(Mat mat,Vec bb, Vec xx) 283 { 284 Mat_AIJ *aij = (Mat_AIJ *) mat->data; 285 IS iscol = mat->col, isrow = mat->row, invisrow,inviscol; 286 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 287 int nz; 288 Scalar *x,*b,*tmp, *aa = aij->a, *v; 289 290 if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor"); 291 if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0); 292 if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0); 293 tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp); 294 295 /* invert the permutations */ 296 ierr = ISInvertPermutation(isrow,&invisrow); CHKERR(ierr); 297 ierr = ISInvertPermutation(iscol,&inviscol); CHKERR(ierr); 298 299 300 if ((ierr = ISGetIndices(invisrow,&r))) SETERR(ierr,0); 301 if ((ierr = ISGetIndices(inviscol,&c))) SETERR(ierr,0); 302 303 /* copy the b into temp work space according to permutation */ 304 for ( i=0; i<n; i++ ) tmp[c[i]] = b[i]; 305 306 /* forward solve the U^T */ 307 for ( i=0; i<n; i++ ) { 308 v = aa + aij->diag[i] - 1; 309 vi = aj + aij->diag[i]; 310 nz = ai[i+1] - aij->diag[i] - 1; 311 tmp[i] *= *v++; 312 while (nz--) { 313 tmp[*vi++ - 1] -= (*v++)*tmp[i]; 314 } 315 } 316 317 /* backward solve the L^T */ 318 for ( i=n-1; i>=0; i-- ){ 319 v = aa + aij->diag[i] - 2; 320 vi = aj + aij->diag[i] - 2; 321 nz = aij->diag[i] - ai[i]; 322 while (nz--) { 323 tmp[*vi-- - 1] -= (*v--)*tmp[i]; 324 } 325 } 326 327 /* copy tmp into x according to permutation */ 328 for ( i=0; i<n; i++ ) x[r[i]] = tmp[i]; 329 330 ISDestroy(invisrow); ISDestroy(inviscol); 331 332 FREE(tmp); 333 return 0; 334 } 335 336 int MatSolveTransAdd_AIJ(Mat mat,Vec bb, Vec zz,Vec xx) 337 { 338 Mat_AIJ *aij = (Mat_AIJ *) mat->data; 339 IS iscol = mat->col, isrow = mat->row, invisrow,inviscol; 340 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 341 int nz; 342 Scalar *x,*b,*tmp, *aa = aij->a, *v; 343 344 if (mat->factor != FACTOR_LU) SETERR(1,"Cannot solve with factor"); 345 if (zz != xx) VecCopy(zz,xx); 346 347 if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0); 348 if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0); 349 tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp); 350 351 /* invert the permutations */ 352 ierr = ISInvertPermutation(isrow,&invisrow); CHKERR(ierr); 353 ierr = ISInvertPermutation(iscol,&inviscol); CHKERR(ierr); 354 355 356 if ((ierr = ISGetIndices(invisrow,&r))) SETERR(ierr,0); 357 if ((ierr = ISGetIndices(inviscol,&c))) SETERR(ierr,0); 358 359 /* copy the b into temp work space according to permutation */ 360 for ( i=0; i<n; i++ ) tmp[c[i]] = b[i]; 361 362 /* forward solve the U^T */ 363 for ( i=0; i<n; i++ ) { 364 v = aa + aij->diag[i] - 1; 365 vi = aj + aij->diag[i]; 366 nz = ai[i+1] - aij->diag[i] - 1; 367 tmp[i] *= *v++; 368 while (nz--) { 369 tmp[*vi++ - 1] -= (*v++)*tmp[i]; 370 } 371 } 372 373 /* backward solve the L^T */ 374 for ( i=n-1; i>=0; i-- ){ 375 v = aa + aij->diag[i] - 2; 376 vi = aj + aij->diag[i] - 2; 377 nz = aij->diag[i] - ai[i]; 378 while (nz--) { 379 tmp[*vi-- - 1] -= (*v--)*tmp[i]; 380 } 381 } 382 383 /* copy tmp into x according to permutation */ 384 for ( i=0; i<n; i++ ) x[r[i]] += tmp[i]; 385 386 ISDestroy(invisrow); ISDestroy(inviscol); 387 388 FREE(tmp); 389 return 0; 390 391 } 392 /* ----------------------------------------------------------------*/ 393 int MatILU_AIJ(Mat mat,IS isrow,IS iscol,int levels,Mat *fact) 394 { 395 Mat_AIJ *aij = (Mat_AIJ *) mat->data, *aijnew; 396 IS isicol; 397 int *r,*ic, ierr, i, n = aij->m, *ai = aij->i, *aj = aij->j; 398 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz, *lfill,*ajfill,*ajtmpf; 399 int *idnew, idx, row,m,fm, nnz, nzi,len; 400 401 if (n != aij->n) SETERR(1,"Mat must be square"); 402 if (!isrow) {SETERR(1,"Must have row permutation");} 403 if (!iscol) {SETERR(1,"Must have column permutation");} 404 405 if ((ierr = ISInvertPermutation(iscol,&isicol))) SETERR(ierr,0); 406 ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic); 407 408 /* get new row pointers */ 409 ainew = (int *) MALLOC( (n+1)*sizeof(int) ); CHKPTR(ainew); 410 ainew[0] = 1; 411 /* don't know how many column pointers are needed so estimate */ 412 jmax = 2*ai[n]; 413 ajnew = (int *) MALLOC( (jmax)*sizeof(int) ); CHKPTR(ajnew); 414 /* ajfill is level of fill for each fill entry */ 415 ajfill = (int *) MALLOC( (jmax)*sizeof(int) ); CHKPTR(ajfill); 416 /* fill is a linked list of nonzeros in active row */ 417 fill = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(fill); 418 /* lfill is level for each filled value */ 419 lfill = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(lfill); 420 /* idnew is location of diagonal in factor */ 421 idnew = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(idnew); 422 idnew[0] = 1; 423 424 for ( i=0; i<n; i++ ) { 425 /* first copy previous fill into linked list */ 426 nnz = nz = ai[r[i]+1] - ai[r[i]]; 427 ajtmp = aj + ai[r[i]] - 1; 428 fill[n] = n; 429 while (nz--) { 430 fm = n; 431 idx = ic[*ajtmp++ - 1]; 432 do { 433 m = fm; 434 fm = fill[m]; 435 } while (fm < idx); 436 fill[m] = idx; 437 fill[idx] = fm; 438 lfill[idx] = -1; 439 } 440 row = fill[n]; 441 while ( row < i ) { 442 ajtmp = ajnew + idnew[row] - 1; 443 ajtmpf = ajfill + idnew[row] - 1; 444 nz = ainew[row+1] - idnew[row]; 445 fm = row; 446 while (nz--) { 447 fm = n; 448 idx = *ajtmp++ - 1; 449 do { 450 m = fm; 451 fm = fill[m]; 452 } while (fm < idx); 453 if (fm != idx) { 454 lfill[idx] = *ajtmpf + 1; 455 if (lfill[idx] < levels) { 456 fill[m] = idx; 457 fill[idx] = fm; 458 fm = idx; 459 nnz++; 460 } 461 } 462 ajtmpf++; 463 } 464 row = fill[row]; 465 } 466 /* copy new filled row into permanent storage */ 467 ainew[i+1] = ainew[i] + nnz; 468 if (ainew[i+1] > jmax+1) { 469 /* allocate a longer ajnew */ 470 jmax += nnz*(n-i); 471 ajtmp = (int *) MALLOC( jmax*sizeof(int) );CHKPTR(ajtmp); 472 MEMCPY(ajtmp,ajnew,(ainew[i]-1)*sizeof(int)); 473 FREE(ajnew); 474 ajnew = ajtmp; 475 /* allocate a longer ajfill */ 476 ajtmp = (int *) MALLOC( jmax*sizeof(int) );CHKPTR(ajtmp); 477 MEMCPY(ajtmp,ajfill,(ainew[i]-1)*sizeof(int)); 478 FREE(ajfill); 479 ajfill = ajtmp; 480 } 481 ajtmp = ajnew + ainew[i] - 1; 482 ajtmpf = ajfill + ainew[i] - 1; 483 fm = fill[n]; 484 nzi = 0; 485 while (nnz--) { 486 if (fm < i) nzi++; 487 *ajtmp++ = fm + 1; 488 *ajtmpf++ = lfill[fm]; 489 fm = fill[fm]; 490 } 491 idnew[i] = ainew[i] + nzi; 492 } 493 FREE(ajfill); 494 ISDestroy(isicol); FREE(fill); FREE(lfill); 495 496 /* put together the new matrix */ 497 ierr = MatCreateSequentialAIJ(n, n, 0, 0, fact); CHKERR(ierr); 498 aijnew = (Mat_AIJ *) (*fact)->data; 499 FREE(aijnew->imax); 500 aijnew->singlemalloc = 0; 501 len = (ainew[n] - 1)*sizeof(Scalar); 502 /* the next line frees the default space generated by the Create() */ 503 FREE(aijnew->a); FREE(aijnew->ilen); 504 aijnew->a = (Scalar *) MALLOC( len ); CHKPTR(aijnew->a); 505 aijnew->j = ajnew; 506 aijnew->i = ainew; 507 aijnew->diag = idnew; 508 aijnew->ilen = 0; 509 aijnew->imax = 0; 510 (*fact)->row = isrow; 511 (*fact)->col = iscol; 512 (*fact)->factor = FACTOR_LU; 513 return 0; 514 } 515