1 2 3 #include "aij.h" 4 #include "inline/spops.h" 5 /* 6 Factorization code for AIJ format. 7 */ 8 9 int MatiAIJLUFactorSymbolic(Mat mat,IS isrow,IS iscol,Mat *fact) 10 { 11 Matiaij *aij = (Matiaij *) mat->data, *aijnew; 12 IS isicol; 13 int *r,*ic, ierr, i, n = aij->m, *ai = aij->i, *aj = aij->j; 14 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz; 15 int *idnew, idx, row,m,fm, nnz, nzi,len; 16 17 if (n != aij->n) SETERR(1,"Mat must be square"); 18 if (!isrow) {SETERR(1,"Must have row permutation");} 19 if (!iscol) {SETERR(1,"Must have column permutation");} 20 21 if ((ierr = ISInvertPermutation(iscol,&isicol))) SETERR(ierr,0); 22 ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic); 23 24 /* get new row pointers */ 25 ainew = (int *) MALLOC( (n+1)*sizeof(int) ); CHKPTR(ainew); 26 ainew[0] = 1; 27 /* don't know how many column pointers are needed so estimate */ 28 jmax = 2*ai[n]; 29 ajnew = (int *) MALLOC( (jmax)*sizeof(int) ); CHKPTR(ajnew); 30 /* fill is a linked list of nonzeros in active row */ 31 fill = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(fill); 32 /* idnew is location of diagonal in factor */ 33 idnew = (int *) MALLOC( (n+1)*sizeof(int)); CHKPTR(idnew); 34 idnew[0] = 1; 35 36 for ( i=0; i<n; i++ ) { 37 /* first copy previous fill into linked list */ 38 nnz = nz = ai[r[i]+1] - ai[r[i]]; 39 ajtmp = aj + ai[r[i]] - 1; 40 fill[n] = n; 41 while (nz--) { 42 fm = n; 43 idx = ic[*ajtmp++ - 1]; 44 do { 45 m = fm; 46 fm = fill[m]; 47 } while (fm < idx); 48 fill[m] = idx; 49 fill[idx] = fm; 50 } 51 row = fill[n]; 52 while ( row < i ) { 53 ajtmp = ajnew + idnew[row] - 1; 54 nz = ainew[row+1] - idnew[row]; 55 fm = row; 56 while (nz--) { 57 fm = n; 58 idx = *ajtmp++ - 1; 59 do { 60 m = fm; 61 fm = fill[m]; 62 } while (fm < idx); 63 if (fm != idx) { 64 fill[m] = idx; 65 fill[idx] = fm; 66 fm = idx; 67 nnz++; 68 } 69 } 70 row = fill[row]; 71 } 72 /* copy new filled row into permanent storage */ 73 ainew[i+1] = ainew[i] + nnz; 74 if (ainew[i+1] > jmax+1) { 75 /* allocate a longer ajnew */ 76 jmax += nnz*(n-i); 77 ajtmp = (int *) MALLOC( jmax*sizeof(int) );CHKPTR(ajtmp); 78 MEMCPY(ajtmp,ajnew,(ainew[i]-1)*sizeof(int)); 79 FREE(ajnew); 80 ajnew = ajtmp; 81 } 82 ajtmp = ajnew + ainew[i] - 1; 83 fm = fill[n]; 84 nzi = 0; 85 while (nnz--) { 86 if (fm < i) nzi++; 87 *ajtmp++ = fm + 1; 88 fm = fill[fm]; 89 } 90 idnew[i] = ainew[i] + nzi; 91 } 92 93 ISDestroy(isicol); FREE(fill); 94 95 /* put together the new matrix */ 96 ierr = MatCreateSequentialAIJ(n, n, 0, 0, fact); CHKERR(ierr); 97 aijnew = (Matiaij *) (*fact)->data; 98 FREE(aijnew->imax); 99 aijnew->singlemalloc = 0; 100 len = (ainew[n] - 1)*sizeof(Scalar); 101 /* the next line frees the default space generated by the Create() */ 102 FREE(aijnew->a); FREE(aijnew->ilen); 103 aijnew->a = (Scalar *) MALLOC( len ); CHKPTR(aijnew->a); 104 aijnew->j = ajnew; 105 aijnew->i = ainew; 106 aijnew->diag = idnew; 107 aijnew->ilen = 0; 108 aijnew->imax = 0; 109 (*fact)->row = isrow; 110 (*fact)->col = iscol; 111 (*fact)->factor = FACTOR_LU; 112 return 0; 113 } 114 115 int MatiAIJLUFactorNumeric(Mat mat,Mat *infact) 116 { 117 Mat fact = *infact; 118 Matiaij *aij = (Matiaij *) mat->data, *aijnew = (Matiaij *)fact->data; 119 IS iscol = fact->col, isrow = fact->row, isicol; 120 int *r,*ic, ierr, i, j, n = aij->m, *ai = aijnew->i, *aj = aijnew->j; 121 int *ajtmpold, *ajtmp, nz, row,*pj; 122 Scalar *rtmp,*v, *pv, *pc, multiplier; 123 124 if ((ierr = ISInvertPermutation(iscol,&isicol))) SETERR(ierr,0); 125 ierr = ISGetIndices(isrow,&r); CHKERR(ierr); 126 ierr = ISGetIndices(isicol,&ic); CHKERR(ierr); 127 rtmp = (Scalar *) MALLOC( (n+1)*sizeof(Scalar) ); CHKPTR(rtmp); 128 129 for ( i=0; i<n; i++ ) { 130 nz = ai[i+1] - ai[i]; 131 ajtmp = aj + ai[i] - 1; 132 for ( j=0; j<nz; j++ ) rtmp[ajtmp[j]-1] = 0.0; 133 134 /* load in initial (unfactored row) */ 135 nz = aij->i[r[i]+1] - aij->i[r[i]]; 136 ajtmpold = aij->j + aij->i[r[i]] - 1; 137 v = aij->a + aij->i[r[i]] - 1; 138 for ( j=0; j<nz; j++ ) rtmp[ic[ajtmpold[j]-1]] = v[j]; 139 140 row = *ajtmp++ - 1; 141 while (row < i) { 142 pc = rtmp + row; 143 if (*pc != 0.0) { 144 nz = aijnew->diag[row] - ai[row]; 145 pv = aijnew->a + aijnew->diag[row] - 1; 146 pj = aijnew->j + aijnew->diag[row]; 147 multiplier = *pc * *pv++; 148 *pc = multiplier; 149 nz = ai[row+1] - ai[row] - 1 - nz; 150 while (nz-->0) rtmp[*pj++ - 1] -= multiplier* *pv++; 151 } 152 row = *ajtmp++ - 1; 153 } 154 /* finished row so stick it into aijnew->a */ 155 pv = aijnew->a + ai[i] - 1; 156 pj = aijnew->j + ai[i] - 1; 157 nz = ai[i+1] - ai[i]; 158 rtmp[i] = 1.0/rtmp[i]; 159 for ( j=0; j<nz; j++ ) {pv[j] = rtmp[pj[j]-1];} 160 } 161 FREE(rtmp); 162 ierr = ISRestoreIndices(isicol,&ic); CHKERR(ierr); 163 ierr = ISRestoreIndices(isrow,&r); CHKERR(ierr); 164 ierr = ISDestroy(isicol); CHKERR(ierr); 165 fact->factor = FACTOR_LU; 166 167 return 0; 168 } 169 int MatiAIJLUFactor(Mat matin,IS row,IS col) 170 { 171 Matiaij *mat = (Matiaij *) matin->data; 172 int ierr; 173 Mat fact; 174 ierr = MatiAIJLUFactorSymbolic(matin,row,col,&fact); CHKERR(ierr); 175 ierr = MatiAIJLUFactorNumeric(matin,&fact); CHKERR(ierr); 176 177 /* free all the data structures from mat */ 178 FREE(mat->a); 179 if (!mat->singlemalloc) {FREE(mat->i); FREE(mat->j);} 180 if (mat->diag) FREE(mat->diag); 181 if (mat->ilen) FREE(mat->ilen); 182 if (mat->imax) FREE(mat->imax); 183 if (matin->row && matin->col && matin->row != matin->col) { 184 ISDestroy(matin->row); 185 } 186 if (matin->col) ISDestroy(matin->col); 187 FREE(mat); 188 189 MEMCPY(matin,fact,sizeof(struct _Mat)); 190 FREE(fact); 191 return 0; 192 } 193 194 int MatiAIJSolve(Mat mat,Vec bb, Vec xx) 195 { 196 Matiaij *aij = (Matiaij *) mat->data; 197 IS iscol = mat->col, isrow = mat->row; 198 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 199 int nz; 200 Scalar *x,*b,*tmp, *aa = aij->a, sum, *v; 201 202 if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0); 203 if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0); 204 tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp); 205 206 if ((ierr = ISGetIndices(isrow,&r))) SETERR(ierr,0); 207 if ((ierr = ISGetIndices(iscol,&c))) SETERR(ierr,0); c = c + (n-1); 208 209 /* forward solve the lower triangular */ 210 tmp[0] = b[*r++]; 211 for ( i=1; i<n; i++ ) { 212 v = aa + ai[i] - 1; 213 vi = aj + ai[i] - 1; 214 nz = aij->diag[i] - ai[i]; 215 sum = b[*r++]; 216 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 217 tmp[i] = sum; 218 } 219 220 /* backward solve the upper triangular */ 221 for ( i=n-1; i>=0; i-- ){ 222 v = aa + aij->diag[i]; 223 vi = aj + aij->diag[i]; 224 nz = ai[i+1] - aij->diag[i] - 1; 225 sum = tmp[i]; 226 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 227 x[*c--] = tmp[i] = sum*aa[aij->diag[i]-1]; 228 } 229 230 FREE(tmp); 231 return 0; 232 } 233 int MatiAIJSolveAdd(Mat mat,Vec bb, Vec yy, Vec xx) 234 { 235 Matiaij *aij = (Matiaij *) mat->data; 236 IS iscol = mat->col, isrow = mat->row; 237 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 238 int nz; 239 Scalar *x,*b,*tmp, *aa = aij->a, sum, *v; 240 241 if (yy != xx) {ierr = VecCopy(yy,xx); CHKERR(ierr);} 242 243 if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0); 244 if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0); 245 tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp); 246 247 if ((ierr = ISGetIndices(isrow,&r))) SETERR(ierr,0); 248 if ((ierr = ISGetIndices(iscol,&c))) SETERR(ierr,0); c = c + (n-1); 249 250 /* forward solve the lower triangular */ 251 tmp[0] = b[*r++]; 252 for ( i=1; i<n; i++ ) { 253 v = aa + ai[i] - 1; 254 vi = aj + ai[i] - 1; 255 nz = aij->diag[i] - ai[i]; 256 sum = b[*r++]; 257 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 258 tmp[i] = sum; 259 } 260 261 /* backward solve the upper triangular */ 262 for ( i=n-1; i>=0; i-- ){ 263 v = aa + aij->diag[i]; 264 vi = aj + aij->diag[i]; 265 nz = ai[i+1] - aij->diag[i] - 1; 266 sum = tmp[i]; 267 while (nz--) sum -= *v++ * tmp[*vi++ - 1]; 268 tmp[i] = sum*aa[aij->diag[i]-1]; 269 x[*c--] += tmp[i]; 270 } 271 272 FREE(tmp); 273 return 0; 274 } 275 /* -------------------------------------------------------------------*/ 276 int MatiAIJSolveTrans(Mat mat,Vec bb, Vec xx) 277 { 278 Matiaij *aij = (Matiaij *) mat->data; 279 IS iscol = mat->col, isrow = mat->row, invisrow,inviscol; 280 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 281 int nz; 282 Scalar *x,*b,*tmp, *aa = aij->a, *v; 283 284 if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0); 285 if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0); 286 tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp); 287 288 /* invert the permutations */ 289 ierr = ISInvertPermutation(isrow,&invisrow); CHKERR(ierr); 290 ierr = ISInvertPermutation(iscol,&inviscol); CHKERR(ierr); 291 292 293 if ((ierr = ISGetIndices(invisrow,&r))) SETERR(ierr,0); 294 if ((ierr = ISGetIndices(inviscol,&c))) SETERR(ierr,0); 295 296 /* copy the b into temp work space according to permutation */ 297 for ( i=0; i<n; i++ ) tmp[c[i]] = b[i]; 298 299 /* forward solve the U^T */ 300 for ( i=0; i<n; i++ ) { 301 v = aa + aij->diag[i] - 1; 302 vi = aj + aij->diag[i]; 303 nz = ai[i+1] - aij->diag[i] - 1; 304 tmp[i] *= *v++; 305 while (nz--) { 306 tmp[*vi++ - 1] -= (*v++)*tmp[i]; 307 } 308 } 309 310 /* backward solve the L^T */ 311 for ( i=n-1; i>=0; i-- ){ 312 v = aa + aij->diag[i] - 2; 313 vi = aj + aij->diag[i] - 2; 314 nz = aij->diag[i] - ai[i]; 315 while (nz--) { 316 tmp[*vi-- - 1] -= (*v--)*tmp[i]; 317 } 318 } 319 320 /* copy tmp into x according to permutation */ 321 for ( i=0; i<n; i++ ) x[r[i]] = tmp[i]; 322 323 ISDestroy(invisrow); ISDestroy(inviscol); 324 325 FREE(tmp); 326 return 0; 327 } 328 329 int MatiAIJSolveTransAdd(Mat mat,Vec bb, Vec zz,Vec xx) 330 { 331 Matiaij *aij = (Matiaij *) mat->data; 332 IS iscol = mat->col, isrow = mat->row, invisrow,inviscol; 333 int *r,*c, ierr, i, n = aij->m, *vi, *ai = aij->i, *aj = aij->j; 334 int nz; 335 Scalar *x,*b,*tmp, *aa = aij->a, *v; 336 337 if (zz != xx) VecCopy(zz,xx); 338 339 if ((ierr = VecGetArray(bb,&b))) SETERR(ierr,0); 340 if ((ierr = VecGetArray(xx,&x))) SETERR(ierr,0); 341 tmp = (Scalar *) MALLOC(n*sizeof(Scalar)); CHKPTR(tmp); 342 343 /* invert the permutations */ 344 ierr = ISInvertPermutation(isrow,&invisrow); CHKERR(ierr); 345 ierr = ISInvertPermutation(iscol,&inviscol); CHKERR(ierr); 346 347 348 if ((ierr = ISGetIndices(invisrow,&r))) SETERR(ierr,0); 349 if ((ierr = ISGetIndices(inviscol,&c))) SETERR(ierr,0); 350 351 /* copy the b into temp work space according to permutation */ 352 for ( i=0; i<n; i++ ) tmp[c[i]] = b[i]; 353 354 /* forward solve the U^T */ 355 for ( i=0; i<n; i++ ) { 356 v = aa + aij->diag[i] - 1; 357 vi = aj + aij->diag[i]; 358 nz = ai[i+1] - aij->diag[i] - 1; 359 tmp[i] *= *v++; 360 while (nz--) { 361 tmp[*vi++ - 1] -= (*v++)*tmp[i]; 362 } 363 } 364 365 /* backward solve the L^T */ 366 for ( i=n-1; i>=0; i-- ){ 367 v = aa + aij->diag[i] - 2; 368 vi = aj + aij->diag[i] - 2; 369 nz = aij->diag[i] - ai[i]; 370 while (nz--) { 371 tmp[*vi-- - 1] -= (*v--)*tmp[i]; 372 } 373 } 374 375 /* copy tmp into x according to permutation */ 376 for ( i=0; i<n; i++ ) x[r[i]] += tmp[i]; 377 378 ISDestroy(invisrow); ISDestroy(inviscol); 379 380 FREE(tmp); 381 return 0; 382 383 } 384