1 2 /* 3 Factorization code for BAIJ format. 4 */ 5 6 #include <../src/mat/impls/baij/seq/baij.h> 7 #include <petsc/private/kernels/blockinvert.h> 8 #include <petscbt.h> 9 #include <../src/mat/utils/freespace.h> 10 11 /* ----------------------------------------------------------------*/ 12 extern PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat,Mat,MatDuplicateOption,PetscBool); 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering" 16 /* 17 This is not much faster than MatLUFactorNumeric_SeqBAIJ_N() but the solve is faster at least sometimes 18 */ 19 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info) 20 { 21 Mat C =B; 22 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; 23 PetscErrorCode ierr; 24 PetscInt i,j,k,ipvt[15]; 25 const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ajtmp,*bjtmp,*bdiag=b->diag,*pj; 26 PetscInt nz,nzL,row; 27 MatScalar *rtmp,*pc,*mwork,*pv,*vv,work[225]; 28 const MatScalar *v,*aa=a->a; 29 PetscInt bs2 = a->bs2,bs=A->rmap->bs,flg; 30 PetscInt sol_ver; 31 PetscBool allowzeropivot,zeropivotdetected; 32 33 PetscFunctionBegin; 34 ierr = PetscOptionsGetInt(NULL,((PetscObject)A)->prefix,"-sol_ver",&sol_ver,NULL);CHKERRQ(ierr); 35 36 /* generate work space needed by the factorization */ 37 ierr = PetscMalloc2(bs2*n,&rtmp,bs2,&mwork);CHKERRQ(ierr); 38 ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); 39 40 for (i=0; i<n; i++) { 41 /* zero rtmp */ 42 /* L part */ 43 nz = bi[i+1] - bi[i]; 44 bjtmp = bj + bi[i]; 45 for (j=0; j<nz; j++) { 46 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 47 } 48 49 /* U part */ 50 nz = bdiag[i] - bdiag[i+1]; 51 bjtmp = bj + bdiag[i+1]+1; 52 for (j=0; j<nz; j++) { 53 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 54 } 55 56 /* load in initial (unfactored row) */ 57 nz = ai[i+1] - ai[i]; 58 ajtmp = aj + ai[i]; 59 v = aa + bs2*ai[i]; 60 for (j=0; j<nz; j++) { 61 ierr = PetscMemcpy(rtmp+bs2*ajtmp[j],v+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 62 } 63 64 /* elimination */ 65 bjtmp = bj + bi[i]; 66 nzL = bi[i+1] - bi[i]; 67 for (k=0; k < nzL; k++) { 68 row = bjtmp[k]; 69 pc = rtmp + bs2*row; 70 for (flg=0,j=0; j<bs2; j++) { 71 if (pc[j]!=0.0) { 72 flg = 1; 73 break; 74 } 75 } 76 if (flg) { 77 pv = b->a + bs2*bdiag[row]; 78 PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); 79 /*ierr = PetscKernel_A_gets_A_times_B_15(pc,pv,mwork);CHKERRQ(ierr);*/ 80 pj = b->j + bdiag[row+1]+1; /* begining of U(row,:) */ 81 pv = b->a + bs2*(bdiag[row+1]+1); 82 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries inU(row,:), excluding diag */ 83 for (j=0; j<nz; j++) { 84 vv = rtmp + bs2*pj[j]; 85 PetscKernel_A_gets_A_minus_B_times_C(bs,vv,pc,pv); 86 /* ierr = PetscKernel_A_gets_A_minus_B_times_C_15(vv,pc,pv);CHKERRQ(ierr); */ 87 pv += bs2; 88 } 89 ierr = PetscLogFlops(2*bs2*bs*(nz+1)-bs2);CHKERRQ(ierr); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 90 } 91 } 92 93 /* finished row so stick it into b->a */ 94 /* L part */ 95 pv = b->a + bs2*bi[i]; 96 pj = b->j + bi[i]; 97 nz = bi[i+1] - bi[i]; 98 for (j=0; j<nz; j++) { 99 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 100 } 101 102 /* Mark diagonal and invert diagonal for simplier triangular solves */ 103 pv = b->a + bs2*bdiag[i]; 104 pj = b->j + bdiag[i]; 105 ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); 106 107 allowzeropivot = PetscNot(A->erroriffailure); 108 ierr = PetscKernel_A_gets_inverse_A_15(pv,ipvt,work,info->shiftamount,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 109 if (zeropivotdetected) C->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 110 111 /* U part */ 112 pv = b->a + bs2*(bdiag[i+1]+1); 113 pj = b->j + bdiag[i+1]+1; 114 nz = bdiag[i] - bdiag[i+1] - 1; 115 for (j=0; j<nz; j++) { 116 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 117 } 118 } 119 120 ierr = PetscFree2(rtmp,mwork);CHKERRQ(ierr); 121 122 C->ops->solve = MatSolve_SeqBAIJ_15_NaturalOrdering_ver1; 123 C->ops->solvetranspose = MatSolve_SeqBAIJ_N_NaturalOrdering; 124 C->assembled = PETSC_TRUE; 125 126 ierr = PetscLogFlops(1.333333333333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 127 PetscFunctionReturn(0); 128 } 129 130 #undef __FUNCT__ 131 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_N" 132 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N(Mat B,Mat A,const MatFactorInfo *info) 133 { 134 Mat C =B; 135 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; 136 IS isrow = b->row,isicol = b->icol; 137 PetscErrorCode ierr; 138 const PetscInt *r,*ic; 139 PetscInt i,j,k,n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 140 PetscInt *ajtmp,*bjtmp,nz,nzL,row,*bdiag=b->diag,*pj; 141 MatScalar *rtmp,*pc,*mwork,*v,*pv,*aa=a->a; 142 PetscInt bs=A->rmap->bs,bs2 = a->bs2,*v_pivots,flg; 143 MatScalar *v_work; 144 PetscBool col_identity,row_identity,both_identity; 145 PetscBool allowzeropivot,zeropivotdetected; 146 147 PetscFunctionBegin; 148 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 149 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 150 allowzeropivot = PetscNot(A->erroriffailure); 151 152 ierr = PetscMalloc1(bs2*n,&rtmp);CHKERRQ(ierr); 153 ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); 154 155 /* generate work space needed by dense LU factorization */ 156 ierr = PetscMalloc3(bs,&v_work,bs2,&mwork,bs,&v_pivots);CHKERRQ(ierr); 157 158 for (i=0; i<n; i++) { 159 /* zero rtmp */ 160 /* L part */ 161 nz = bi[i+1] - bi[i]; 162 bjtmp = bj + bi[i]; 163 for (j=0; j<nz; j++) { 164 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 165 } 166 167 /* U part */ 168 nz = bdiag[i] - bdiag[i+1]; 169 bjtmp = bj + bdiag[i+1]+1; 170 for (j=0; j<nz; j++) { 171 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 172 } 173 174 /* load in initial (unfactored row) */ 175 nz = ai[r[i]+1] - ai[r[i]]; 176 ajtmp = aj + ai[r[i]]; 177 v = aa + bs2*ai[r[i]]; 178 for (j=0; j<nz; j++) { 179 ierr = PetscMemcpy(rtmp+bs2*ic[ajtmp[j]],v+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 180 } 181 182 /* elimination */ 183 bjtmp = bj + bi[i]; 184 nzL = bi[i+1] - bi[i]; 185 for (k=0; k < nzL; k++) { 186 row = bjtmp[k]; 187 pc = rtmp + bs2*row; 188 for (flg=0,j=0; j<bs2; j++) { 189 if (pc[j]!=0.0) { 190 flg = 1; 191 break; 192 } 193 } 194 if (flg) { 195 pv = b->a + bs2*bdiag[row]; 196 PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); /* *pc = *pc * (*pv); */ 197 pj = b->j + bdiag[row+1]+1; /* begining of U(row,:) */ 198 pv = b->a + bs2*(bdiag[row+1]+1); 199 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries inU(row,:), excluding diag */ 200 for (j=0; j<nz; j++) { 201 PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); 202 } 203 ierr = PetscLogFlops(2*bs2*bs*(nz+1)-bs2);CHKERRQ(ierr); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 204 } 205 } 206 207 /* finished row so stick it into b->a */ 208 /* L part */ 209 pv = b->a + bs2*bi[i]; 210 pj = b->j + bi[i]; 211 nz = bi[i+1] - bi[i]; 212 for (j=0; j<nz; j++) { 213 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 214 } 215 216 /* Mark diagonal and invert diagonal for simplier triangular solves */ 217 pv = b->a + bs2*bdiag[i]; 218 pj = b->j + bdiag[i]; 219 /* if (*pj != i)SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"row %d != *pj %d",i,*pj); */ 220 ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); 221 222 ierr = PetscKernel_A_gets_inverse_A(bs,pv,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 223 if (zeropivotdetected) B->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 224 225 /* U part */ 226 pv = b->a + bs2*(bdiag[i+1]+1); 227 pj = b->j + bdiag[i+1]+1; 228 nz = bdiag[i] - bdiag[i+1] - 1; 229 for (j=0; j<nz; j++) { 230 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 231 } 232 } 233 234 ierr = PetscFree(rtmp);CHKERRQ(ierr); 235 ierr = PetscFree3(v_work,mwork,v_pivots);CHKERRQ(ierr); 236 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 237 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 238 239 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 240 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 241 242 both_identity = (PetscBool) (row_identity && col_identity); 243 if (both_identity) { 244 C->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering; 245 } else { 246 C->ops->solve = MatSolve_SeqBAIJ_N; 247 } 248 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_N; 249 250 C->assembled = PETSC_TRUE; 251 252 ierr = PetscLogFlops(1.333333333333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 253 PetscFunctionReturn(0); 254 } 255 256 /* 257 ilu(0) with natural ordering under new data structure. 258 See MatILUFactorSymbolic_SeqAIJ_ilu0() for detailed description 259 because this code is almost identical to MatILUFactorSymbolic_SeqAIJ_ilu0_inplace(). 260 */ 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatILUFactorSymbolic_SeqBAIJ_ilu0" 264 PetscErrorCode MatILUFactorSymbolic_SeqBAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 265 { 266 267 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 268 PetscErrorCode ierr; 269 PetscInt n=a->mbs,*ai=a->i,*aj,*adiag=a->diag,bs2 = a->bs2; 270 PetscInt i,j,nz,*bi,*bj,*bdiag,bi_temp; 271 272 PetscFunctionBegin; 273 ierr = MatDuplicateNoCreate_SeqBAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 274 b = (Mat_SeqBAIJ*)(fact)->data; 275 276 /* allocate matrix arrays for new data structure */ 277 ierr = PetscMalloc3(bs2*ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);CHKERRQ(ierr); 278 ierr = PetscLogObjectMemory((PetscObject)fact,ai[n]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 279 280 b->singlemalloc = PETSC_TRUE; 281 b->free_a = PETSC_TRUE; 282 b->free_ij = PETSC_TRUE; 283 fact->preallocated = PETSC_TRUE; 284 fact->assembled = PETSC_TRUE; 285 if (!b->diag) { 286 ierr = PetscMalloc1(n+1,&b->diag);CHKERRQ(ierr); 287 ierr = PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 288 } 289 bdiag = b->diag; 290 291 if (n > 0) { 292 ierr = PetscMemzero(b->a,bs2*ai[n]*sizeof(MatScalar));CHKERRQ(ierr); 293 } 294 295 /* set bi and bj with new data structure */ 296 bi = b->i; 297 bj = b->j; 298 299 /* L part */ 300 bi[0] = 0; 301 for (i=0; i<n; i++) { 302 nz = adiag[i] - ai[i]; 303 bi[i+1] = bi[i] + nz; 304 aj = a->j + ai[i]; 305 for (j=0; j<nz; j++) { 306 *bj = aj[j]; bj++; 307 } 308 } 309 310 /* U part */ 311 bi_temp = bi[n]; 312 bdiag[n] = bi[n]-1; 313 for (i=n-1; i>=0; i--) { 314 nz = ai[i+1] - adiag[i] - 1; 315 bi_temp = bi_temp + nz + 1; 316 aj = a->j + adiag[i] + 1; 317 for (j=0; j<nz; j++) { 318 *bj = aj[j]; bj++; 319 } 320 /* diag[i] */ 321 *bj = i; bj++; 322 bdiag[i] = bi_temp - 1; 323 } 324 PetscFunctionReturn(0); 325 } 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatILUFactorSymbolic_SeqBAIJ" 329 PetscErrorCode MatILUFactorSymbolic_SeqBAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 330 { 331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 332 IS isicol; 333 PetscErrorCode ierr; 334 const PetscInt *r,*ic; 335 PetscInt n=a->mbs,*ai=a->i,*aj=a->j,d; 336 PetscInt *bi,*cols,nnz,*cols_lvl; 337 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 338 PetscInt i,levels,diagonal_fill; 339 PetscBool col_identity,row_identity,both_identity; 340 PetscReal f; 341 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 342 PetscBT lnkbt; 343 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 344 PetscFreeSpaceList free_space =NULL,current_space=NULL; 345 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 346 PetscBool missing; 347 PetscInt bs=A->rmap->bs,bs2=a->bs2; 348 349 PetscFunctionBegin; 350 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 351 if (bs>1) { /* check shifttype */ 352 if (info->shifttype == MAT_SHIFT_NONZERO || info->shifttype == MAT_SHIFT_POSITIVE_DEFINITE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix"); 353 } 354 355 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 356 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 357 358 f = info->fill; 359 levels = (PetscInt)info->levels; 360 diagonal_fill = (PetscInt)info->diagonal_fill; 361 362 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 363 364 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 365 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 366 367 both_identity = (PetscBool) (row_identity && col_identity); 368 369 if (!levels && both_identity) { 370 /* special case: ilu(0) with natural ordering */ 371 ierr = MatILUFactorSymbolic_SeqBAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 372 ierr = MatSeqBAIJSetNumericFactorization(fact,both_identity);CHKERRQ(ierr); 373 374 fact->factortype = MAT_FACTOR_ILU; 375 (fact)->info.factor_mallocs = 0; 376 (fact)->info.fill_ratio_given = info->fill; 377 (fact)->info.fill_ratio_needed = 1.0; 378 379 b = (Mat_SeqBAIJ*)(fact)->data; 380 b->row = isrow; 381 b->col = iscol; 382 b->icol = isicol; 383 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 384 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 385 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 386 387 ierr = PetscMalloc1((n+1)*bs,&b->solve_work);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 392 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 393 394 /* get new row pointers */ 395 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 396 bi[0] = 0; 397 /* bdiag is location of diagonal in factor */ 398 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 399 bdiag[0] = 0; 400 401 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 402 403 /* create a linked list for storing column indices of the active row */ 404 nlnk = n + 1; 405 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 406 407 /* initial FreeSpace size is f*(ai[n]+1) */ 408 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 409 current_space = free_space; 410 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr); 411 current_space_lvl = free_space_lvl; 412 413 for (i=0; i<n; i++) { 414 nzi = 0; 415 /* copy current row into linked list */ 416 nnz = ai[r[i]+1] - ai[r[i]]; 417 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 418 cols = aj + ai[r[i]]; 419 lnk[i] = -1; /* marker to indicate if diagonal exists */ 420 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 421 nzi += nlnk; 422 423 /* make sure diagonal entry is included */ 424 if (diagonal_fill && lnk[i] == -1) { 425 fm = n; 426 while (lnk[fm] < i) fm = lnk[fm]; 427 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 428 lnk[fm] = i; 429 lnk_lvl[i] = 0; 430 nzi++; dcount++; 431 } 432 433 /* add pivot rows into the active row */ 434 nzbd = 0; 435 prow = lnk[n]; 436 while (prow < i) { 437 nnz = bdiag[prow]; 438 cols = bj_ptr[prow] + nnz + 1; 439 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 440 nnz = bi[prow+1] - bi[prow] - nnz - 1; 441 442 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 443 nzi += nlnk; 444 prow = lnk[prow]; 445 nzbd++; 446 } 447 bdiag[i] = nzbd; 448 bi[i+1] = bi[i] + nzi; 449 450 /* if free space is not available, make more free space */ 451 if (current_space->local_remaining<nzi) { 452 nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,(n - i))); /* estimated and max additional space needed */ 453 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 454 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 455 reallocs++; 456 } 457 458 /* copy data into free_space and free_space_lvl, then initialize lnk */ 459 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 460 461 bj_ptr[i] = current_space->array; 462 bjlvl_ptr[i] = current_space_lvl->array; 463 464 /* make sure the active row i has diagonal entry */ 465 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 466 467 current_space->array += nzi; 468 current_space->local_used += nzi; 469 current_space->local_remaining -= nzi; 470 471 current_space_lvl->array += nzi; 472 current_space_lvl->local_used += nzi; 473 current_space_lvl->local_remaining -= nzi; 474 } 475 476 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 477 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 478 479 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 480 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 481 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 482 483 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 484 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 485 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 486 487 #if defined(PETSC_USE_INFO) 488 { 489 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 490 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 491 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 492 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 493 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 494 if (diagonal_fill) { 495 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 496 } 497 } 498 #endif 499 500 /* put together the new matrix */ 501 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(fact,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 502 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 503 504 b = (Mat_SeqBAIJ*)(fact)->data; 505 b->free_a = PETSC_TRUE; 506 b->free_ij = PETSC_TRUE; 507 b->singlemalloc = PETSC_FALSE; 508 509 ierr = PetscMalloc1(bs2*(bdiag[0]+1),&b->a);CHKERRQ(ierr); 510 511 b->j = bj; 512 b->i = bi; 513 b->diag = bdiag; 514 b->free_diag = PETSC_TRUE; 515 b->ilen = 0; 516 b->imax = 0; 517 b->row = isrow; 518 b->col = iscol; 519 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 520 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 521 b->icol = isicol; 522 523 ierr = PetscMalloc1(bs*n+bs,&b->solve_work);CHKERRQ(ierr); 524 /* In b structure: Free imax, ilen, old a, old j. 525 Allocate bdiag, solve_work, new a, new j */ 526 ierr = PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1) * (sizeof(PetscInt)+bs2*sizeof(PetscScalar)));CHKERRQ(ierr); 527 b->maxnz = b->nz = bdiag[0]+1; 528 529 fact->info.factor_mallocs = reallocs; 530 fact->info.fill_ratio_given = f; 531 fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 532 533 ierr = MatSeqBAIJSetNumericFactorization(fact,both_identity);CHKERRQ(ierr); 534 PetscFunctionReturn(0); 535 } 536 537 /* 538 This code is virtually identical to MatILUFactorSymbolic_SeqAIJ 539 except that the data structure of Mat_SeqAIJ is slightly different. 540 Not a good example of code reuse. 541 */ 542 #undef __FUNCT__ 543 #define __FUNCT__ "MatILUFactorSymbolic_SeqBAIJ_inplace" 544 PetscErrorCode MatILUFactorSymbolic_SeqBAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 545 { 546 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 547 IS isicol; 548 PetscErrorCode ierr; 549 const PetscInt *r,*ic,*ai = a->i,*aj = a->j,*xi; 550 PetscInt prow,n = a->mbs,*ainew,*ajnew,jmax,*fill,nz,*im,*ajfill,*flev,*xitmp; 551 PetscInt *dloc,idx,row,m,fm,nzf,nzi,reallocate = 0,dcount = 0; 552 PetscInt incrlev,nnz,i,bs = A->rmap->bs,bs2 = a->bs2,levels,diagonal_fill,dd; 553 PetscBool col_identity,row_identity,both_identity,flg; 554 PetscReal f; 555 556 PetscFunctionBegin; 557 ierr = MatMissingDiagonal_SeqBAIJ(A,&flg,&dd);CHKERRQ(ierr); 558 if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix A is missing diagonal entry in row %D",dd); 559 560 f = info->fill; 561 levels = (PetscInt)info->levels; 562 diagonal_fill = (PetscInt)info->diagonal_fill; 563 564 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 565 566 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 567 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 568 both_identity = (PetscBool) (row_identity && col_identity); 569 570 if (!levels && both_identity) { /* special case copy the nonzero structure */ 571 ierr = MatDuplicateNoCreate_SeqBAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 572 ierr = MatSeqBAIJSetNumericFactorization_inplace(fact,both_identity);CHKERRQ(ierr); 573 574 fact->factortype = MAT_FACTOR_ILU; 575 b = (Mat_SeqBAIJ*)fact->data; 576 b->row = isrow; 577 b->col = iscol; 578 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 579 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 580 b->icol = isicol; 581 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 582 583 ierr = PetscMalloc1((n+1)*bs,&b->solve_work);CHKERRQ(ierr); 584 PetscFunctionReturn(0); 585 } 586 587 /* general case perform the symbolic factorization */ 588 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 589 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 590 591 /* get new row pointers */ 592 ierr = PetscMalloc1(n+1,&ainew);CHKERRQ(ierr); 593 ainew[0] = 0; 594 /* don't know how many column pointers are needed so estimate */ 595 jmax = (PetscInt)(f*ai[n] + 1); 596 ierr = PetscMalloc1(jmax,&ajnew);CHKERRQ(ierr); 597 /* ajfill is level of fill for each fill entry */ 598 ierr = PetscMalloc1(jmax,&ajfill);CHKERRQ(ierr); 599 /* fill is a linked list of nonzeros in active row */ 600 ierr = PetscMalloc1(n+1,&fill);CHKERRQ(ierr); 601 /* im is level for each filled value */ 602 ierr = PetscMalloc1(n+1,&im);CHKERRQ(ierr); 603 /* dloc is location of diagonal in factor */ 604 ierr = PetscMalloc1(n+1,&dloc);CHKERRQ(ierr); 605 dloc[0] = 0; 606 for (prow=0; prow<n; prow++) { 607 608 /* copy prow into linked list */ 609 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 610 if (!nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[prow],prow); 611 xi = aj + ai[r[prow]]; 612 fill[n] = n; 613 fill[prow] = -1; /* marker for diagonal entry */ 614 while (nz--) { 615 fm = n; 616 idx = ic[*xi++]; 617 do { 618 m = fm; 619 fm = fill[m]; 620 } while (fm < idx); 621 fill[m] = idx; 622 fill[idx] = fm; 623 im[idx] = 0; 624 } 625 626 /* make sure diagonal entry is included */ 627 if (diagonal_fill && fill[prow] == -1) { 628 fm = n; 629 while (fill[fm] < prow) fm = fill[fm]; 630 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 631 fill[fm] = prow; 632 im[prow] = 0; 633 nzf++; 634 dcount++; 635 } 636 637 nzi = 0; 638 row = fill[n]; 639 while (row < prow) { 640 incrlev = im[row] + 1; 641 nz = dloc[row]; 642 xi = ajnew + ainew[row] + nz + 1; 643 flev = ajfill + ainew[row] + nz + 1; 644 nnz = ainew[row+1] - ainew[row] - nz - 1; 645 fm = row; 646 while (nnz-- > 0) { 647 idx = *xi++; 648 if (*flev + incrlev > levels) { 649 flev++; 650 continue; 651 } 652 do { 653 m = fm; 654 fm = fill[m]; 655 } while (fm < idx); 656 if (fm != idx) { 657 im[idx] = *flev + incrlev; 658 fill[m] = idx; 659 fill[idx] = fm; 660 fm = idx; 661 nzf++; 662 } else if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 663 flev++; 664 } 665 row = fill[row]; 666 nzi++; 667 } 668 /* copy new filled row into permanent storage */ 669 ainew[prow+1] = ainew[prow] + nzf; 670 if (ainew[prow+1] > jmax) { 671 672 /* estimate how much additional space we will need */ 673 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 674 /* just double the memory each time */ 675 PetscInt maxadd = jmax; 676 /* maxadd = (int)(((f*ai[n]+1)*(n-prow+5))/n); */ 677 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 678 jmax += maxadd; 679 680 /* allocate a longer ajnew and ajfill */ 681 ierr = PetscMalloc1(jmax,&xitmp);CHKERRQ(ierr); 682 ierr = PetscMemcpy(xitmp,ajnew,ainew[prow]*sizeof(PetscInt));CHKERRQ(ierr); 683 ierr = PetscFree(ajnew);CHKERRQ(ierr); 684 ajnew = xitmp; 685 ierr = PetscMalloc1(jmax,&xitmp);CHKERRQ(ierr); 686 ierr = PetscMemcpy(xitmp,ajfill,ainew[prow]*sizeof(PetscInt));CHKERRQ(ierr); 687 ierr = PetscFree(ajfill);CHKERRQ(ierr); 688 ajfill = xitmp; 689 reallocate++; /* count how many reallocations are needed */ 690 } 691 xitmp = ajnew + ainew[prow]; 692 flev = ajfill + ainew[prow]; 693 dloc[prow] = nzi; 694 fm = fill[n]; 695 while (nzf--) { 696 *xitmp++ = fm; 697 *flev++ = im[fm]; 698 fm = fill[fm]; 699 } 700 /* make sure row has diagonal entry */ 701 if (ajnew[ainew[prow]+dloc[prow]] != prow) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 702 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",prow); 703 } 704 ierr = PetscFree(ajfill);CHKERRQ(ierr); 705 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 706 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 707 ierr = PetscFree(fill);CHKERRQ(ierr); 708 ierr = PetscFree(im);CHKERRQ(ierr); 709 710 #if defined(PETSC_USE_INFO) 711 { 712 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 713 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocate,(double)f,(double)af);CHKERRQ(ierr); 714 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 715 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 716 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 717 if (diagonal_fill) { 718 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 719 } 720 } 721 #endif 722 723 /* put together the new matrix */ 724 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(fact,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 725 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 726 b = (Mat_SeqBAIJ*)fact->data; 727 728 b->free_a = PETSC_TRUE; 729 b->free_ij = PETSC_TRUE; 730 b->singlemalloc = PETSC_FALSE; 731 732 ierr = PetscMalloc1(bs2*ainew[n],&b->a);CHKERRQ(ierr); 733 734 b->j = ajnew; 735 b->i = ainew; 736 for (i=0; i<n; i++) dloc[i] += ainew[i]; 737 b->diag = dloc; 738 b->free_diag = PETSC_TRUE; 739 b->ilen = 0; 740 b->imax = 0; 741 b->row = isrow; 742 b->col = iscol; 743 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 744 745 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 746 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 747 b->icol = isicol; 748 ierr = PetscMalloc1(bs*n+bs,&b->solve_work);CHKERRQ(ierr); 749 /* In b structure: Free imax, ilen, old a, old j. 750 Allocate dloc, solve_work, new a, new j */ 751 ierr = PetscLogObjectMemory((PetscObject)fact,(ainew[n]-n)*(sizeof(PetscInt))+bs2*ainew[n]*sizeof(PetscScalar));CHKERRQ(ierr); 752 b->maxnz = b->nz = ainew[n]; 753 754 fact->info.factor_mallocs = reallocate; 755 fact->info.fill_ratio_given = f; 756 fact->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 757 758 ierr = MatSeqBAIJSetNumericFactorization_inplace(fact,both_identity);CHKERRQ(ierr); 759 PetscFunctionReturn(0); 760 } 761 762 #undef __FUNCT__ 763 #define __FUNCT__ "MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE" 764 PetscErrorCode MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE(Mat A) 765 { 766 /* Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; */ 767 /* int i,*AJ=a->j,nz=a->nz; */ 768 769 PetscFunctionBegin; 770 /* Undo Column scaling */ 771 /* while (nz--) { */ 772 /* AJ[i] = AJ[i]/4; */ 773 /* } */ 774 /* This should really invoke a push/pop logic, but we don't have that yet. */ 775 A->ops->setunfactored = NULL; 776 PetscFunctionReturn(0); 777 } 778 779 #undef __FUNCT__ 780 #define __FUNCT__ "MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj" 781 PetscErrorCode MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj(Mat A) 782 { 783 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 784 PetscInt *AJ=a->j,nz=a->nz; 785 unsigned short *aj=(unsigned short*)AJ; 786 787 PetscFunctionBegin; 788 /* Is this really necessary? */ 789 while (nz--) { 790 AJ[nz] = (int)((unsigned int)aj[nz]); /* First extend, then convert to signed. */ 791 } 792 A->ops->setunfactored = NULL; 793 PetscFunctionReturn(0); 794 } 795 796 797