1 2 /* 3 Factorization code for BAIJ format. 4 */ 5 6 #include <../src/mat/impls/baij/seq/baij.h> 7 #include <petsc/private/kernels/blockinvert.h> 8 #include <petscbt.h> 9 #include <../src/mat/utils/freespace.h> 10 11 /* ----------------------------------------------------------------*/ 12 extern PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat,Mat,MatDuplicateOption,PetscBool); 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering" 16 /* 17 This is not much faster than MatLUFactorNumeric_SeqBAIJ_N() but the solve is faster at least sometimes 18 */ 19 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info) 20 { 21 Mat C =B; 22 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; 23 PetscErrorCode ierr; 24 PetscInt i,j,k,ipvt[15]; 25 const PetscInt n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*ajtmp,*bjtmp,*bdiag=b->diag,*pj; 26 PetscInt nz,nzL,row; 27 MatScalar *rtmp,*pc,*mwork,*pv,*vv,work[225]; 28 const MatScalar *v,*aa=a->a; 29 PetscInt bs2 = a->bs2,bs=A->rmap->bs,flg; 30 PetscInt sol_ver; 31 PetscBool allowzeropivot,zeropivotdetected; 32 33 PetscFunctionBegin; 34 ierr = PetscOptionsGetInt(NULL,((PetscObject)A)->prefix,"-sol_ver",&sol_ver,NULL);CHKERRQ(ierr); 35 36 /* generate work space needed by the factorization */ 37 ierr = PetscMalloc2(bs2*n,&rtmp,bs2,&mwork);CHKERRQ(ierr); 38 ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); 39 40 for (i=0; i<n; i++) { 41 /* zero rtmp */ 42 /* L part */ 43 nz = bi[i+1] - bi[i]; 44 bjtmp = bj + bi[i]; 45 for (j=0; j<nz; j++) { 46 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 47 } 48 49 /* U part */ 50 nz = bdiag[i] - bdiag[i+1]; 51 bjtmp = bj + bdiag[i+1]+1; 52 for (j=0; j<nz; j++) { 53 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 54 } 55 56 /* load in initial (unfactored row) */ 57 nz = ai[i+1] - ai[i]; 58 ajtmp = aj + ai[i]; 59 v = aa + bs2*ai[i]; 60 for (j=0; j<nz; j++) { 61 ierr = PetscMemcpy(rtmp+bs2*ajtmp[j],v+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 62 } 63 64 /* elimination */ 65 bjtmp = bj + bi[i]; 66 nzL = bi[i+1] - bi[i]; 67 for (k=0; k < nzL; k++) { 68 row = bjtmp[k]; 69 pc = rtmp + bs2*row; 70 for (flg=0,j=0; j<bs2; j++) { 71 if (pc[j]!=0.0) { 72 flg = 1; 73 break; 74 } 75 } 76 if (flg) { 77 pv = b->a + bs2*bdiag[row]; 78 PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); 79 /*ierr = PetscKernel_A_gets_A_times_B_15(pc,pv,mwork);CHKERRQ(ierr);*/ 80 pj = b->j + bdiag[row+1]+1; /* begining of U(row,:) */ 81 pv = b->a + bs2*(bdiag[row+1]+1); 82 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries inU(row,:), excluding diag */ 83 for (j=0; j<nz; j++) { 84 vv = rtmp + bs2*pj[j]; 85 PetscKernel_A_gets_A_minus_B_times_C(bs,vv,pc,pv); 86 /* ierr = PetscKernel_A_gets_A_minus_B_times_C_15(vv,pc,pv);CHKERRQ(ierr); */ 87 pv += bs2; 88 } 89 ierr = PetscLogFlops(2*bs2*bs*(nz+1)-bs2);CHKERRQ(ierr); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 90 } 91 } 92 93 /* finished row so stick it into b->a */ 94 /* L part */ 95 pv = b->a + bs2*bi[i]; 96 pj = b->j + bi[i]; 97 nz = bi[i+1] - bi[i]; 98 for (j=0; j<nz; j++) { 99 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 100 } 101 102 /* Mark diagonal and invert diagonal for simplier triangular solves */ 103 pv = b->a + bs2*bdiag[i]; 104 pj = b->j + bdiag[i]; 105 ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); 106 /* PetscKernel_A_gets_inverse_A(bs,pv,pivots,work); */ 107 allowzeropivot = PetscNot(A->erroriffailure); 108 ierr = PetscKernel_A_gets_inverse_A_15(pv,ipvt,work,info->shiftamount,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 109 if (zeropivotdetected) C->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 110 111 /* U part */ 112 pv = b->a + bs2*(bdiag[i+1]+1); 113 pj = b->j + bdiag[i+1]+1; 114 nz = bdiag[i] - bdiag[i+1] - 1; 115 for (j=0; j<nz; j++) { 116 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 117 } 118 } 119 120 ierr = PetscFree2(rtmp,mwork);CHKERRQ(ierr); 121 122 C->ops->solve = MatSolve_SeqBAIJ_15_NaturalOrdering_ver1; 123 C->ops->solvetranspose = MatSolve_SeqBAIJ_N_NaturalOrdering; 124 C->assembled = PETSC_TRUE; 125 126 ierr = PetscLogFlops(1.333333333333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 127 PetscFunctionReturn(0); 128 } 129 130 #undef __FUNCT__ 131 #define __FUNCT__ "MatLUFactorNumeric_SeqBAIJ_N" 132 PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N(Mat B,Mat A,const MatFactorInfo *info) 133 { 134 Mat C =B; 135 Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)C->data; 136 IS isrow = b->row,isicol = b->icol; 137 PetscErrorCode ierr; 138 const PetscInt *r,*ic; 139 PetscInt i,j,k,n=a->mbs,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 140 PetscInt *ajtmp,*bjtmp,nz,nzL,row,*bdiag=b->diag,*pj; 141 MatScalar *rtmp,*pc,*mwork,*v,*pv,*aa=a->a; 142 PetscInt bs=A->rmap->bs,bs2 = a->bs2,*v_pivots,flg; 143 MatScalar *v_work; 144 PetscBool col_identity,row_identity,both_identity; 145 146 PetscFunctionBegin; 147 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 148 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 149 150 ierr = PetscMalloc1(bs2*n,&rtmp);CHKERRQ(ierr); 151 ierr = PetscMemzero(rtmp,bs2*n*sizeof(MatScalar));CHKERRQ(ierr); 152 153 /* generate work space needed by dense LU factorization */ 154 ierr = PetscMalloc3(bs,&v_work,bs2,&mwork,bs,&v_pivots);CHKERRQ(ierr); 155 156 for (i=0; i<n; i++) { 157 /* zero rtmp */ 158 /* L part */ 159 nz = bi[i+1] - bi[i]; 160 bjtmp = bj + bi[i]; 161 for (j=0; j<nz; j++) { 162 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 163 } 164 165 /* U part */ 166 nz = bdiag[i] - bdiag[i+1]; 167 bjtmp = bj + bdiag[i+1]+1; 168 for (j=0; j<nz; j++) { 169 ierr = PetscMemzero(rtmp+bs2*bjtmp[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 170 } 171 172 /* load in initial (unfactored row) */ 173 nz = ai[r[i]+1] - ai[r[i]]; 174 ajtmp = aj + ai[r[i]]; 175 v = aa + bs2*ai[r[i]]; 176 for (j=0; j<nz; j++) { 177 ierr = PetscMemcpy(rtmp+bs2*ic[ajtmp[j]],v+bs2*j,bs2*sizeof(MatScalar));CHKERRQ(ierr); 178 } 179 180 /* elimination */ 181 bjtmp = bj + bi[i]; 182 nzL = bi[i+1] - bi[i]; 183 for (k=0; k < nzL; k++) { 184 row = bjtmp[k]; 185 pc = rtmp + bs2*row; 186 for (flg=0,j=0; j<bs2; j++) { 187 if (pc[j]!=0.0) { 188 flg = 1; 189 break; 190 } 191 } 192 if (flg) { 193 pv = b->a + bs2*bdiag[row]; 194 PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); /* *pc = *pc * (*pv); */ 195 pj = b->j + bdiag[row+1]+1; /* begining of U(row,:) */ 196 pv = b->a + bs2*(bdiag[row+1]+1); 197 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries inU(row,:), excluding diag */ 198 for (j=0; j<nz; j++) { 199 PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); 200 } 201 ierr = PetscLogFlops(2*bs2*bs*(nz+1)-bs2);CHKERRQ(ierr); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */ 202 } 203 } 204 205 /* finished row so stick it into b->a */ 206 /* L part */ 207 pv = b->a + bs2*bi[i]; 208 pj = b->j + bi[i]; 209 nz = bi[i+1] - bi[i]; 210 for (j=0; j<nz; j++) { 211 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 212 } 213 214 /* Mark diagonal and invert diagonal for simplier triangular solves */ 215 pv = b->a + bs2*bdiag[i]; 216 pj = b->j + bdiag[i]; 217 /* if (*pj != i)SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"row %d != *pj %d",i,*pj); */ 218 ierr = PetscMemcpy(pv,rtmp+bs2*pj[0],bs2*sizeof(MatScalar));CHKERRQ(ierr); 219 ierr = PetscKernel_A_gets_inverse_A(bs,pv,v_pivots,v_work);CHKERRQ(ierr); 220 221 /* U part */ 222 pv = b->a + bs2*(bdiag[i+1]+1); 223 pj = b->j + bdiag[i+1]+1; 224 nz = bdiag[i] - bdiag[i+1] - 1; 225 for (j=0; j<nz; j++) { 226 ierr = PetscMemcpy(pv+bs2*j,rtmp+bs2*pj[j],bs2*sizeof(MatScalar));CHKERRQ(ierr); 227 } 228 } 229 230 ierr = PetscFree(rtmp);CHKERRQ(ierr); 231 ierr = PetscFree3(v_work,mwork,v_pivots);CHKERRQ(ierr); 232 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 233 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 234 235 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 236 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 237 238 both_identity = (PetscBool) (row_identity && col_identity); 239 if (both_identity) { 240 C->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering; 241 } else { 242 C->ops->solve = MatSolve_SeqBAIJ_N; 243 } 244 C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_N; 245 246 C->assembled = PETSC_TRUE; 247 248 ierr = PetscLogFlops(1.333333333333*bs*bs2*b->mbs);CHKERRQ(ierr); /* from inverting diagonal blocks */ 249 PetscFunctionReturn(0); 250 } 251 252 /* 253 ilu(0) with natural ordering under new data structure. 254 See MatILUFactorSymbolic_SeqAIJ_ilu0() for detailed description 255 because this code is almost identical to MatILUFactorSymbolic_SeqAIJ_ilu0_inplace(). 256 */ 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatILUFactorSymbolic_SeqBAIJ_ilu0" 260 PetscErrorCode MatILUFactorSymbolic_SeqBAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 261 { 262 263 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 264 PetscErrorCode ierr; 265 PetscInt n=a->mbs,*ai=a->i,*aj,*adiag=a->diag,bs2 = a->bs2; 266 PetscInt i,j,nz,*bi,*bj,*bdiag,bi_temp; 267 268 PetscFunctionBegin; 269 ierr = MatDuplicateNoCreate_SeqBAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 270 b = (Mat_SeqBAIJ*)(fact)->data; 271 272 /* allocate matrix arrays for new data structure */ 273 ierr = PetscMalloc3(bs2*ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);CHKERRQ(ierr); 274 ierr = PetscLogObjectMemory((PetscObject)fact,ai[n]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 275 276 b->singlemalloc = PETSC_TRUE; 277 b->free_a = PETSC_TRUE; 278 b->free_ij = PETSC_TRUE; 279 fact->preallocated = PETSC_TRUE; 280 fact->assembled = PETSC_TRUE; 281 if (!b->diag) { 282 ierr = PetscMalloc1(n+1,&b->diag);CHKERRQ(ierr); 283 ierr = PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 284 } 285 bdiag = b->diag; 286 287 if (n > 0) { 288 ierr = PetscMemzero(b->a,bs2*ai[n]*sizeof(MatScalar));CHKERRQ(ierr); 289 } 290 291 /* set bi and bj with new data structure */ 292 bi = b->i; 293 bj = b->j; 294 295 /* L part */ 296 bi[0] = 0; 297 for (i=0; i<n; i++) { 298 nz = adiag[i] - ai[i]; 299 bi[i+1] = bi[i] + nz; 300 aj = a->j + ai[i]; 301 for (j=0; j<nz; j++) { 302 *bj = aj[j]; bj++; 303 } 304 } 305 306 /* U part */ 307 bi_temp = bi[n]; 308 bdiag[n] = bi[n]-1; 309 for (i=n-1; i>=0; i--) { 310 nz = ai[i+1] - adiag[i] - 1; 311 bi_temp = bi_temp + nz + 1; 312 aj = a->j + adiag[i] + 1; 313 for (j=0; j<nz; j++) { 314 *bj = aj[j]; bj++; 315 } 316 /* diag[i] */ 317 *bj = i; bj++; 318 bdiag[i] = bi_temp - 1; 319 } 320 PetscFunctionReturn(0); 321 } 322 323 #undef __FUNCT__ 324 #define __FUNCT__ "MatILUFactorSymbolic_SeqBAIJ" 325 PetscErrorCode MatILUFactorSymbolic_SeqBAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 326 { 327 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 328 IS isicol; 329 PetscErrorCode ierr; 330 const PetscInt *r,*ic; 331 PetscInt n=a->mbs,*ai=a->i,*aj=a->j,d; 332 PetscInt *bi,*cols,nnz,*cols_lvl; 333 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 334 PetscInt i,levels,diagonal_fill; 335 PetscBool col_identity,row_identity,both_identity; 336 PetscReal f; 337 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 338 PetscBT lnkbt; 339 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 340 PetscFreeSpaceList free_space =NULL,current_space=NULL; 341 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 342 PetscBool missing; 343 PetscInt bs=A->rmap->bs,bs2=a->bs2; 344 345 PetscFunctionBegin; 346 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 347 if (bs>1) { /* check shifttype */ 348 if (info->shifttype == MAT_SHIFT_NONZERO || info->shifttype == MAT_SHIFT_POSITIVE_DEFINITE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix"); 349 } 350 351 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 352 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 353 354 f = info->fill; 355 levels = (PetscInt)info->levels; 356 diagonal_fill = (PetscInt)info->diagonal_fill; 357 358 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 359 360 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 361 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 362 363 both_identity = (PetscBool) (row_identity && col_identity); 364 365 if (!levels && both_identity) { 366 /* special case: ilu(0) with natural ordering */ 367 ierr = MatILUFactorSymbolic_SeqBAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 368 ierr = MatSeqBAIJSetNumericFactorization(fact,both_identity);CHKERRQ(ierr); 369 370 fact->factortype = MAT_FACTOR_ILU; 371 (fact)->info.factor_mallocs = 0; 372 (fact)->info.fill_ratio_given = info->fill; 373 (fact)->info.fill_ratio_needed = 1.0; 374 375 b = (Mat_SeqBAIJ*)(fact)->data; 376 b->row = isrow; 377 b->col = iscol; 378 b->icol = isicol; 379 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 380 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 381 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 382 383 ierr = PetscMalloc1((n+1)*bs,&b->solve_work);CHKERRQ(ierr); 384 PetscFunctionReturn(0); 385 } 386 387 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 388 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 389 390 /* get new row pointers */ 391 ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 392 bi[0] = 0; 393 /* bdiag is location of diagonal in factor */ 394 ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 395 bdiag[0] = 0; 396 397 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 398 399 /* create a linked list for storing column indices of the active row */ 400 nlnk = n + 1; 401 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 402 403 /* initial FreeSpace size is f*(ai[n]+1) */ 404 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 405 current_space = free_space; 406 ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr); 407 current_space_lvl = free_space_lvl; 408 409 for (i=0; i<n; i++) { 410 nzi = 0; 411 /* copy current row into linked list */ 412 nnz = ai[r[i]+1] - ai[r[i]]; 413 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 414 cols = aj + ai[r[i]]; 415 lnk[i] = -1; /* marker to indicate if diagonal exists */ 416 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 417 nzi += nlnk; 418 419 /* make sure diagonal entry is included */ 420 if (diagonal_fill && lnk[i] == -1) { 421 fm = n; 422 while (lnk[fm] < i) fm = lnk[fm]; 423 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 424 lnk[fm] = i; 425 lnk_lvl[i] = 0; 426 nzi++; dcount++; 427 } 428 429 /* add pivot rows into the active row */ 430 nzbd = 0; 431 prow = lnk[n]; 432 while (prow < i) { 433 nnz = bdiag[prow]; 434 cols = bj_ptr[prow] + nnz + 1; 435 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 436 nnz = bi[prow+1] - bi[prow] - nnz - 1; 437 438 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 439 nzi += nlnk; 440 prow = lnk[prow]; 441 nzbd++; 442 } 443 bdiag[i] = nzbd; 444 bi[i+1] = bi[i] + nzi; 445 446 /* if free space is not available, make more free space */ 447 if (current_space->local_remaining<nzi) { 448 nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,(n - i))); /* estimated and max additional space needed */ 449 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 450 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 451 reallocs++; 452 } 453 454 /* copy data into free_space and free_space_lvl, then initialize lnk */ 455 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 456 457 bj_ptr[i] = current_space->array; 458 bjlvl_ptr[i] = current_space_lvl->array; 459 460 /* make sure the active row i has diagonal entry */ 461 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 462 463 current_space->array += nzi; 464 current_space->local_used += nzi; 465 current_space->local_remaining -= nzi; 466 467 current_space_lvl->array += nzi; 468 current_space_lvl->local_used += nzi; 469 current_space_lvl->local_remaining -= nzi; 470 } 471 472 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 473 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 474 475 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 476 ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 477 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 478 479 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 480 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 481 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 482 483 #if defined(PETSC_USE_INFO) 484 { 485 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 486 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 487 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 488 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 489 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 490 if (diagonal_fill) { 491 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 492 } 493 } 494 #endif 495 496 /* put together the new matrix */ 497 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(fact,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 498 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 499 500 b = (Mat_SeqBAIJ*)(fact)->data; 501 b->free_a = PETSC_TRUE; 502 b->free_ij = PETSC_TRUE; 503 b->singlemalloc = PETSC_FALSE; 504 505 ierr = PetscMalloc1(bs2*(bdiag[0]+1),&b->a);CHKERRQ(ierr); 506 507 b->j = bj; 508 b->i = bi; 509 b->diag = bdiag; 510 b->free_diag = PETSC_TRUE; 511 b->ilen = 0; 512 b->imax = 0; 513 b->row = isrow; 514 b->col = iscol; 515 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 516 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 517 b->icol = isicol; 518 519 ierr = PetscMalloc1(bs*n+bs,&b->solve_work);CHKERRQ(ierr); 520 /* In b structure: Free imax, ilen, old a, old j. 521 Allocate bdiag, solve_work, new a, new j */ 522 ierr = PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1) * (sizeof(PetscInt)+bs2*sizeof(PetscScalar)));CHKERRQ(ierr); 523 b->maxnz = b->nz = bdiag[0]+1; 524 525 fact->info.factor_mallocs = reallocs; 526 fact->info.fill_ratio_given = f; 527 fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 528 529 ierr = MatSeqBAIJSetNumericFactorization(fact,both_identity);CHKERRQ(ierr); 530 PetscFunctionReturn(0); 531 } 532 533 /* 534 This code is virtually identical to MatILUFactorSymbolic_SeqAIJ 535 except that the data structure of Mat_SeqAIJ is slightly different. 536 Not a good example of code reuse. 537 */ 538 #undef __FUNCT__ 539 #define __FUNCT__ "MatILUFactorSymbolic_SeqBAIJ_inplace" 540 PetscErrorCode MatILUFactorSymbolic_SeqBAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 541 { 542 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 543 IS isicol; 544 PetscErrorCode ierr; 545 const PetscInt *r,*ic,*ai = a->i,*aj = a->j,*xi; 546 PetscInt prow,n = a->mbs,*ainew,*ajnew,jmax,*fill,nz,*im,*ajfill,*flev,*xitmp; 547 PetscInt *dloc,idx,row,m,fm,nzf,nzi,reallocate = 0,dcount = 0; 548 PetscInt incrlev,nnz,i,bs = A->rmap->bs,bs2 = a->bs2,levels,diagonal_fill,dd; 549 PetscBool col_identity,row_identity,both_identity,flg; 550 PetscReal f; 551 552 PetscFunctionBegin; 553 ierr = MatMissingDiagonal_SeqBAIJ(A,&flg,&dd);CHKERRQ(ierr); 554 if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix A is missing diagonal entry in row %D",dd); 555 556 f = info->fill; 557 levels = (PetscInt)info->levels; 558 diagonal_fill = (PetscInt)info->diagonal_fill; 559 560 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 561 562 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 563 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 564 both_identity = (PetscBool) (row_identity && col_identity); 565 566 if (!levels && both_identity) { /* special case copy the nonzero structure */ 567 ierr = MatDuplicateNoCreate_SeqBAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 568 ierr = MatSeqBAIJSetNumericFactorization_inplace(fact,both_identity);CHKERRQ(ierr); 569 570 fact->factortype = MAT_FACTOR_ILU; 571 b = (Mat_SeqBAIJ*)fact->data; 572 b->row = isrow; 573 b->col = iscol; 574 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 575 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 576 b->icol = isicol; 577 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 578 579 ierr = PetscMalloc1((n+1)*bs,&b->solve_work);CHKERRQ(ierr); 580 PetscFunctionReturn(0); 581 } 582 583 /* general case perform the symbolic factorization */ 584 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 585 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 586 587 /* get new row pointers */ 588 ierr = PetscMalloc1(n+1,&ainew);CHKERRQ(ierr); 589 ainew[0] = 0; 590 /* don't know how many column pointers are needed so estimate */ 591 jmax = (PetscInt)(f*ai[n] + 1); 592 ierr = PetscMalloc1(jmax,&ajnew);CHKERRQ(ierr); 593 /* ajfill is level of fill for each fill entry */ 594 ierr = PetscMalloc1(jmax,&ajfill);CHKERRQ(ierr); 595 /* fill is a linked list of nonzeros in active row */ 596 ierr = PetscMalloc1(n+1,&fill);CHKERRQ(ierr); 597 /* im is level for each filled value */ 598 ierr = PetscMalloc1(n+1,&im);CHKERRQ(ierr); 599 /* dloc is location of diagonal in factor */ 600 ierr = PetscMalloc1(n+1,&dloc);CHKERRQ(ierr); 601 dloc[0] = 0; 602 for (prow=0; prow<n; prow++) { 603 604 /* copy prow into linked list */ 605 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 606 if (!nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[prow],prow); 607 xi = aj + ai[r[prow]]; 608 fill[n] = n; 609 fill[prow] = -1; /* marker for diagonal entry */ 610 while (nz--) { 611 fm = n; 612 idx = ic[*xi++]; 613 do { 614 m = fm; 615 fm = fill[m]; 616 } while (fm < idx); 617 fill[m] = idx; 618 fill[idx] = fm; 619 im[idx] = 0; 620 } 621 622 /* make sure diagonal entry is included */ 623 if (diagonal_fill && fill[prow] == -1) { 624 fm = n; 625 while (fill[fm] < prow) fm = fill[fm]; 626 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 627 fill[fm] = prow; 628 im[prow] = 0; 629 nzf++; 630 dcount++; 631 } 632 633 nzi = 0; 634 row = fill[n]; 635 while (row < prow) { 636 incrlev = im[row] + 1; 637 nz = dloc[row]; 638 xi = ajnew + ainew[row] + nz + 1; 639 flev = ajfill + ainew[row] + nz + 1; 640 nnz = ainew[row+1] - ainew[row] - nz - 1; 641 fm = row; 642 while (nnz-- > 0) { 643 idx = *xi++; 644 if (*flev + incrlev > levels) { 645 flev++; 646 continue; 647 } 648 do { 649 m = fm; 650 fm = fill[m]; 651 } while (fm < idx); 652 if (fm != idx) { 653 im[idx] = *flev + incrlev; 654 fill[m] = idx; 655 fill[idx] = fm; 656 fm = idx; 657 nzf++; 658 } else if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 659 flev++; 660 } 661 row = fill[row]; 662 nzi++; 663 } 664 /* copy new filled row into permanent storage */ 665 ainew[prow+1] = ainew[prow] + nzf; 666 if (ainew[prow+1] > jmax) { 667 668 /* estimate how much additional space we will need */ 669 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 670 /* just double the memory each time */ 671 PetscInt maxadd = jmax; 672 /* maxadd = (int)(((f*ai[n]+1)*(n-prow+5))/n); */ 673 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 674 jmax += maxadd; 675 676 /* allocate a longer ajnew and ajfill */ 677 ierr = PetscMalloc1(jmax,&xitmp);CHKERRQ(ierr); 678 ierr = PetscMemcpy(xitmp,ajnew,ainew[prow]*sizeof(PetscInt));CHKERRQ(ierr); 679 ierr = PetscFree(ajnew);CHKERRQ(ierr); 680 ajnew = xitmp; 681 ierr = PetscMalloc1(jmax,&xitmp);CHKERRQ(ierr); 682 ierr = PetscMemcpy(xitmp,ajfill,ainew[prow]*sizeof(PetscInt));CHKERRQ(ierr); 683 ierr = PetscFree(ajfill);CHKERRQ(ierr); 684 ajfill = xitmp; 685 reallocate++; /* count how many reallocations are needed */ 686 } 687 xitmp = ajnew + ainew[prow]; 688 flev = ajfill + ainew[prow]; 689 dloc[prow] = nzi; 690 fm = fill[n]; 691 while (nzf--) { 692 *xitmp++ = fm; 693 *flev++ = im[fm]; 694 fm = fill[fm]; 695 } 696 /* make sure row has diagonal entry */ 697 if (ajnew[ainew[prow]+dloc[prow]] != prow) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 698 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",prow); 699 } 700 ierr = PetscFree(ajfill);CHKERRQ(ierr); 701 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 702 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 703 ierr = PetscFree(fill);CHKERRQ(ierr); 704 ierr = PetscFree(im);CHKERRQ(ierr); 705 706 #if defined(PETSC_USE_INFO) 707 { 708 PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 709 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocate,(double)f,(double)af);CHKERRQ(ierr); 710 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 711 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 712 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 713 if (diagonal_fill) { 714 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr); 715 } 716 } 717 #endif 718 719 /* put together the new matrix */ 720 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(fact,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 721 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 722 b = (Mat_SeqBAIJ*)fact->data; 723 724 b->free_a = PETSC_TRUE; 725 b->free_ij = PETSC_TRUE; 726 b->singlemalloc = PETSC_FALSE; 727 728 ierr = PetscMalloc1(bs2*ainew[n],&b->a);CHKERRQ(ierr); 729 730 b->j = ajnew; 731 b->i = ainew; 732 for (i=0; i<n; i++) dloc[i] += ainew[i]; 733 b->diag = dloc; 734 b->free_diag = PETSC_TRUE; 735 b->ilen = 0; 736 b->imax = 0; 737 b->row = isrow; 738 b->col = iscol; 739 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 740 741 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 742 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 743 b->icol = isicol; 744 ierr = PetscMalloc1(bs*n+bs,&b->solve_work);CHKERRQ(ierr); 745 /* In b structure: Free imax, ilen, old a, old j. 746 Allocate dloc, solve_work, new a, new j */ 747 ierr = PetscLogObjectMemory((PetscObject)fact,(ainew[n]-n)*(sizeof(PetscInt))+bs2*ainew[n]*sizeof(PetscScalar));CHKERRQ(ierr); 748 b->maxnz = b->nz = ainew[n]; 749 750 fact->info.factor_mallocs = reallocate; 751 fact->info.fill_ratio_given = f; 752 fact->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[prow]); 753 754 ierr = MatSeqBAIJSetNumericFactorization_inplace(fact,both_identity);CHKERRQ(ierr); 755 PetscFunctionReturn(0); 756 } 757 758 #undef __FUNCT__ 759 #define __FUNCT__ "MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE" 760 PetscErrorCode MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE(Mat A) 761 { 762 /* Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; */ 763 /* int i,*AJ=a->j,nz=a->nz; */ 764 765 PetscFunctionBegin; 766 /* Undo Column scaling */ 767 /* while (nz--) { */ 768 /* AJ[i] = AJ[i]/4; */ 769 /* } */ 770 /* This should really invoke a push/pop logic, but we don't have that yet. */ 771 A->ops->setunfactored = NULL; 772 PetscFunctionReturn(0); 773 } 774 775 #undef __FUNCT__ 776 #define __FUNCT__ "MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj" 777 PetscErrorCode MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj(Mat A) 778 { 779 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 780 PetscInt *AJ=a->j,nz=a->nz; 781 unsigned short *aj=(unsigned short*)AJ; 782 783 PetscFunctionBegin; 784 /* Is this really necessary? */ 785 while (nz--) { 786 AJ[nz] = (int)((unsigned int)aj[nz]); /* First extend, then convert to signed. */ 787 } 788 A->ops->setunfactored = NULL; 789 PetscFunctionReturn(0); 790 } 791 792 793