1 2 /* 3 Factorization code for BAIJ format. 4 */ 5 #include <../src/mat/impls/baij/seq/baij.h> 6 #include <petsc/private/kernels/blockinvert.h> 7 8 /* 9 This is used to set the numeric factorization for both LU and ILU symbolic factorization 10 */ 11 PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscBool natural) 12 { 13 PetscFunctionBegin; 14 if (natural) { 15 switch (fact->rmap->bs) { 16 case 1: 17 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 18 break; 19 case 2: 20 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 21 break; 22 case 3: 23 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 24 break; 25 case 4: 26 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 27 break; 28 case 5: 29 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 30 break; 31 case 6: 32 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 33 break; 34 case 7: 35 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 36 break; 37 case 9: 38 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 39 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering; 40 #else 41 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 42 #endif 43 break; 44 case 15: 45 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering; 46 break; 47 default: 48 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 49 break; 50 } 51 } else { 52 switch (fact->rmap->bs) { 53 case 1: 54 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 55 break; 56 case 2: 57 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 58 break; 59 case 3: 60 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 61 break; 62 case 4: 63 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 64 break; 65 case 5: 66 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 67 break; 68 case 6: 69 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 70 break; 71 case 7: 72 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 73 break; 74 default: 75 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 76 break; 77 } 78 } 79 PetscFunctionReturn(0); 80 } 81 82 PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscBool natural) 83 { 84 PetscFunctionBegin; 85 if (natural) { 86 switch (inA->rmap->bs) { 87 case 1: 88 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 89 break; 90 case 2: 91 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace; 92 break; 93 case 3: 94 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace; 95 break; 96 case 4: 97 #if defined(PETSC_USE_REAL_MAT_SINGLE) 98 { 99 PetscBool sse_enabled_local; 100 PetscCall(PetscSSEIsEnabled(inA->comm,&sse_enabled_local,NULL)); 101 if (sse_enabled_local) { 102 # if defined(PETSC_HAVE_SSE) 103 int i,*AJ=a->j,nz=a->nz,n=a->mbs; 104 if (n==(unsigned short)n) { 105 unsigned short *aj=(unsigned short*)AJ; 106 for (i=0; i<nz; i++) aj[i] = (unsigned short)AJ[i]; 107 108 inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj; 109 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj; 110 111 PetscCall(PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n")); 112 } else { 113 /* Scale the column indices for easier indexing in MatSolve. */ 114 /* for (i=0;i<nz;i++) { */ 115 /* AJ[i] = AJ[i]*4; */ 116 /* } */ 117 inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE; 118 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 119 120 PetscCall(PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n")); 121 } 122 # else 123 /* This should never be reached. If so, problem in PetscSSEIsEnabled. */ 124 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSE Hardware unavailable"); 125 # endif 126 } else { 127 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 128 } 129 } 130 #else 131 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 132 #endif 133 break; 134 case 5: 135 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace; 136 break; 137 case 6: 138 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace; 139 break; 140 case 7: 141 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace; 142 break; 143 default: 144 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 145 break; 146 } 147 } else { 148 switch (inA->rmap->bs) { 149 case 1: 150 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 151 break; 152 case 2: 153 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace; 154 break; 155 case 3: 156 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace; 157 break; 158 case 4: 159 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace; 160 break; 161 case 5: 162 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace; 163 break; 164 case 6: 165 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace; 166 break; 167 case 7: 168 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace; 169 break; 170 default: 171 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 172 break; 173 } 174 } 175 PetscFunctionReturn(0); 176 } 177 178 /* 179 The symbolic factorization code is identical to that for AIJ format, 180 except for very small changes since this is now a SeqBAIJ datastructure. 181 NOT good code reuse. 182 */ 183 #include <petscbt.h> 184 #include <../src/mat/utils/freespace.h> 185 186 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 187 { 188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 189 PetscInt n =a->mbs,bs = A->rmap->bs,bs2=a->bs2; 190 PetscBool row_identity,col_identity,both_identity; 191 IS isicol; 192 const PetscInt *r,*ic; 193 PetscInt i,*ai=a->i,*aj=a->j; 194 PetscInt *bi,*bj,*ajtmp; 195 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 196 PetscReal f; 197 PetscInt nlnk,*lnk,k,**bi_ptr; 198 PetscFreeSpaceList free_space=NULL,current_space=NULL; 199 PetscBT lnkbt; 200 PetscBool missing; 201 202 PetscFunctionBegin; 203 PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 204 PetscCall(MatMissingDiagonal(A,&missing,&i)); 205 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i); 206 207 if (bs>1) { /* check shifttype */ 208 PetscCheckFalse(info->shifttype == MAT_SHIFT_NONZERO || info->shifttype == MAT_SHIFT_POSITIVE_DEFINITE,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix"); 209 } 210 211 PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol)); 212 PetscCall(ISGetIndices(isrow,&r)); 213 PetscCall(ISGetIndices(isicol,&ic)); 214 215 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 216 PetscCall(PetscMalloc1(n+1,&bi)); 217 PetscCall(PetscMalloc1(n+1,&bdiag)); 218 bi[0] = bdiag[0] = 0; 219 220 /* linked list for storing column indices of the active row */ 221 nlnk = n + 1; 222 PetscCall(PetscLLCreate(n,n,nlnk,lnk,lnkbt)); 223 224 PetscCall(PetscMalloc2(n+1,&bi_ptr,n+1,&im)); 225 226 /* initial FreeSpace size is f*(ai[n]+1) */ 227 f = info->fill; 228 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space)); 229 230 current_space = free_space; 231 232 for (i=0; i<n; i++) { 233 /* copy previous fill into linked list */ 234 nzi = 0; 235 nnz = ai[r[i]+1] - ai[r[i]]; 236 ajtmp = aj + ai[r[i]]; 237 PetscCall(PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt)); 238 nzi += nlnk; 239 240 /* add pivot rows into linked list */ 241 row = lnk[n]; 242 while (row < i) { 243 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 244 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 245 PetscCall(PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im)); 246 nzi += nlnk; 247 row = lnk[row]; 248 } 249 bi[i+1] = bi[i] + nzi; 250 im[i] = nzi; 251 252 /* mark bdiag */ 253 nzbd = 0; 254 nnz = nzi; 255 k = lnk[n]; 256 while (nnz-- && k < i) { 257 nzbd++; 258 k = lnk[k]; 259 } 260 bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 261 262 /* if free space is not available, make more free space */ 263 if (current_space->local_remaining<nzi) { 264 nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n - i,nzi)); /* estimated and max additional space needed */ 265 PetscCall(PetscFreeSpaceGet(nnz,¤t_space)); 266 reallocs++; 267 } 268 269 /* copy data into free space, then initialize lnk */ 270 PetscCall(PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt)); 271 272 bi_ptr[i] = current_space->array; 273 current_space->array += nzi; 274 current_space->local_used += nzi; 275 current_space->local_remaining -= nzi; 276 } 277 278 PetscCall(ISRestoreIndices(isrow,&r)); 279 PetscCall(ISRestoreIndices(isicol,&ic)); 280 281 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 282 PetscCall(PetscMalloc1(bi[n]+1,&bj)); 283 PetscCall(PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag)); 284 PetscCall(PetscLLDestroy(lnk,lnkbt)); 285 PetscCall(PetscFree2(bi_ptr,im)); 286 287 /* put together the new matrix */ 288 PetscCall(MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL)); 289 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)isicol)); 290 b = (Mat_SeqBAIJ*)(B)->data; 291 292 b->free_a = PETSC_TRUE; 293 b->free_ij = PETSC_TRUE; 294 b->singlemalloc = PETSC_FALSE; 295 296 PetscCall(PetscMalloc1((bdiag[0]+1)*bs2,&b->a)); 297 b->j = bj; 298 b->i = bi; 299 b->diag = bdiag; 300 b->free_diag = PETSC_TRUE; 301 b->ilen = NULL; 302 b->imax = NULL; 303 b->row = isrow; 304 b->col = iscol; 305 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 306 307 PetscCall(PetscObjectReference((PetscObject)isrow)); 308 PetscCall(PetscObjectReference((PetscObject)iscol)); 309 b->icol = isicol; 310 PetscCall(PetscMalloc1(bs*n+bs,&b->solve_work)); 311 PetscCall(PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2))); 312 313 b->maxnz = b->nz = bdiag[0]+1; 314 315 B->factortype = MAT_FACTOR_LU; 316 B->info.factor_mallocs = reallocs; 317 B->info.fill_ratio_given = f; 318 319 if (ai[n] != 0) { 320 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 321 } else { 322 B->info.fill_ratio_needed = 0.0; 323 } 324 #if defined(PETSC_USE_INFO) 325 if (ai[n] != 0) { 326 PetscReal af = B->info.fill_ratio_needed; 327 PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af)); 328 PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af)); 329 PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af)); 330 PetscCall(PetscInfo(A,"for best performance.\n")); 331 } else { 332 PetscCall(PetscInfo(A,"Empty matrix\n")); 333 } 334 #endif 335 336 PetscCall(ISIdentity(isrow,&row_identity)); 337 PetscCall(ISIdentity(iscol,&col_identity)); 338 339 both_identity = (PetscBool) (row_identity && col_identity); 340 341 PetscCall(MatSeqBAIJSetNumericFactorization(B,both_identity)); 342 PetscFunctionReturn(0); 343 } 344 345 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 346 { 347 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 348 PetscInt n =a->mbs,bs = A->rmap->bs,bs2=a->bs2; 349 PetscBool row_identity,col_identity,both_identity; 350 IS isicol; 351 const PetscInt *r,*ic; 352 PetscInt i,*ai=a->i,*aj=a->j; 353 PetscInt *bi,*bj,*ajtmp; 354 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 355 PetscReal f; 356 PetscInt nlnk,*lnk,k,**bi_ptr; 357 PetscFreeSpaceList free_space=NULL,current_space=NULL; 358 PetscBT lnkbt; 359 PetscBool missing; 360 361 PetscFunctionBegin; 362 PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 363 PetscCall(MatMissingDiagonal(A,&missing,&i)); 364 PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i); 365 366 PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol)); 367 PetscCall(ISGetIndices(isrow,&r)); 368 PetscCall(ISGetIndices(isicol,&ic)); 369 370 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 371 PetscCall(PetscMalloc1(n+1,&bi)); 372 PetscCall(PetscMalloc1(n+1,&bdiag)); 373 374 bi[0] = bdiag[0] = 0; 375 376 /* linked list for storing column indices of the active row */ 377 nlnk = n + 1; 378 PetscCall(PetscLLCreate(n,n,nlnk,lnk,lnkbt)); 379 380 PetscCall(PetscMalloc2(n+1,&bi_ptr,n+1,&im)); 381 382 /* initial FreeSpace size is f*(ai[n]+1) */ 383 f = info->fill; 384 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space)); 385 current_space = free_space; 386 387 for (i=0; i<n; i++) { 388 /* copy previous fill into linked list */ 389 nzi = 0; 390 nnz = ai[r[i]+1] - ai[r[i]]; 391 ajtmp = aj + ai[r[i]]; 392 PetscCall(PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt)); 393 nzi += nlnk; 394 395 /* add pivot rows into linked list */ 396 row = lnk[n]; 397 while (row < i) { 398 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 399 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 400 PetscCall(PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im)); 401 nzi += nlnk; 402 row = lnk[row]; 403 } 404 bi[i+1] = bi[i] + nzi; 405 im[i] = nzi; 406 407 /* mark bdiag */ 408 nzbd = 0; 409 nnz = nzi; 410 k = lnk[n]; 411 while (nnz-- && k < i) { 412 nzbd++; 413 k = lnk[k]; 414 } 415 bdiag[i] = bi[i] + nzbd; 416 417 /* if free space is not available, make more free space */ 418 if (current_space->local_remaining<nzi) { 419 nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */ 420 PetscCall(PetscFreeSpaceGet(nnz,¤t_space)); 421 reallocs++; 422 } 423 424 /* copy data into free space, then initialize lnk */ 425 PetscCall(PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt)); 426 427 bi_ptr[i] = current_space->array; 428 current_space->array += nzi; 429 current_space->local_used += nzi; 430 current_space->local_remaining -= nzi; 431 } 432 #if defined(PETSC_USE_INFO) 433 if (ai[n] != 0) { 434 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 435 PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af)); 436 PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af)); 437 PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af)); 438 PetscCall(PetscInfo(A,"for best performance.\n")); 439 } else { 440 PetscCall(PetscInfo(A,"Empty matrix\n")); 441 } 442 #endif 443 444 PetscCall(ISRestoreIndices(isrow,&r)); 445 PetscCall(ISRestoreIndices(isicol,&ic)); 446 447 /* destroy list of free space and other temporary array(s) */ 448 PetscCall(PetscMalloc1(bi[n]+1,&bj)); 449 PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); 450 PetscCall(PetscLLDestroy(lnk,lnkbt)); 451 PetscCall(PetscFree2(bi_ptr,im)); 452 453 /* put together the new matrix */ 454 PetscCall(MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL)); 455 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)isicol)); 456 b = (Mat_SeqBAIJ*)(B)->data; 457 458 b->free_a = PETSC_TRUE; 459 b->free_ij = PETSC_TRUE; 460 b->singlemalloc = PETSC_FALSE; 461 462 PetscCall(PetscMalloc1((bi[n]+1)*bs2,&b->a)); 463 b->j = bj; 464 b->i = bi; 465 b->diag = bdiag; 466 b->free_diag = PETSC_TRUE; 467 b->ilen = NULL; 468 b->imax = NULL; 469 b->row = isrow; 470 b->col = iscol; 471 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 472 473 PetscCall(PetscObjectReference((PetscObject)isrow)); 474 PetscCall(PetscObjectReference((PetscObject)iscol)); 475 b->icol = isicol; 476 477 PetscCall(PetscMalloc1(bs*n+bs,&b->solve_work)); 478 PetscCall(PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2))); 479 480 b->maxnz = b->nz = bi[n]; 481 482 (B)->factortype = MAT_FACTOR_LU; 483 (B)->info.factor_mallocs = reallocs; 484 (B)->info.fill_ratio_given = f; 485 486 if (ai[n] != 0) { 487 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 488 } else { 489 (B)->info.fill_ratio_needed = 0.0; 490 } 491 492 PetscCall(ISIdentity(isrow,&row_identity)); 493 PetscCall(ISIdentity(iscol,&col_identity)); 494 495 both_identity = (PetscBool) (row_identity && col_identity); 496 497 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(B,both_identity)); 498 PetscFunctionReturn(0); 499 } 500