1 #define PETSCMAT_DLL 2 3 /* 4 Factorization code for BAIJ format. 5 */ 6 #include "../src/mat/impls/baij/seq/baij.h" 7 #include "../src/mat/blockinvert.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatSeqBAIJSetNumericFactorization" 11 /* 12 This is used to set the numeric factorization for both LU and ILU symbolic factorization 13 */ 14 PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat inA,PetscTruth natural) 15 { 16 PetscFunctionBegin; 17 if (natural) { 18 switch (inA->rmap->bs) { 19 case 1: 20 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 21 break; 22 case 2: 23 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 24 break; 25 case 3: 26 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 27 break; 28 case 4: 29 #if defined(PETSC_USE_MAT_SINGLE) 30 { 31 PetscTruth sse_enabled_local; 32 PetscErrorCode ierr; 33 ierr = PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);CHKERRQ(ierr); 34 if (sse_enabled_local) { 35 # if defined(PETSC_HAVE_SSE) 36 int i,*AJ=a->j,nz=a->nz,n=a->mbs; 37 if (n==(unsigned short)n) { 38 unsigned short *aj=(unsigned short *)AJ; 39 for (i=0;i<nz;i++) { 40 aj[i] = (unsigned short)AJ[i]; 41 } 42 inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj; 43 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj; 44 ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");CHKERRQ(ierr); 45 } else { 46 /* Scale the column indices for easier indexing in MatSolve. */ 47 /* for (i=0;i<nz;i++) { */ 48 /* AJ[i] = AJ[i]*4; */ 49 /* } */ 50 inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE; 51 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 52 ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");CHKERRQ(ierr); 53 } 54 # else 55 /* This should never be reached. If so, problem in PetscSSEIsEnabled. */ 56 SETERRQ(PETSC_ERR_SUP,"SSE Hardware unavailable"); 57 # endif 58 } else { 59 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 60 } 61 } 62 #else 63 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 64 #endif 65 break; 66 case 5: 67 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 68 break; 69 case 6: 70 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 71 break; 72 case 7: 73 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 74 break; 75 default: 76 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 77 break; 78 } 79 } else { 80 switch (inA->rmap->bs) { 81 case 1: 82 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 83 break; 84 case 2: 85 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 86 break; 87 case 3: 88 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 89 break; 90 case 4: 91 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 92 break; 93 case 5: 94 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 95 break; 96 case 6: 97 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 98 break; 99 case 7: 100 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 101 break; 102 default: 103 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 104 break; 105 } 106 } 107 PetscFunctionReturn(0); 108 } 109 110 /* 111 The symbolic factorization code is identical to that for AIJ format, 112 except for very small changes since this is now a SeqBAIJ datastructure. 113 NOT good code reuse. 114 */ 115 #include "petscbt.h" 116 #include "../src/mat/utils/freespace.h" 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ_newdatastruct" 120 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_newdatastruct(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 121 { 122 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 123 PetscInt n=a->mbs,bs = A->rmap->bs,bs2=a->bs2; 124 PetscTruth row_identity,col_identity,both_identity; 125 IS isicol; 126 PetscErrorCode ierr; 127 const PetscInt *r,*ic; 128 PetscInt i,*ai=a->i,*aj=a->j; 129 PetscInt *bi,*bj,*ajtmp; 130 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 131 PetscReal f; 132 PetscInt nlnk,*lnk,k,**bi_ptr; 133 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 134 PetscBT lnkbt; 135 136 PetscFunctionBegin; 137 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 138 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 139 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 140 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 141 142 /* get new row pointers */ 143 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 144 bi[0] = 0; 145 146 /* bdiag is location of diagonal in factor */ 147 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 148 bdiag[0] = 0; 149 150 /* linked list for storing column indices of the active row */ 151 nlnk = n + 1; 152 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 153 154 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 155 156 /* initial FreeSpace size is f*(ai[n]+1) */ 157 f = info->fill; 158 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 159 current_space = free_space; 160 161 for (i=0; i<n; i++) { 162 /* copy previous fill into linked list */ 163 nzi = 0; 164 nnz = ai[r[i]+1] - ai[r[i]]; 165 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 166 ajtmp = aj + ai[r[i]]; 167 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 168 nzi += nlnk; 169 170 /* add pivot rows into linked list */ 171 row = lnk[n]; 172 while (row < i) { 173 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 174 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 175 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 176 nzi += nlnk; 177 row = lnk[row]; 178 } 179 bi[i+1] = bi[i] + nzi; 180 im[i] = nzi; 181 182 /* mark bdiag */ 183 nzbd = 0; 184 nnz = nzi; 185 k = lnk[n]; 186 while (nnz-- && k < i){ 187 nzbd++; 188 k = lnk[k]; 189 } 190 bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 191 192 /* if free space is not available, make more free space */ 193 if (current_space->local_remaining<nzi) { 194 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 195 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 196 reallocs++; 197 } 198 199 /* copy data into free space, then initialize lnk */ 200 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 201 bi_ptr[i] = current_space->array; 202 current_space->array += nzi; 203 current_space->local_used += nzi; 204 current_space->local_remaining -= nzi; 205 } 206 #if defined(PETSC_USE_INFO) 207 if (ai[n] != 0) { 208 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 209 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 210 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 211 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 212 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 213 } else { 214 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 215 } 216 #endif 217 218 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 219 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 220 221 /* destroy list of free space and other temporary array(s) */ 222 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 223 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 224 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 225 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 226 227 /* put together the new matrix */ 228 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 229 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 230 b = (Mat_SeqBAIJ*)(B)->data; 231 b->free_a = PETSC_TRUE; 232 b->free_ij = PETSC_TRUE; 233 b->singlemalloc = PETSC_FALSE; 234 ierr = PetscMalloc((bi[2*n+1])*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 235 b->j = bj; 236 b->i = bi; 237 b->diag = bdiag; 238 b->free_diag = PETSC_TRUE; 239 b->ilen = 0; 240 b->imax = 0; 241 b->row = isrow; 242 b->col = iscol; 243 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 244 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 245 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 246 b->icol = isicol; 247 ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 248 ierr = PetscLogObjectMemory(B,(bi[2*n+1])*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr); 249 250 b->maxnz = b->nz = bi[2*n+1]; 251 B->factor = MAT_FACTOR_LU; 252 B->info.factor_mallocs = reallocs; 253 B->info.fill_ratio_given = f; 254 255 if (ai[n] != 0) { 256 B->info.fill_ratio_needed = ((PetscReal)bi[2*n+1])/((PetscReal)ai[n]); 257 } else { 258 B->info.fill_ratio_needed = 0.0; 259 } 260 261 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 262 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 263 both_identity = (PetscTruth) (row_identity && col_identity); 264 if(both_identity){ 265 switch(bs){ 266 case 2: 267 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct; 268 B->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering_newdatastruct; 269 break; 270 case 3: 271 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_newdatastruct; 272 B->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct; 273 break; 274 case 4: 275 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_newdatastruct; 276 B->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering_newdatastruct; 277 break; 278 case 5: 279 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_newdatastruct; 280 B->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering_newdatastruct; 281 break; 282 case 6: 283 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_newdatastruct; 284 B->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering_newdatastruct; 285 break; 286 case 7: 287 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_newdatastruct; 288 B->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering_newdatastruct; 289 break; 290 default: 291 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct; 292 B->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct; 293 } 294 } 295 else { 296 switch(bs){ 297 case 2: 298 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_newdatastruct; 299 B->ops->solve = MatSolve_SeqBAIJ_2_newdatastruct; 300 break; 301 case 3: 302 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_newdatastruct; 303 B->ops->solve = MatSolve_SeqBAIJ_3_newdatastruct; 304 break; 305 case 4: 306 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_newdatastruct; 307 B->ops->solve = MatSolve_SeqBAIJ_4_newdatastruct; 308 break; 309 case 5: 310 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_newdatastruct; 311 B->ops->solve = MatSolve_SeqBAIJ_5_newdatastruct; 312 break; 313 case 6: 314 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_newdatastruct; 315 B->ops->solve = MatSolve_SeqBAIJ_6_newdatastruct; 316 break; 317 case 7: 318 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_newdatastruct; 319 B->ops->solve = MatSolve_SeqBAIJ_7_newdatastruct; 320 break; 321 default: 322 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct; 323 B->ops->solve = MatSolve_SeqBAIJ_N_newdatastruct; 324 } 325 } 326 /* ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr); */ 327 PetscFunctionReturn(0); 328 } 329 330 #undef __FUNCT__ 331 #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ_newdatastruct_v2" 332 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_newdatastruct_v2(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 333 { 334 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 335 PetscInt n=a->mbs,bs = A->rmap->bs,bs2=a->bs2; 336 PetscTruth row_identity,col_identity,both_identity; 337 IS isicol; 338 PetscErrorCode ierr; 339 const PetscInt *r,*ic; 340 PetscInt i,*ai=a->i,*aj=a->j; 341 PetscInt *bi,*bj,*ajtmp; 342 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 343 PetscReal f; 344 PetscInt nlnk,*lnk,k,**bi_ptr; 345 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 346 PetscBT lnkbt; 347 348 PetscFunctionBegin; 349 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 350 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 351 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 352 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 353 354 /* get new row pointers */ 355 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 356 bi[0] = 0; 357 358 /* bdiag is location of diagonal in factor */ 359 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 360 bdiag[0] = 0; 361 362 /* linked list for storing column indices of the active row */ 363 nlnk = n + 1; 364 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 365 366 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 367 368 /* initial FreeSpace size is f*(ai[n]+1) */ 369 f = info->fill; 370 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 371 current_space = free_space; 372 373 for (i=0; i<n; i++) { 374 /* copy previous fill into linked list */ 375 nzi = 0; 376 nnz = ai[r[i]+1] - ai[r[i]]; 377 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 378 ajtmp = aj + ai[r[i]]; 379 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 380 nzi += nlnk; 381 382 /* add pivot rows into linked list */ 383 row = lnk[n]; 384 while (row < i) { 385 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 386 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 387 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 388 nzi += nlnk; 389 row = lnk[row]; 390 } 391 bi[i+1] = bi[i] + nzi; 392 im[i] = nzi; 393 394 /* mark bdiag */ 395 nzbd = 0; 396 nnz = nzi; 397 k = lnk[n]; 398 while (nnz-- && k < i){ 399 nzbd++; 400 k = lnk[k]; 401 } 402 bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU_v2() */ 403 404 /* if free space is not available, make more free space */ 405 if (current_space->local_remaining<nzi) { 406 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 407 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 408 reallocs++; 409 } 410 411 /* copy data into free space, then initialize lnk */ 412 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 413 bi_ptr[i] = current_space->array; 414 current_space->array += nzi; 415 current_space->local_used += nzi; 416 current_space->local_remaining -= nzi; 417 } 418 #if defined(PETSC_USE_INFO) 419 if (ai[n] != 0) { 420 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 421 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 422 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 423 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 424 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 425 } else { 426 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 427 } 428 #endif 429 430 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 431 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 432 433 /* destroy list of free space and other temporary array(s) */ 434 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 435 ierr = PetscFreeSpaceContiguous_LU_v2(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 436 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 437 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 438 439 /* put together the new matrix */ 440 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 441 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 442 b = (Mat_SeqBAIJ*)(B)->data; 443 b->free_a = PETSC_TRUE; 444 b->free_ij = PETSC_TRUE; 445 b->singlemalloc = PETSC_FALSE; 446 ierr = PetscMalloc((bdiag[0]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 447 b->j = bj; 448 b->i = bi; 449 b->diag = bdiag; 450 b->free_diag = PETSC_TRUE; 451 b->ilen = 0; 452 b->imax = 0; 453 b->row = isrow; 454 b->col = iscol; 455 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 456 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 457 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 458 b->icol = isicol; 459 ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 460 ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr); 461 462 b->maxnz = b->nz = bdiag[0]+1; 463 B->factor = MAT_FACTOR_LU; 464 B->info.factor_mallocs = reallocs; 465 B->info.fill_ratio_given = f; 466 467 if (ai[n] != 0) { 468 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 469 } else { 470 B->info.fill_ratio_needed = 0.0; 471 } 472 473 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 474 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 475 both_identity = (PetscTruth) (row_identity && col_identity); 476 if(both_identity){ 477 switch(bs){ 478 case 2: 479 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct_v2; 480 B->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering_newdatastruct_v2; 481 break; 482 case 3: 483 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_newdatastruct_v2; 484 B->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct_v2; 485 break; 486 case 4: 487 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_newdatastruct_v2; 488 B->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering_newdatastruct_v2; 489 break; 490 case 5: 491 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_newdatastruct_v2; 492 B->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering_newdatastruct_v2; 493 break; 494 case 6: 495 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_newdatastruct_v2; 496 B->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering_newdatastruct_v2; 497 break; 498 case 7: 499 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_newdatastruct_v2; 500 B->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering_newdatastruct_v2; 501 break; 502 default: 503 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct_v2; 504 B->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct_v2; 505 } 506 } 507 else { 508 switch(bs){ 509 case 2: 510 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_newdatastruct_v2; 511 B->ops->solve = MatSolve_SeqBAIJ_2_newdatastruct_v2; 512 break; 513 case 3: 514 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_newdatastruct_v2; 515 B->ops->solve = MatSolve_SeqBAIJ_3_newdatastruct_v2; 516 break; 517 case 4: 518 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_newdatastruct_v2; 519 B->ops->solve = MatSolve_SeqBAIJ_4_newdatastruct_v2; 520 break; 521 case 5: 522 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_newdatastruct_v2; 523 B->ops->solve = MatSolve_SeqBAIJ_5_newdatastruct_v2; 524 break; 525 case 6: 526 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_newdatastruct_v2; 527 B->ops->solve = MatSolve_SeqBAIJ_6_newdatastruct_v2; 528 break; 529 case 7: 530 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_newdatastruct; 531 B->ops->solve = MatSolve_SeqBAIJ_7_newdatastruct; 532 break; 533 default: 534 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct; 535 B->ops->solve = MatSolve_SeqBAIJ_N_newdatastruct; 536 } 537 } 538 /* ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr); */ 539 PetscFunctionReturn(0); 540 } 541 542 #undef __FUNCT__ 543 #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ" 544 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 545 { 546 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 547 PetscInt n=a->mbs,bs = A->rmap->bs,bs2=a->bs2; 548 PetscTruth row_identity,col_identity,both_identity; 549 IS isicol; 550 PetscErrorCode ierr; 551 const PetscInt *r,*ic; 552 PetscInt i,*ai=a->i,*aj=a->j; 553 PetscInt *bi,*bj,*ajtmp; 554 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 555 PetscReal f; 556 PetscInt nlnk,*lnk,k,**bi_ptr; 557 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 558 PetscBT lnkbt; 559 PetscTruth newdatastruct=PETSC_FALSE; 560 PetscTruth newdatastruct_v2=PETSC_FALSE; 561 562 PetscFunctionBegin; 563 ierr = PetscOptionsGetTruth(PETSC_NULL,"-lu_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr); 564 if(newdatastruct){ 565 ierr = MatLUFactorSymbolic_SeqBAIJ_newdatastruct(B,A,isrow,iscol,info);CHKERRQ(ierr); 566 PetscFunctionReturn(0); 567 } 568 569 ierr = PetscOptionsGetTruth(PETSC_NULL,"-lu_new_v2",&newdatastruct_v2,PETSC_NULL);CHKERRQ(ierr); 570 if(newdatastruct_v2){ 571 ierr = MatLUFactorSymbolic_SeqBAIJ_newdatastruct_v2(B,A,isrow,iscol,info);CHKERRQ(ierr); 572 PetscFunctionReturn(0); 573 } 574 575 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 576 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 577 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 578 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 579 580 /* get new row pointers */ 581 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 582 bi[0] = 0; 583 584 /* bdiag is location of diagonal in factor */ 585 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 586 bdiag[0] = 0; 587 588 /* linked list for storing column indices of the active row */ 589 nlnk = n + 1; 590 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 591 592 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 593 594 /* initial FreeSpace size is f*(ai[n]+1) */ 595 f = info->fill; 596 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 597 current_space = free_space; 598 599 for (i=0; i<n; i++) { 600 /* copy previous fill into linked list */ 601 nzi = 0; 602 nnz = ai[r[i]+1] - ai[r[i]]; 603 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 604 ajtmp = aj + ai[r[i]]; 605 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 606 nzi += nlnk; 607 608 /* add pivot rows into linked list */ 609 row = lnk[n]; 610 while (row < i) { 611 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 612 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 613 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 614 nzi += nlnk; 615 row = lnk[row]; 616 } 617 bi[i+1] = bi[i] + nzi; 618 im[i] = nzi; 619 620 /* mark bdiag */ 621 nzbd = 0; 622 nnz = nzi; 623 k = lnk[n]; 624 while (nnz-- && k < i){ 625 nzbd++; 626 k = lnk[k]; 627 } 628 bdiag[i] = bi[i] + nzbd; 629 630 /* if free space is not available, make more free space */ 631 if (current_space->local_remaining<nzi) { 632 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 633 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 634 reallocs++; 635 } 636 637 /* copy data into free space, then initialize lnk */ 638 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 639 bi_ptr[i] = current_space->array; 640 current_space->array += nzi; 641 current_space->local_used += nzi; 642 current_space->local_remaining -= nzi; 643 } 644 #if defined(PETSC_USE_INFO) 645 if (ai[n] != 0) { 646 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 647 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 648 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 649 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 650 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 651 } else { 652 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 653 } 654 #endif 655 656 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 657 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 658 659 /* destroy list of free space and other temporary array(s) */ 660 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 661 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 662 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 663 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 664 665 /* put together the new matrix */ 666 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 667 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 668 b = (Mat_SeqBAIJ*)(B)->data; 669 b->free_a = PETSC_TRUE; 670 b->free_ij = PETSC_TRUE; 671 b->singlemalloc = PETSC_FALSE; 672 ierr = PetscMalloc((bi[n]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 673 b->j = bj; 674 b->i = bi; 675 b->diag = bdiag; 676 b->free_diag = PETSC_TRUE; 677 b->ilen = 0; 678 b->imax = 0; 679 b->row = isrow; 680 b->col = iscol; 681 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 682 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 683 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 684 b->icol = isicol; 685 ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 686 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr); 687 688 b->maxnz = b->nz = bi[n] ; 689 (B)->factor = MAT_FACTOR_LU; 690 (B)->info.factor_mallocs = reallocs; 691 (B)->info.fill_ratio_given = f; 692 693 if (ai[n] != 0) { 694 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 695 } else { 696 (B)->info.fill_ratio_needed = 0.0; 697 } 698 699 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 700 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 701 both_identity = (PetscTruth) (row_identity && col_identity); 702 ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr); 703 PetscFunctionReturn(0); 704 } 705 706