1be1d678aSKris Buschelman #define PETSCMAT_DLL 2be1d678aSKris Buschelman 383287d42SBarry Smith /* 483287d42SBarry Smith Factorization code for BAIJ format. 583287d42SBarry Smith */ 67c4f633dSBarry Smith #include "../src/mat/impls/baij/seq/baij.h" 7c60f0209SBarry Smith #include "../src/mat/blockinvert.h" 883287d42SBarry Smith 9db4efbfdSBarry Smith #undef __FUNCT__ 10db4efbfdSBarry Smith #define __FUNCT__ "MatSeqBAIJSetNumericFactorization" 11db4efbfdSBarry Smith /* 12db4efbfdSBarry Smith This is used to set the numeric factorization for both LU and ILU symbolic factorization 13db4efbfdSBarry Smith */ 14db4efbfdSBarry Smith PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat inA,PetscTruth natural) 15db4efbfdSBarry Smith { 16db4efbfdSBarry Smith PetscFunctionBegin; 17db4efbfdSBarry Smith if (natural) { 18db4efbfdSBarry Smith switch (inA->rmap->bs) { 19db4efbfdSBarry Smith case 1: 20db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 21db4efbfdSBarry Smith break; 22db4efbfdSBarry Smith case 2: 23db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 24db4efbfdSBarry Smith break; 25db4efbfdSBarry Smith case 3: 26db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 27db4efbfdSBarry Smith break; 28db4efbfdSBarry Smith case 4: 29db4efbfdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 30db4efbfdSBarry Smith { 31db4efbfdSBarry Smith PetscTruth sse_enabled_local; 32db4efbfdSBarry Smith PetscErrorCode ierr; 33db4efbfdSBarry Smith ierr = PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);CHKERRQ(ierr); 34db4efbfdSBarry Smith if (sse_enabled_local) { 35db4efbfdSBarry Smith # if defined(PETSC_HAVE_SSE) 36db4efbfdSBarry Smith int i,*AJ=a->j,nz=a->nz,n=a->mbs; 37db4efbfdSBarry Smith if (n==(unsigned short)n) { 38db4efbfdSBarry Smith unsigned short *aj=(unsigned short *)AJ; 39db4efbfdSBarry Smith for (i=0;i<nz;i++) { 40db4efbfdSBarry Smith aj[i] = (unsigned short)AJ[i]; 41db4efbfdSBarry Smith } 42db4efbfdSBarry Smith inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj; 43db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj; 44db4efbfdSBarry Smith ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");CHKERRQ(ierr); 45db4efbfdSBarry Smith } else { 46db4efbfdSBarry Smith /* Scale the column indices for easier indexing in MatSolve. */ 47db4efbfdSBarry Smith /* for (i=0;i<nz;i++) { */ 48db4efbfdSBarry Smith /* AJ[i] = AJ[i]*4; */ 49db4efbfdSBarry Smith /* } */ 50db4efbfdSBarry Smith inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE; 51db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 52db4efbfdSBarry Smith ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");CHKERRQ(ierr); 53db4efbfdSBarry Smith } 54db4efbfdSBarry Smith # else 55db4efbfdSBarry Smith /* This should never be reached. If so, problem in PetscSSEIsEnabled. */ 56db4efbfdSBarry Smith SETERRQ(PETSC_ERR_SUP,"SSE Hardware unavailable"); 57db4efbfdSBarry Smith # endif 58db4efbfdSBarry Smith } else { 59db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 60db4efbfdSBarry Smith } 61db4efbfdSBarry Smith } 62db4efbfdSBarry Smith #else 63db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 64db4efbfdSBarry Smith #endif 65db4efbfdSBarry Smith break; 66db4efbfdSBarry Smith case 5: 67db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 68db4efbfdSBarry Smith break; 69db4efbfdSBarry Smith case 6: 70db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 71db4efbfdSBarry Smith break; 72db4efbfdSBarry Smith case 7: 73db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 74db4efbfdSBarry Smith break; 75db4efbfdSBarry Smith default: 76db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 77db4efbfdSBarry Smith break; 78db4efbfdSBarry Smith } 79db4efbfdSBarry Smith } else { 80db4efbfdSBarry Smith switch (inA->rmap->bs) { 81db4efbfdSBarry Smith case 1: 82db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 83db4efbfdSBarry Smith break; 84db4efbfdSBarry Smith case 2: 85db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 86db4efbfdSBarry Smith break; 87db4efbfdSBarry Smith case 3: 88db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 89db4efbfdSBarry Smith break; 90db4efbfdSBarry Smith case 4: 91db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 92db4efbfdSBarry Smith break; 93db4efbfdSBarry Smith case 5: 94db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 95db4efbfdSBarry Smith break; 96db4efbfdSBarry Smith case 6: 97db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 98db4efbfdSBarry Smith break; 99db4efbfdSBarry Smith case 7: 100db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 101db4efbfdSBarry Smith break; 102db4efbfdSBarry Smith default: 103db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 104db4efbfdSBarry Smith break; 105db4efbfdSBarry Smith } 106db4efbfdSBarry Smith } 107db4efbfdSBarry Smith PetscFunctionReturn(0); 108db4efbfdSBarry Smith } 109db4efbfdSBarry Smith 11083287d42SBarry Smith /* 11183287d42SBarry Smith The symbolic factorization code is identical to that for AIJ format, 11283287d42SBarry Smith except for very small changes since this is now a SeqBAIJ datastructure. 11383287d42SBarry Smith NOT good code reuse. 11483287d42SBarry Smith */ 115c7272abeSHong Zhang #include "petscbt.h" 116c7272abeSHong Zhang #include "../src/mat/utils/freespace.h" 117c7272abeSHong Zhang 1184a2ae208SSatish Balay #undef __FUNCT__ 119faca2338SShri Abhyankar #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ_newdatastruct" 120faca2338SShri Abhyankar PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_newdatastruct(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 12183287d42SBarry Smith { 12283287d42SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 123c7272abeSHong Zhang PetscInt n=a->mbs,bs = A->rmap->bs,bs2=a->bs2; 124c7272abeSHong Zhang PetscTruth row_identity,col_identity,both_identity; 12583287d42SBarry Smith IS isicol; 1266849ba73SBarry Smith PetscErrorCode ierr; 127c7272abeSHong Zhang const PetscInt *r,*ic; 128c7272abeSHong Zhang PetscInt i,*ai=a->i,*aj=a->j; 129c7272abeSHong Zhang PetscInt *bi,*bj,*ajtmp; 130c7272abeSHong Zhang PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 131c7272abeSHong Zhang PetscReal f; 132c7272abeSHong Zhang PetscInt nlnk,*lnk,k,**bi_ptr; 133c7272abeSHong Zhang PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 134c7272abeSHong Zhang PetscBT lnkbt; 13583287d42SBarry Smith 13683287d42SBarry Smith PetscFunctionBegin; 137d0f46423SBarry Smith if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 13883287d42SBarry Smith ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 13983287d42SBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 14083287d42SBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 14183287d42SBarry Smith 14283287d42SBarry Smith /* get new row pointers */ 143faca2338SShri Abhyankar ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 144faca2338SShri Abhyankar bi[0] = 0; 145faca2338SShri Abhyankar 146faca2338SShri Abhyankar /* bdiag is location of diagonal in factor */ 147faca2338SShri Abhyankar ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 148faca2338SShri Abhyankar bdiag[0] = 0; 149faca2338SShri Abhyankar 150faca2338SShri Abhyankar /* linked list for storing column indices of the active row */ 151faca2338SShri Abhyankar nlnk = n + 1; 152faca2338SShri Abhyankar ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 153faca2338SShri Abhyankar 154faca2338SShri Abhyankar ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 155faca2338SShri Abhyankar 156faca2338SShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 157faca2338SShri Abhyankar f = info->fill; 158faca2338SShri Abhyankar ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 159faca2338SShri Abhyankar current_space = free_space; 160faca2338SShri Abhyankar 161faca2338SShri Abhyankar for (i=0; i<n; i++) { 162faca2338SShri Abhyankar /* copy previous fill into linked list */ 163faca2338SShri Abhyankar nzi = 0; 164faca2338SShri Abhyankar nnz = ai[r[i]+1] - ai[r[i]]; 165faca2338SShri Abhyankar if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 166faca2338SShri Abhyankar ajtmp = aj + ai[r[i]]; 167faca2338SShri Abhyankar ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 168faca2338SShri Abhyankar nzi += nlnk; 169faca2338SShri Abhyankar 170faca2338SShri Abhyankar /* add pivot rows into linked list */ 171faca2338SShri Abhyankar row = lnk[n]; 172faca2338SShri Abhyankar while (row < i) { 1738bfb2e2bSShri Abhyankar nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 174faca2338SShri Abhyankar ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 175faca2338SShri Abhyankar ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 176faca2338SShri Abhyankar nzi += nlnk; 177faca2338SShri Abhyankar row = lnk[row]; 178faca2338SShri Abhyankar } 179faca2338SShri Abhyankar bi[i+1] = bi[i] + nzi; 180faca2338SShri Abhyankar im[i] = nzi; 181faca2338SShri Abhyankar 182faca2338SShri Abhyankar /* mark bdiag */ 183faca2338SShri Abhyankar nzbd = 0; 184faca2338SShri Abhyankar nnz = nzi; 185faca2338SShri Abhyankar k = lnk[n]; 186faca2338SShri Abhyankar while (nnz-- && k < i){ 187faca2338SShri Abhyankar nzbd++; 188faca2338SShri Abhyankar k = lnk[k]; 189faca2338SShri Abhyankar } 190783ef271SHong Zhang bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 191faca2338SShri Abhyankar 192faca2338SShri Abhyankar /* if free space is not available, make more free space */ 193faca2338SShri Abhyankar if (current_space->local_remaining<nzi) { 194faca2338SShri Abhyankar nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 195faca2338SShri Abhyankar ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 196faca2338SShri Abhyankar reallocs++; 197faca2338SShri Abhyankar } 198faca2338SShri Abhyankar 199faca2338SShri Abhyankar /* copy data into free space, then initialize lnk */ 200faca2338SShri Abhyankar ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 201faca2338SShri Abhyankar bi_ptr[i] = current_space->array; 202faca2338SShri Abhyankar current_space->array += nzi; 203faca2338SShri Abhyankar current_space->local_used += nzi; 204faca2338SShri Abhyankar current_space->local_remaining -= nzi; 205faca2338SShri Abhyankar } 206faca2338SShri Abhyankar #if defined(PETSC_USE_INFO) 207faca2338SShri Abhyankar if (ai[n] != 0) { 208faca2338SShri Abhyankar PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 209faca2338SShri Abhyankar ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 210faca2338SShri Abhyankar ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 211faca2338SShri Abhyankar ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 212faca2338SShri Abhyankar ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 213faca2338SShri Abhyankar } else { 214faca2338SShri Abhyankar ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 215faca2338SShri Abhyankar } 216faca2338SShri Abhyankar #endif 217faca2338SShri Abhyankar 218faca2338SShri Abhyankar ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 219faca2338SShri Abhyankar ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 220faca2338SShri Abhyankar 221faca2338SShri Abhyankar /* destroy list of free space and other temporary array(s) */ 222faca2338SShri Abhyankar ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 223783ef271SHong Zhang ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 224faca2338SShri Abhyankar ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 225faca2338SShri Abhyankar ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 226faca2338SShri Abhyankar 227faca2338SShri Abhyankar /* put together the new matrix */ 228faca2338SShri Abhyankar ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 229faca2338SShri Abhyankar ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 230faca2338SShri Abhyankar b = (Mat_SeqBAIJ*)(B)->data; 231faca2338SShri Abhyankar b->free_a = PETSC_TRUE; 232faca2338SShri Abhyankar b->free_ij = PETSC_TRUE; 233faca2338SShri Abhyankar b->singlemalloc = PETSC_FALSE; 234faca2338SShri Abhyankar ierr = PetscMalloc((bi[2*n+1])*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 235faca2338SShri Abhyankar b->j = bj; 236faca2338SShri Abhyankar b->i = bi; 237faca2338SShri Abhyankar b->diag = bdiag; 2387f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 239faca2338SShri Abhyankar b->ilen = 0; 240faca2338SShri Abhyankar b->imax = 0; 241faca2338SShri Abhyankar b->row = isrow; 242faca2338SShri Abhyankar b->col = iscol; 243faca2338SShri Abhyankar b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 244faca2338SShri Abhyankar ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 245faca2338SShri Abhyankar ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 246faca2338SShri Abhyankar b->icol = isicol; 247faca2338SShri Abhyankar ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 248faca2338SShri Abhyankar ierr = PetscLogObjectMemory(B,(bi[2*n+1])*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr); 249faca2338SShri Abhyankar 250faca2338SShri Abhyankar b->maxnz = b->nz = bi[2*n+1]; 2514c000e2eSHong Zhang B->factor = MAT_FACTOR_LU; 2524c000e2eSHong Zhang B->info.factor_mallocs = reallocs; 2534c000e2eSHong Zhang B->info.fill_ratio_given = f; 254faca2338SShri Abhyankar 255faca2338SShri Abhyankar if (ai[n] != 0) { 2564c000e2eSHong Zhang B->info.fill_ratio_needed = ((PetscReal)bi[2*n+1])/((PetscReal)ai[n]); 257faca2338SShri Abhyankar } else { 2584c000e2eSHong Zhang B->info.fill_ratio_needed = 0.0; 259faca2338SShri Abhyankar } 260b588c5a2SHong Zhang 261faca2338SShri Abhyankar ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 262faca2338SShri Abhyankar ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 263faca2338SShri Abhyankar both_identity = (PetscTruth) (row_identity && col_identity); 2648bfb2e2bSShri Abhyankar if(both_identity){ 2658bfb2e2bSShri Abhyankar switch(bs){ 2668bfb2e2bSShri Abhyankar case 2: 2674c000e2eSHong Zhang B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct; 2684c000e2eSHong Zhang B->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering_newdatastruct; 2698bfb2e2bSShri Abhyankar break; 2708bfb2e2bSShri Abhyankar case 3: 271359bf53fSShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_newdatastruct; 2724c000e2eSHong Zhang B->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct; 2738bfb2e2bSShri Abhyankar break; 2748bfb2e2bSShri Abhyankar case 4: 275209027a4SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_newdatastruct; 2764c000e2eSHong Zhang B->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering_newdatastruct; 2778bfb2e2bSShri Abhyankar break; 2788bfb2e2bSShri Abhyankar case 5: 2790deeaf61SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_newdatastruct; 2804c000e2eSHong Zhang B->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering_newdatastruct; 2818bfb2e2bSShri Abhyankar break; 2828bfb2e2bSShri Abhyankar case 6: 283bef36659SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_newdatastruct; 2844c000e2eSHong Zhang B->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering_newdatastruct; 2858bfb2e2bSShri Abhyankar break; 2868bfb2e2bSShri Abhyankar case 7: 287bef36659SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_newdatastruct; 2884c000e2eSHong Zhang B->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering_newdatastruct; 2898bfb2e2bSShri Abhyankar break; 2908bfb2e2bSShri Abhyankar default: 291bef36659SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct; 2924c000e2eSHong Zhang B->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct; 2938bfb2e2bSShri Abhyankar } 2948bfb2e2bSShri Abhyankar } 2958bfb2e2bSShri Abhyankar else { 2968bfb2e2bSShri Abhyankar switch(bs){ 2978bfb2e2bSShri Abhyankar case 2: 2984c000e2eSHong Zhang B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_newdatastruct; 2994c000e2eSHong Zhang B->ops->solve = MatSolve_SeqBAIJ_2_newdatastruct; 3008bfb2e2bSShri Abhyankar break; 3018bfb2e2bSShri Abhyankar case 3: 30217542729SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_newdatastruct; 3034c000e2eSHong Zhang B->ops->solve = MatSolve_SeqBAIJ_3_newdatastruct; 3048bfb2e2bSShri Abhyankar break; 3058bfb2e2bSShri Abhyankar case 4: 306209027a4SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_newdatastruct; 3074c000e2eSHong Zhang B->ops->solve = MatSolve_SeqBAIJ_4_newdatastruct; 3088bfb2e2bSShri Abhyankar break; 3098bfb2e2bSShri Abhyankar case 5: 3100deeaf61SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_newdatastruct; 3114c000e2eSHong Zhang B->ops->solve = MatSolve_SeqBAIJ_5_newdatastruct; 3128bfb2e2bSShri Abhyankar break; 3138bfb2e2bSShri Abhyankar case 6: 314bef36659SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_newdatastruct; 3154c000e2eSHong Zhang B->ops->solve = MatSolve_SeqBAIJ_6_newdatastruct; 3168bfb2e2bSShri Abhyankar break; 3178bfb2e2bSShri Abhyankar case 7: 318bef36659SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_newdatastruct; 3194c000e2eSHong Zhang B->ops->solve = MatSolve_SeqBAIJ_7_newdatastruct; 3208bfb2e2bSShri Abhyankar break; 3218bfb2e2bSShri Abhyankar default: 322bef36659SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct; 3234c000e2eSHong Zhang B->ops->solve = MatSolve_SeqBAIJ_N_newdatastruct; 3248bfb2e2bSShri Abhyankar } 3258bfb2e2bSShri Abhyankar } 326b588c5a2SHong Zhang /* ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr); */ 327faca2338SShri Abhyankar PetscFunctionReturn(0); 328faca2338SShri Abhyankar } 329faca2338SShri Abhyankar 330faca2338SShri Abhyankar #undef __FUNCT__ 331*b2b2dd24SShri Abhyankar #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ_newdatastruct_v2" 332*b2b2dd24SShri Abhyankar PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_newdatastruct_v2(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 333*b2b2dd24SShri Abhyankar { 334*b2b2dd24SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 335*b2b2dd24SShri Abhyankar PetscInt n=a->mbs,bs = A->rmap->bs,bs2=a->bs2; 336*b2b2dd24SShri Abhyankar PetscTruth row_identity,col_identity,both_identity; 337*b2b2dd24SShri Abhyankar IS isicol; 338*b2b2dd24SShri Abhyankar PetscErrorCode ierr; 339*b2b2dd24SShri Abhyankar const PetscInt *r,*ic; 340*b2b2dd24SShri Abhyankar PetscInt i,*ai=a->i,*aj=a->j; 341*b2b2dd24SShri Abhyankar PetscInt *bi,*bj,*ajtmp; 342*b2b2dd24SShri Abhyankar PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 343*b2b2dd24SShri Abhyankar PetscReal f; 344*b2b2dd24SShri Abhyankar PetscInt nlnk,*lnk,k,**bi_ptr; 345*b2b2dd24SShri Abhyankar PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 346*b2b2dd24SShri Abhyankar PetscBT lnkbt; 347*b2b2dd24SShri Abhyankar 348*b2b2dd24SShri Abhyankar PetscFunctionBegin; 349*b2b2dd24SShri Abhyankar if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 350*b2b2dd24SShri Abhyankar ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 351*b2b2dd24SShri Abhyankar ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 352*b2b2dd24SShri Abhyankar ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 353*b2b2dd24SShri Abhyankar 354*b2b2dd24SShri Abhyankar /* get new row pointers */ 355*b2b2dd24SShri Abhyankar ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 356*b2b2dd24SShri Abhyankar bi[0] = 0; 357*b2b2dd24SShri Abhyankar 358*b2b2dd24SShri Abhyankar /* bdiag is location of diagonal in factor */ 359*b2b2dd24SShri Abhyankar ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 360*b2b2dd24SShri Abhyankar bdiag[0] = 0; 361*b2b2dd24SShri Abhyankar 362*b2b2dd24SShri Abhyankar /* linked list for storing column indices of the active row */ 363*b2b2dd24SShri Abhyankar nlnk = n + 1; 364*b2b2dd24SShri Abhyankar ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 365*b2b2dd24SShri Abhyankar 366*b2b2dd24SShri Abhyankar ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 367*b2b2dd24SShri Abhyankar 368*b2b2dd24SShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 369*b2b2dd24SShri Abhyankar f = info->fill; 370*b2b2dd24SShri Abhyankar ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 371*b2b2dd24SShri Abhyankar current_space = free_space; 372*b2b2dd24SShri Abhyankar 373*b2b2dd24SShri Abhyankar for (i=0; i<n; i++) { 374*b2b2dd24SShri Abhyankar /* copy previous fill into linked list */ 375*b2b2dd24SShri Abhyankar nzi = 0; 376*b2b2dd24SShri Abhyankar nnz = ai[r[i]+1] - ai[r[i]]; 377*b2b2dd24SShri Abhyankar if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 378*b2b2dd24SShri Abhyankar ajtmp = aj + ai[r[i]]; 379*b2b2dd24SShri Abhyankar ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 380*b2b2dd24SShri Abhyankar nzi += nlnk; 381*b2b2dd24SShri Abhyankar 382*b2b2dd24SShri Abhyankar /* add pivot rows into linked list */ 383*b2b2dd24SShri Abhyankar row = lnk[n]; 384*b2b2dd24SShri Abhyankar while (row < i) { 385*b2b2dd24SShri Abhyankar nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 386*b2b2dd24SShri Abhyankar ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 387*b2b2dd24SShri Abhyankar ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 388*b2b2dd24SShri Abhyankar nzi += nlnk; 389*b2b2dd24SShri Abhyankar row = lnk[row]; 390*b2b2dd24SShri Abhyankar } 391*b2b2dd24SShri Abhyankar bi[i+1] = bi[i] + nzi; 392*b2b2dd24SShri Abhyankar im[i] = nzi; 393*b2b2dd24SShri Abhyankar 394*b2b2dd24SShri Abhyankar /* mark bdiag */ 395*b2b2dd24SShri Abhyankar nzbd = 0; 396*b2b2dd24SShri Abhyankar nnz = nzi; 397*b2b2dd24SShri Abhyankar k = lnk[n]; 398*b2b2dd24SShri Abhyankar while (nnz-- && k < i){ 399*b2b2dd24SShri Abhyankar nzbd++; 400*b2b2dd24SShri Abhyankar k = lnk[k]; 401*b2b2dd24SShri Abhyankar } 402*b2b2dd24SShri Abhyankar bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU_v2() */ 403*b2b2dd24SShri Abhyankar 404*b2b2dd24SShri Abhyankar /* if free space is not available, make more free space */ 405*b2b2dd24SShri Abhyankar if (current_space->local_remaining<nzi) { 406*b2b2dd24SShri Abhyankar nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 407*b2b2dd24SShri Abhyankar ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 408*b2b2dd24SShri Abhyankar reallocs++; 409*b2b2dd24SShri Abhyankar } 410*b2b2dd24SShri Abhyankar 411*b2b2dd24SShri Abhyankar /* copy data into free space, then initialize lnk */ 412*b2b2dd24SShri Abhyankar ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 413*b2b2dd24SShri Abhyankar bi_ptr[i] = current_space->array; 414*b2b2dd24SShri Abhyankar current_space->array += nzi; 415*b2b2dd24SShri Abhyankar current_space->local_used += nzi; 416*b2b2dd24SShri Abhyankar current_space->local_remaining -= nzi; 417*b2b2dd24SShri Abhyankar } 418*b2b2dd24SShri Abhyankar #if defined(PETSC_USE_INFO) 419*b2b2dd24SShri Abhyankar if (ai[n] != 0) { 420*b2b2dd24SShri Abhyankar PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 421*b2b2dd24SShri Abhyankar ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 422*b2b2dd24SShri Abhyankar ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 423*b2b2dd24SShri Abhyankar ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 424*b2b2dd24SShri Abhyankar ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 425*b2b2dd24SShri Abhyankar } else { 426*b2b2dd24SShri Abhyankar ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 427*b2b2dd24SShri Abhyankar } 428*b2b2dd24SShri Abhyankar #endif 429*b2b2dd24SShri Abhyankar 430*b2b2dd24SShri Abhyankar ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 431*b2b2dd24SShri Abhyankar ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 432*b2b2dd24SShri Abhyankar 433*b2b2dd24SShri Abhyankar /* destroy list of free space and other temporary array(s) */ 434*b2b2dd24SShri Abhyankar ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 435*b2b2dd24SShri Abhyankar ierr = PetscFreeSpaceContiguous_LU_v2(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 436*b2b2dd24SShri Abhyankar ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 437*b2b2dd24SShri Abhyankar ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 438*b2b2dd24SShri Abhyankar 439*b2b2dd24SShri Abhyankar /* put together the new matrix */ 440*b2b2dd24SShri Abhyankar ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 441*b2b2dd24SShri Abhyankar ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 442*b2b2dd24SShri Abhyankar b = (Mat_SeqBAIJ*)(B)->data; 443*b2b2dd24SShri Abhyankar b->free_a = PETSC_TRUE; 444*b2b2dd24SShri Abhyankar b->free_ij = PETSC_TRUE; 445*b2b2dd24SShri Abhyankar b->singlemalloc = PETSC_FALSE; 446*b2b2dd24SShri Abhyankar ierr = PetscMalloc((bdiag[0]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 447*b2b2dd24SShri Abhyankar b->j = bj; 448*b2b2dd24SShri Abhyankar b->i = bi; 449*b2b2dd24SShri Abhyankar b->diag = bdiag; 450*b2b2dd24SShri Abhyankar b->free_diag = PETSC_TRUE; 451*b2b2dd24SShri Abhyankar b->ilen = 0; 452*b2b2dd24SShri Abhyankar b->imax = 0; 453*b2b2dd24SShri Abhyankar b->row = isrow; 454*b2b2dd24SShri Abhyankar b->col = iscol; 455*b2b2dd24SShri Abhyankar b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 456*b2b2dd24SShri Abhyankar ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 457*b2b2dd24SShri Abhyankar ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 458*b2b2dd24SShri Abhyankar b->icol = isicol; 459*b2b2dd24SShri Abhyankar ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 460*b2b2dd24SShri Abhyankar ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr); 461*b2b2dd24SShri Abhyankar 462*b2b2dd24SShri Abhyankar b->maxnz = b->nz = bdiag[0]+1; 463*b2b2dd24SShri Abhyankar B->factor = MAT_FACTOR_LU; 464*b2b2dd24SShri Abhyankar B->info.factor_mallocs = reallocs; 465*b2b2dd24SShri Abhyankar B->info.fill_ratio_given = f; 466*b2b2dd24SShri Abhyankar 467*b2b2dd24SShri Abhyankar if (ai[n] != 0) { 468*b2b2dd24SShri Abhyankar B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 469*b2b2dd24SShri Abhyankar } else { 470*b2b2dd24SShri Abhyankar B->info.fill_ratio_needed = 0.0; 471*b2b2dd24SShri Abhyankar } 472*b2b2dd24SShri Abhyankar 473*b2b2dd24SShri Abhyankar ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 474*b2b2dd24SShri Abhyankar ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 475*b2b2dd24SShri Abhyankar both_identity = (PetscTruth) (row_identity && col_identity); 476*b2b2dd24SShri Abhyankar if(both_identity){ 477*b2b2dd24SShri Abhyankar switch(bs){ 478*b2b2dd24SShri Abhyankar case 2: 479*b2b2dd24SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct_v2; 480*b2b2dd24SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering_newdatastruct_v2; 481*b2b2dd24SShri Abhyankar break; 482*b2b2dd24SShri Abhyankar case 3: 483*b2b2dd24SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_newdatastruct_v2; 484*b2b2dd24SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct_v2; 485*b2b2dd24SShri Abhyankar break; 486*b2b2dd24SShri Abhyankar case 4: 487*b2b2dd24SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_newdatastruct_v2; 488*b2b2dd24SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering_newdatastruct_v2; 489*b2b2dd24SShri Abhyankar break; 490*b2b2dd24SShri Abhyankar case 5: 491*b2b2dd24SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_newdatastruct; 492*b2b2dd24SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering_newdatastruct; 493*b2b2dd24SShri Abhyankar break; 494*b2b2dd24SShri Abhyankar case 6: 495*b2b2dd24SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_newdatastruct; 496*b2b2dd24SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering_newdatastruct; 497*b2b2dd24SShri Abhyankar break; 498*b2b2dd24SShri Abhyankar case 7: 499*b2b2dd24SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_newdatastruct; 500*b2b2dd24SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering_newdatastruct; 501*b2b2dd24SShri Abhyankar break; 502*b2b2dd24SShri Abhyankar default: 503*b2b2dd24SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct; 504*b2b2dd24SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct; 505*b2b2dd24SShri Abhyankar } 506*b2b2dd24SShri Abhyankar } 507*b2b2dd24SShri Abhyankar else { 508*b2b2dd24SShri Abhyankar switch(bs){ 509*b2b2dd24SShri Abhyankar case 2: 510*b2b2dd24SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_newdatastruct; 511*b2b2dd24SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_2_newdatastruct; 512*b2b2dd24SShri Abhyankar break; 513*b2b2dd24SShri Abhyankar case 3: 514*b2b2dd24SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_newdatastruct; 515*b2b2dd24SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_3_newdatastruct; 516*b2b2dd24SShri Abhyankar break; 517*b2b2dd24SShri Abhyankar case 4: 518*b2b2dd24SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_newdatastruct; 519*b2b2dd24SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_4_newdatastruct; 520*b2b2dd24SShri Abhyankar break; 521*b2b2dd24SShri Abhyankar case 5: 522*b2b2dd24SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_newdatastruct; 523*b2b2dd24SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_5_newdatastruct; 524*b2b2dd24SShri Abhyankar break; 525*b2b2dd24SShri Abhyankar case 6: 526*b2b2dd24SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_newdatastruct; 527*b2b2dd24SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_6_newdatastruct; 528*b2b2dd24SShri Abhyankar break; 529*b2b2dd24SShri Abhyankar case 7: 530*b2b2dd24SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_newdatastruct; 531*b2b2dd24SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_7_newdatastruct; 532*b2b2dd24SShri Abhyankar break; 533*b2b2dd24SShri Abhyankar default: 534*b2b2dd24SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct; 535*b2b2dd24SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_N_newdatastruct; 536*b2b2dd24SShri Abhyankar } 537*b2b2dd24SShri Abhyankar } 538*b2b2dd24SShri Abhyankar /* ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr); */ 539*b2b2dd24SShri Abhyankar PetscFunctionReturn(0); 540*b2b2dd24SShri Abhyankar } 541*b2b2dd24SShri Abhyankar 542*b2b2dd24SShri Abhyankar #undef __FUNCT__ 543faca2338SShri Abhyankar #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ" 544faca2338SShri Abhyankar PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 545faca2338SShri Abhyankar { 546faca2338SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 547faca2338SShri Abhyankar PetscInt n=a->mbs,bs = A->rmap->bs,bs2=a->bs2; 548faca2338SShri Abhyankar PetscTruth row_identity,col_identity,both_identity; 549faca2338SShri Abhyankar IS isicol; 550faca2338SShri Abhyankar PetscErrorCode ierr; 551faca2338SShri Abhyankar const PetscInt *r,*ic; 552faca2338SShri Abhyankar PetscInt i,*ai=a->i,*aj=a->j; 553faca2338SShri Abhyankar PetscInt *bi,*bj,*ajtmp; 554faca2338SShri Abhyankar PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 555faca2338SShri Abhyankar PetscReal f; 556faca2338SShri Abhyankar PetscInt nlnk,*lnk,k,**bi_ptr; 557faca2338SShri Abhyankar PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 558faca2338SShri Abhyankar PetscBT lnkbt; 559faca2338SShri Abhyankar PetscTruth newdatastruct=PETSC_FALSE; 560*b2b2dd24SShri Abhyankar PetscTruth newdatastruct_v2=PETSC_FALSE; 561faca2338SShri Abhyankar 562faca2338SShri Abhyankar PetscFunctionBegin; 563faca2338SShri Abhyankar ierr = PetscOptionsGetTruth(PETSC_NULL,"-lu_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr); 564faca2338SShri Abhyankar if(newdatastruct){ 565faca2338SShri Abhyankar ierr = MatLUFactorSymbolic_SeqBAIJ_newdatastruct(B,A,isrow,iscol,info);CHKERRQ(ierr); 566faca2338SShri Abhyankar PetscFunctionReturn(0); 567faca2338SShri Abhyankar } 568faca2338SShri Abhyankar 569*b2b2dd24SShri Abhyankar ierr = PetscOptionsGetTruth(PETSC_NULL,"-lu_new_v2",&newdatastruct_v2,PETSC_NULL);CHKERRQ(ierr); 570*b2b2dd24SShri Abhyankar if(newdatastruct_v2){ 571*b2b2dd24SShri Abhyankar ierr = MatLUFactorSymbolic_SeqBAIJ_newdatastruct_v2(B,A,isrow,iscol,info);CHKERRQ(ierr); 572*b2b2dd24SShri Abhyankar PetscFunctionReturn(0); 573*b2b2dd24SShri Abhyankar } 574*b2b2dd24SShri Abhyankar 575faca2338SShri Abhyankar if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 576faca2338SShri Abhyankar ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 577faca2338SShri Abhyankar ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 578faca2338SShri Abhyankar ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 579faca2338SShri Abhyankar 580faca2338SShri Abhyankar /* get new row pointers */ 581c7272abeSHong Zhang ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 582c7272abeSHong Zhang bi[0] = 0; 583c7272abeSHong Zhang 584c7272abeSHong Zhang /* bdiag is location of diagonal in factor */ 585c7272abeSHong Zhang ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 586c7272abeSHong Zhang bdiag[0] = 0; 587c7272abeSHong Zhang 588c7272abeSHong Zhang /* linked list for storing column indices of the active row */ 589c7272abeSHong Zhang nlnk = n + 1; 590c7272abeSHong Zhang ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 591c7272abeSHong Zhang 592c7272abeSHong Zhang ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 593c7272abeSHong Zhang 594c7272abeSHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 595c7272abeSHong Zhang f = info->fill; 596c7272abeSHong Zhang ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 597c7272abeSHong Zhang current_space = free_space; 59883287d42SBarry Smith 59983287d42SBarry Smith for (i=0; i<n; i++) { 600c7272abeSHong Zhang /* copy previous fill into linked list */ 60183287d42SBarry Smith nzi = 0; 602c7272abeSHong Zhang nnz = ai[r[i]+1] - ai[r[i]]; 603c7272abeSHong Zhang if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 604c7272abeSHong Zhang ajtmp = aj + ai[r[i]]; 605c7272abeSHong Zhang ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 606c7272abeSHong Zhang nzi += nlnk; 607c7272abeSHong Zhang 608c7272abeSHong Zhang /* add pivot rows into linked list */ 609c7272abeSHong Zhang row = lnk[n]; 610c7272abeSHong Zhang while (row < i) { 611c7272abeSHong Zhang nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 612c7272abeSHong Zhang ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 613c7272abeSHong Zhang ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 614c7272abeSHong Zhang nzi += nlnk; 615c7272abeSHong Zhang row = lnk[row]; 61683287d42SBarry Smith } 617c7272abeSHong Zhang bi[i+1] = bi[i] + nzi; 618c7272abeSHong Zhang im[i] = nzi; 619c7272abeSHong Zhang 620c7272abeSHong Zhang /* mark bdiag */ 621c7272abeSHong Zhang nzbd = 0; 622c7272abeSHong Zhang nnz = nzi; 623c7272abeSHong Zhang k = lnk[n]; 624c7272abeSHong Zhang while (nnz-- && k < i){ 625c7272abeSHong Zhang nzbd++; 626c7272abeSHong Zhang k = lnk[k]; 627c7272abeSHong Zhang } 628c7272abeSHong Zhang bdiag[i] = bi[i] + nzbd; 629c7272abeSHong Zhang 630c7272abeSHong Zhang /* if free space is not available, make more free space */ 631c7272abeSHong Zhang if (current_space->local_remaining<nzi) { 632c7272abeSHong Zhang nnz = (n - i)*nzi; /* estimated and max additional space needed */ 633c7272abeSHong Zhang ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 634c7272abeSHong Zhang reallocs++; 63583287d42SBarry Smith } 63683287d42SBarry Smith 637c7272abeSHong Zhang /* copy data into free space, then initialize lnk */ 638c7272abeSHong Zhang ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 639c7272abeSHong Zhang bi_ptr[i] = current_space->array; 640c7272abeSHong Zhang current_space->array += nzi; 641c7272abeSHong Zhang current_space->local_used += nzi; 642c7272abeSHong Zhang current_space->local_remaining -= nzi; 643c7272abeSHong Zhang } 6446cf91177SBarry Smith #if defined(PETSC_USE_INFO) 64583287d42SBarry Smith if (ai[n] != 0) { 646c7272abeSHong Zhang PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 647ae15b995SBarry Smith ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 648ae15b995SBarry Smith ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 649ae15b995SBarry Smith ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 650ae15b995SBarry Smith ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 65183287d42SBarry Smith } else { 652c7272abeSHong Zhang ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 65383287d42SBarry Smith } 65463ba0a88SBarry Smith #endif 65583287d42SBarry Smith 65683287d42SBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 65783287d42SBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 65883287d42SBarry Smith 659c7272abeSHong Zhang /* destroy list of free space and other temporary array(s) */ 660c7272abeSHong Zhang ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 661c7272abeSHong Zhang ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 662c7272abeSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 663c7272abeSHong Zhang ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 66483287d42SBarry Smith 66583287d42SBarry Smith /* put together the new matrix */ 666719d5645SBarry Smith ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 667719d5645SBarry Smith ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 668719d5645SBarry Smith b = (Mat_SeqBAIJ*)(B)->data; 669e6b907acSBarry Smith b->free_a = PETSC_TRUE; 670e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 671c7272abeSHong Zhang b->singlemalloc = PETSC_FALSE; 672c7272abeSHong Zhang ierr = PetscMalloc((bi[n]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 673c7272abeSHong Zhang b->j = bj; 674c7272abeSHong Zhang b->i = bi; 675c7272abeSHong Zhang b->diag = bdiag; 6767f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 67783287d42SBarry Smith b->ilen = 0; 67883287d42SBarry Smith b->imax = 0; 67983287d42SBarry Smith b->row = isrow; 68083287d42SBarry Smith b->col = iscol; 681d3d32019SBarry Smith b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 68283287d42SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 68383287d42SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 68483287d42SBarry Smith b->icol = isicol; 68587828ca2SBarry Smith ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 686c7272abeSHong Zhang ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr); 68783287d42SBarry Smith 688c7272abeSHong Zhang b->maxnz = b->nz = bi[n] ; 689c7272abeSHong Zhang (B)->factor = MAT_FACTOR_LU; 690719d5645SBarry Smith (B)->info.factor_mallocs = reallocs; 691719d5645SBarry Smith (B)->info.fill_ratio_given = f; 692c7272abeSHong Zhang 69383287d42SBarry Smith if (ai[n] != 0) { 694c7272abeSHong Zhang (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 69583287d42SBarry Smith } else { 696719d5645SBarry Smith (B)->info.fill_ratio_needed = 0.0; 69783287d42SBarry Smith } 698c7272abeSHong Zhang 699db4efbfdSBarry Smith ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 700db4efbfdSBarry Smith ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 7017d18ce8fSMatthew Knepley both_identity = (PetscTruth) (row_identity && col_identity); 70241df41f0SMatthew Knepley ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr); 70383287d42SBarry Smith PetscFunctionReturn(0); 70483287d42SBarry Smith } 705db4efbfdSBarry Smith 706