1be1d678aSKris Buschelman #define PETSCMAT_DLL 2be1d678aSKris Buschelman 383287d42SBarry Smith /* 483287d42SBarry Smith Factorization code for BAIJ format. 583287d42SBarry Smith */ 67c4f633dSBarry Smith #include "../src/mat/impls/baij/seq/baij.h" 7c60f0209SBarry Smith #include "../src/mat/blockinvert.h" 883287d42SBarry Smith 9db4efbfdSBarry Smith #undef __FUNCT__ 10ae3d28f0SHong Zhang #define __FUNCT__ "MatSeqBAIJSetNumericFactorization_newdatastruct" 11db4efbfdSBarry Smith /* 12db4efbfdSBarry Smith This is used to set the numeric factorization for both LU and ILU symbolic factorization 13db4efbfdSBarry Smith */ 14ae3d28f0SHong Zhang PetscErrorCode MatSeqBAIJSetNumericFactorization_newdatastruct(Mat fact,PetscTruth natural) 15ae3d28f0SHong Zhang { 16ae3d28f0SHong Zhang PetscFunctionBegin; 17*6929473cSShri Abhyankar if(natural){ 18*6929473cSShri Abhyankar switch (fact->rmap->bs){ 19*6929473cSShri Abhyankar case 2: 20*6929473cSShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct; 21*6929473cSShri Abhyankar break; 22*6929473cSShri Abhyankar case 3: 23*6929473cSShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_newdatastruct; 24*6929473cSShri Abhyankar break; 25*6929473cSShri Abhyankar case 4: 26*6929473cSShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_newdatastruct; 27*6929473cSShri Abhyankar break; 28*6929473cSShri Abhyankar case 5: 29*6929473cSShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_newdatastruct; 30*6929473cSShri Abhyankar break; 31*6929473cSShri Abhyankar case 6: 32*6929473cSShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_newdatastruct; 33*6929473cSShri Abhyankar break; 34*6929473cSShri Abhyankar case 7: 35*6929473cSShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_newdatastruct; 36*6929473cSShri Abhyankar break; 37*6929473cSShri Abhyankar default: 38*6929473cSShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct; 39*6929473cSShri Abhyankar break; 40*6929473cSShri Abhyankar } 41*6929473cSShri Abhyankar } 42*6929473cSShri Abhyankar else{ 43ae3d28f0SHong Zhang switch (fact->rmap->bs){ 44ae3d28f0SHong Zhang case 2: 45ae3d28f0SHong Zhang fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_newdatastruct; 46ae3d28f0SHong Zhang break; 47ae3d28f0SHong Zhang case 3: 48ae3d28f0SHong Zhang fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_newdatastruct; 49ae3d28f0SHong Zhang break; 50ae3d28f0SHong Zhang case 4: 51ae3d28f0SHong Zhang fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_newdatastruct; 52ae3d28f0SHong Zhang break; 53ae3d28f0SHong Zhang case 5: 54ae3d28f0SHong Zhang fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_newdatastruct; 55ae3d28f0SHong Zhang break; 56ae3d28f0SHong Zhang case 6: 57ae3d28f0SHong Zhang fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_newdatastruct; 58ae3d28f0SHong Zhang break; 59ae3d28f0SHong Zhang case 7: 60ae3d28f0SHong Zhang fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_newdatastruct; 61ae3d28f0SHong Zhang break; 62ae3d28f0SHong Zhang default: 63ae3d28f0SHong Zhang fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct; 64ae3d28f0SHong Zhang break; 65ae3d28f0SHong Zhang } 66*6929473cSShri Abhyankar } 67ae3d28f0SHong Zhang PetscFunctionReturn(0); 68ae3d28f0SHong Zhang } 69ae3d28f0SHong Zhang 70db4efbfdSBarry Smith PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat inA,PetscTruth natural) 71db4efbfdSBarry Smith { 72db4efbfdSBarry Smith PetscFunctionBegin; 73db4efbfdSBarry Smith if (natural) { 74db4efbfdSBarry Smith switch (inA->rmap->bs) { 75db4efbfdSBarry Smith case 1: 76db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 77db4efbfdSBarry Smith break; 78db4efbfdSBarry Smith case 2: 79db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 80db4efbfdSBarry Smith break; 81db4efbfdSBarry Smith case 3: 82db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 83db4efbfdSBarry Smith break; 84db4efbfdSBarry Smith case 4: 8565460251SBarry Smith #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 86db4efbfdSBarry Smith { 87db4efbfdSBarry Smith PetscTruth sse_enabled_local; 88db4efbfdSBarry Smith PetscErrorCode ierr; 89db4efbfdSBarry Smith ierr = PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);CHKERRQ(ierr); 90db4efbfdSBarry Smith if (sse_enabled_local) { 91db4efbfdSBarry Smith # if defined(PETSC_HAVE_SSE) 92db4efbfdSBarry Smith int i,*AJ=a->j,nz=a->nz,n=a->mbs; 93db4efbfdSBarry Smith if (n==(unsigned short)n) { 94db4efbfdSBarry Smith unsigned short *aj=(unsigned short *)AJ; 95db4efbfdSBarry Smith for (i=0;i<nz;i++) { 96db4efbfdSBarry Smith aj[i] = (unsigned short)AJ[i]; 97db4efbfdSBarry Smith } 98db4efbfdSBarry Smith inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj; 99db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj; 100db4efbfdSBarry Smith ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");CHKERRQ(ierr); 101db4efbfdSBarry Smith } else { 102db4efbfdSBarry Smith /* Scale the column indices for easier indexing in MatSolve. */ 103db4efbfdSBarry Smith /* for (i=0;i<nz;i++) { */ 104db4efbfdSBarry Smith /* AJ[i] = AJ[i]*4; */ 105db4efbfdSBarry Smith /* } */ 106db4efbfdSBarry Smith inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE; 107db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 108db4efbfdSBarry Smith ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");CHKERRQ(ierr); 109db4efbfdSBarry Smith } 110db4efbfdSBarry Smith # else 111db4efbfdSBarry Smith /* This should never be reached. If so, problem in PetscSSEIsEnabled. */ 112db4efbfdSBarry Smith SETERRQ(PETSC_ERR_SUP,"SSE Hardware unavailable"); 113db4efbfdSBarry Smith # endif 114db4efbfdSBarry Smith } else { 115db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 116db4efbfdSBarry Smith } 117db4efbfdSBarry Smith } 118db4efbfdSBarry Smith #else 119db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 120db4efbfdSBarry Smith #endif 121db4efbfdSBarry Smith break; 122db4efbfdSBarry Smith case 5: 123db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 124db4efbfdSBarry Smith break; 125db4efbfdSBarry Smith case 6: 126db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 127db4efbfdSBarry Smith break; 128db4efbfdSBarry Smith case 7: 129db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 130db4efbfdSBarry Smith break; 131db4efbfdSBarry Smith default: 132db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 133db4efbfdSBarry Smith break; 134db4efbfdSBarry Smith } 135db4efbfdSBarry Smith } else { 136db4efbfdSBarry Smith switch (inA->rmap->bs) { 137db4efbfdSBarry Smith case 1: 138db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 139db4efbfdSBarry Smith break; 140db4efbfdSBarry Smith case 2: 141db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 142db4efbfdSBarry Smith break; 143db4efbfdSBarry Smith case 3: 144db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 145db4efbfdSBarry Smith break; 146db4efbfdSBarry Smith case 4: 147db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 148db4efbfdSBarry Smith break; 149db4efbfdSBarry Smith case 5: 150db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 151db4efbfdSBarry Smith break; 152db4efbfdSBarry Smith case 6: 153db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 154db4efbfdSBarry Smith break; 155db4efbfdSBarry Smith case 7: 156db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 157db4efbfdSBarry Smith break; 158db4efbfdSBarry Smith default: 159db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 160db4efbfdSBarry Smith break; 161db4efbfdSBarry Smith } 162db4efbfdSBarry Smith } 163db4efbfdSBarry Smith PetscFunctionReturn(0); 164db4efbfdSBarry Smith } 165db4efbfdSBarry Smith 16683287d42SBarry Smith /* 16783287d42SBarry Smith The symbolic factorization code is identical to that for AIJ format, 16883287d42SBarry Smith except for very small changes since this is now a SeqBAIJ datastructure. 16983287d42SBarry Smith NOT good code reuse. 17083287d42SBarry Smith */ 171c7272abeSHong Zhang #include "petscbt.h" 172c7272abeSHong Zhang #include "../src/mat/utils/freespace.h" 173c7272abeSHong Zhang 1744a2ae208SSatish Balay #undef __FUNCT__ 175faca2338SShri Abhyankar #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ_newdatastruct" 176faca2338SShri Abhyankar PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_newdatastruct(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 17783287d42SBarry Smith { 17883287d42SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 179c7272abeSHong Zhang PetscInt n=a->mbs,bs = A->rmap->bs,bs2=a->bs2; 180c7272abeSHong Zhang PetscTruth row_identity,col_identity,both_identity; 18183287d42SBarry Smith IS isicol; 1826849ba73SBarry Smith PetscErrorCode ierr; 183c7272abeSHong Zhang const PetscInt *r,*ic; 184c7272abeSHong Zhang PetscInt i,*ai=a->i,*aj=a->j; 185c7272abeSHong Zhang PetscInt *bi,*bj,*ajtmp; 186c7272abeSHong Zhang PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 187c7272abeSHong Zhang PetscReal f; 188c7272abeSHong Zhang PetscInt nlnk,*lnk,k,**bi_ptr; 189c7272abeSHong Zhang PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 190c7272abeSHong Zhang PetscBT lnkbt; 19183287d42SBarry Smith 19283287d42SBarry Smith PetscFunctionBegin; 193d0f46423SBarry Smith if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 19483287d42SBarry Smith ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 19583287d42SBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 19683287d42SBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 19783287d42SBarry Smith 198bc74c81fSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 199bc74c81fSJed Brown ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 200bc74c81fSJed Brown ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 201a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 202b2b2dd24SShri Abhyankar 203b2b2dd24SShri Abhyankar /* linked list for storing column indices of the active row */ 204b2b2dd24SShri Abhyankar nlnk = n + 1; 205b2b2dd24SShri Abhyankar ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 206b2b2dd24SShri Abhyankar 207b2b2dd24SShri Abhyankar ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 208b2b2dd24SShri Abhyankar 209b2b2dd24SShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 210b2b2dd24SShri Abhyankar f = info->fill; 211b2b2dd24SShri Abhyankar ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 212b2b2dd24SShri Abhyankar current_space = free_space; 213b2b2dd24SShri Abhyankar 214b2b2dd24SShri Abhyankar for (i=0; i<n; i++) { 215b2b2dd24SShri Abhyankar /* copy previous fill into linked list */ 216b2b2dd24SShri Abhyankar nzi = 0; 217b2b2dd24SShri Abhyankar nnz = ai[r[i]+1] - ai[r[i]]; 218b2b2dd24SShri Abhyankar if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 219b2b2dd24SShri Abhyankar ajtmp = aj + ai[r[i]]; 220b2b2dd24SShri Abhyankar ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 221b2b2dd24SShri Abhyankar nzi += nlnk; 222b2b2dd24SShri Abhyankar 223b2b2dd24SShri Abhyankar /* add pivot rows into linked list */ 224b2b2dd24SShri Abhyankar row = lnk[n]; 225b2b2dd24SShri Abhyankar while (row < i) { 226b2b2dd24SShri Abhyankar nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 227b2b2dd24SShri Abhyankar ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 228b2b2dd24SShri Abhyankar ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 229b2b2dd24SShri Abhyankar nzi += nlnk; 230b2b2dd24SShri Abhyankar row = lnk[row]; 231b2b2dd24SShri Abhyankar } 232b2b2dd24SShri Abhyankar bi[i+1] = bi[i] + nzi; 233b2b2dd24SShri Abhyankar im[i] = nzi; 234b2b2dd24SShri Abhyankar 235b2b2dd24SShri Abhyankar /* mark bdiag */ 236b2b2dd24SShri Abhyankar nzbd = 0; 237b2b2dd24SShri Abhyankar nnz = nzi; 238b2b2dd24SShri Abhyankar k = lnk[n]; 239b2b2dd24SShri Abhyankar while (nnz-- && k < i){ 240b2b2dd24SShri Abhyankar nzbd++; 241b2b2dd24SShri Abhyankar k = lnk[k]; 242b2b2dd24SShri Abhyankar } 243b2b2dd24SShri Abhyankar bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU_v2() */ 244b2b2dd24SShri Abhyankar 245b2b2dd24SShri Abhyankar /* if free space is not available, make more free space */ 246b2b2dd24SShri Abhyankar if (current_space->local_remaining<nzi) { 247b2b2dd24SShri Abhyankar nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 248b2b2dd24SShri Abhyankar ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 249b2b2dd24SShri Abhyankar reallocs++; 250b2b2dd24SShri Abhyankar } 251b2b2dd24SShri Abhyankar 252b2b2dd24SShri Abhyankar /* copy data into free space, then initialize lnk */ 253b2b2dd24SShri Abhyankar ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 254b2b2dd24SShri Abhyankar bi_ptr[i] = current_space->array; 255b2b2dd24SShri Abhyankar current_space->array += nzi; 256b2b2dd24SShri Abhyankar current_space->local_used += nzi; 257b2b2dd24SShri Abhyankar current_space->local_remaining -= nzi; 258b2b2dd24SShri Abhyankar } 259b2b2dd24SShri Abhyankar #if defined(PETSC_USE_INFO) 260b2b2dd24SShri Abhyankar if (ai[n] != 0) { 261b2b2dd24SShri Abhyankar PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 262b2b2dd24SShri Abhyankar ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 263b2b2dd24SShri Abhyankar ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 264b2b2dd24SShri Abhyankar ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 265b2b2dd24SShri Abhyankar ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 266b2b2dd24SShri Abhyankar } else { 267b2b2dd24SShri Abhyankar ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 268b2b2dd24SShri Abhyankar } 269b2b2dd24SShri Abhyankar #endif 270b2b2dd24SShri Abhyankar 271b2b2dd24SShri Abhyankar ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 272b2b2dd24SShri Abhyankar ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 273b2b2dd24SShri Abhyankar 274b2b2dd24SShri Abhyankar /* destroy list of free space and other temporary array(s) */ 275b2b2dd24SShri Abhyankar ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 276b2b2dd24SShri Abhyankar ierr = PetscFreeSpaceContiguous_LU_v2(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 277b2b2dd24SShri Abhyankar ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 278b2b2dd24SShri Abhyankar ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 279b2b2dd24SShri Abhyankar 280b2b2dd24SShri Abhyankar /* put together the new matrix */ 281b2b2dd24SShri Abhyankar ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 282b2b2dd24SShri Abhyankar ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 283b2b2dd24SShri Abhyankar b = (Mat_SeqBAIJ*)(B)->data; 284b2b2dd24SShri Abhyankar b->free_a = PETSC_TRUE; 285b2b2dd24SShri Abhyankar b->free_ij = PETSC_TRUE; 286b2b2dd24SShri Abhyankar b->singlemalloc = PETSC_FALSE; 287b2b2dd24SShri Abhyankar ierr = PetscMalloc((bdiag[0]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 288b2b2dd24SShri Abhyankar b->j = bj; 289b2b2dd24SShri Abhyankar b->i = bi; 290b2b2dd24SShri Abhyankar b->diag = bdiag; 291b2b2dd24SShri Abhyankar b->free_diag = PETSC_TRUE; 292b2b2dd24SShri Abhyankar b->ilen = 0; 293b2b2dd24SShri Abhyankar b->imax = 0; 294b2b2dd24SShri Abhyankar b->row = isrow; 295b2b2dd24SShri Abhyankar b->col = iscol; 296b2b2dd24SShri Abhyankar b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 297b2b2dd24SShri Abhyankar ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 298b2b2dd24SShri Abhyankar ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 299b2b2dd24SShri Abhyankar b->icol = isicol; 300b2b2dd24SShri Abhyankar ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 301b2b2dd24SShri Abhyankar ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr); 302b2b2dd24SShri Abhyankar 303b2b2dd24SShri Abhyankar b->maxnz = b->nz = bdiag[0]+1; 304b2b2dd24SShri Abhyankar B->factor = MAT_FACTOR_LU; 305b2b2dd24SShri Abhyankar B->info.factor_mallocs = reallocs; 306b2b2dd24SShri Abhyankar B->info.fill_ratio_given = f; 307b2b2dd24SShri Abhyankar 308b2b2dd24SShri Abhyankar if (ai[n] != 0) { 309b2b2dd24SShri Abhyankar B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 310b2b2dd24SShri Abhyankar } else { 311b2b2dd24SShri Abhyankar B->info.fill_ratio_needed = 0.0; 312b2b2dd24SShri Abhyankar } 313b2b2dd24SShri Abhyankar 314b2b2dd24SShri Abhyankar ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 315b2b2dd24SShri Abhyankar ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 316b2b2dd24SShri Abhyankar both_identity = (PetscTruth) (row_identity && col_identity); 317b2b2dd24SShri Abhyankar if(both_identity){ 318b2b2dd24SShri Abhyankar switch(bs){ 319b2b2dd24SShri Abhyankar case 2: 320c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct; 321a2d6a19aSShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering_newdatastruct; 322b2b2dd24SShri Abhyankar break; 323b2b2dd24SShri Abhyankar case 3: 324c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_newdatastruct; 325a2d6a19aSShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct; 326b2b2dd24SShri Abhyankar break; 327b2b2dd24SShri Abhyankar case 4: 328c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_newdatastruct; 329a2d6a19aSShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering_newdatastruct; 330b2b2dd24SShri Abhyankar break; 331b2b2dd24SShri Abhyankar case 5: 332c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_newdatastruct; 333a2d6a19aSShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering_newdatastruct; 334b2b2dd24SShri Abhyankar break; 335b2b2dd24SShri Abhyankar case 6: 336c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_newdatastruct; 337a2d6a19aSShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering_newdatastruct; 338b2b2dd24SShri Abhyankar break; 339b2b2dd24SShri Abhyankar case 7: 340c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_newdatastruct; 341a2d6a19aSShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering_newdatastruct; 342b2b2dd24SShri Abhyankar break; 343b2b2dd24SShri Abhyankar default: 344c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct; 345a2d6a19aSShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct; 346b2b2dd24SShri Abhyankar } 347b2b2dd24SShri Abhyankar } 348b2b2dd24SShri Abhyankar else { 349b2b2dd24SShri Abhyankar switch(bs){ 350b2b2dd24SShri Abhyankar case 2: 351c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_newdatastruct; 352a2d6a19aSShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_2_newdatastruct; 353b2b2dd24SShri Abhyankar break; 354b2b2dd24SShri Abhyankar case 3: 355c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_newdatastruct; 356a2d6a19aSShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_3_newdatastruct; 357b2b2dd24SShri Abhyankar break; 358b2b2dd24SShri Abhyankar case 4: 359c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_newdatastruct; 360a2d6a19aSShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_4_newdatastruct; 361b2b2dd24SShri Abhyankar break; 362b2b2dd24SShri Abhyankar case 5: 363c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_newdatastruct; 364a2d6a19aSShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_5_newdatastruct; 365b2b2dd24SShri Abhyankar break; 366b2b2dd24SShri Abhyankar case 6: 367c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_newdatastruct; 368a2d6a19aSShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_6_newdatastruct; 369b2b2dd24SShri Abhyankar break; 370b2b2dd24SShri Abhyankar case 7: 371c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_newdatastruct; 372a2d6a19aSShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_7_newdatastruct; 373b2b2dd24SShri Abhyankar break; 374b2b2dd24SShri Abhyankar default: 375c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct; 376a2d6a19aSShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_N_newdatastruct; 377b2b2dd24SShri Abhyankar } 378b2b2dd24SShri Abhyankar } 379b2b2dd24SShri Abhyankar /* ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr); */ 380b2b2dd24SShri Abhyankar PetscFunctionReturn(0); 381b2b2dd24SShri Abhyankar } 382b2b2dd24SShri Abhyankar 383b2b2dd24SShri Abhyankar #undef __FUNCT__ 384faca2338SShri Abhyankar #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ" 385faca2338SShri Abhyankar PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 386faca2338SShri Abhyankar { 387faca2338SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 388faca2338SShri Abhyankar PetscInt n=a->mbs,bs = A->rmap->bs,bs2=a->bs2; 389faca2338SShri Abhyankar PetscTruth row_identity,col_identity,both_identity; 390faca2338SShri Abhyankar IS isicol; 391faca2338SShri Abhyankar PetscErrorCode ierr; 392faca2338SShri Abhyankar const PetscInt *r,*ic; 393faca2338SShri Abhyankar PetscInt i,*ai=a->i,*aj=a->j; 394faca2338SShri Abhyankar PetscInt *bi,*bj,*ajtmp; 395faca2338SShri Abhyankar PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 396faca2338SShri Abhyankar PetscReal f; 397faca2338SShri Abhyankar PetscInt nlnk,*lnk,k,**bi_ptr; 398faca2338SShri Abhyankar PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 399faca2338SShri Abhyankar PetscBT lnkbt; 400faca2338SShri Abhyankar PetscTruth newdatastruct=PETSC_FALSE; 401faca2338SShri Abhyankar 402faca2338SShri Abhyankar PetscFunctionBegin; 403faca2338SShri Abhyankar ierr = PetscOptionsGetTruth(PETSC_NULL,"-lu_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr); 404faca2338SShri Abhyankar if(newdatastruct){ 405faca2338SShri Abhyankar ierr = MatLUFactorSymbolic_SeqBAIJ_newdatastruct(B,A,isrow,iscol,info);CHKERRQ(ierr); 406faca2338SShri Abhyankar PetscFunctionReturn(0); 407faca2338SShri Abhyankar } 408faca2338SShri Abhyankar 409faca2338SShri Abhyankar if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 410faca2338SShri Abhyankar ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 411faca2338SShri Abhyankar ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 412faca2338SShri Abhyankar ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 413faca2338SShri Abhyankar 414bc74c81fSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 415bc74c81fSJed Brown ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 416bc74c81fSJed Brown ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 417bc74c81fSJed Brown 418a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 419c7272abeSHong Zhang 420c7272abeSHong Zhang /* linked list for storing column indices of the active row */ 421c7272abeSHong Zhang nlnk = n + 1; 422c7272abeSHong Zhang ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 423c7272abeSHong Zhang 424c7272abeSHong Zhang ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 425c7272abeSHong Zhang 426c7272abeSHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 427c7272abeSHong Zhang f = info->fill; 428c7272abeSHong Zhang ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 429c7272abeSHong Zhang current_space = free_space; 43083287d42SBarry Smith 43183287d42SBarry Smith for (i=0; i<n; i++) { 432c7272abeSHong Zhang /* copy previous fill into linked list */ 43383287d42SBarry Smith nzi = 0; 434c7272abeSHong Zhang nnz = ai[r[i]+1] - ai[r[i]]; 435c7272abeSHong Zhang if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 436c7272abeSHong Zhang ajtmp = aj + ai[r[i]]; 437c7272abeSHong Zhang ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 438c7272abeSHong Zhang nzi += nlnk; 439c7272abeSHong Zhang 440c7272abeSHong Zhang /* add pivot rows into linked list */ 441c7272abeSHong Zhang row = lnk[n]; 442c7272abeSHong Zhang while (row < i) { 443c7272abeSHong Zhang nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 444c7272abeSHong Zhang ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 445c7272abeSHong Zhang ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 446c7272abeSHong Zhang nzi += nlnk; 447c7272abeSHong Zhang row = lnk[row]; 44883287d42SBarry Smith } 449c7272abeSHong Zhang bi[i+1] = bi[i] + nzi; 450c7272abeSHong Zhang im[i] = nzi; 451c7272abeSHong Zhang 452c7272abeSHong Zhang /* mark bdiag */ 453c7272abeSHong Zhang nzbd = 0; 454c7272abeSHong Zhang nnz = nzi; 455c7272abeSHong Zhang k = lnk[n]; 456c7272abeSHong Zhang while (nnz-- && k < i){ 457c7272abeSHong Zhang nzbd++; 458c7272abeSHong Zhang k = lnk[k]; 459c7272abeSHong Zhang } 460c7272abeSHong Zhang bdiag[i] = bi[i] + nzbd; 461c7272abeSHong Zhang 462c7272abeSHong Zhang /* if free space is not available, make more free space */ 463c7272abeSHong Zhang if (current_space->local_remaining<nzi) { 464c7272abeSHong Zhang nnz = (n - i)*nzi; /* estimated and max additional space needed */ 465c7272abeSHong Zhang ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 466c7272abeSHong Zhang reallocs++; 46783287d42SBarry Smith } 46883287d42SBarry Smith 469c7272abeSHong Zhang /* copy data into free space, then initialize lnk */ 470c7272abeSHong Zhang ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 471c7272abeSHong Zhang bi_ptr[i] = current_space->array; 472c7272abeSHong Zhang current_space->array += nzi; 473c7272abeSHong Zhang current_space->local_used += nzi; 474c7272abeSHong Zhang current_space->local_remaining -= nzi; 475c7272abeSHong Zhang } 4766cf91177SBarry Smith #if defined(PETSC_USE_INFO) 47783287d42SBarry Smith if (ai[n] != 0) { 478c7272abeSHong Zhang PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 479ae15b995SBarry Smith ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 480ae15b995SBarry Smith ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 481ae15b995SBarry Smith ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 482ae15b995SBarry Smith ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 48383287d42SBarry Smith } else { 484c7272abeSHong Zhang ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 48583287d42SBarry Smith } 48663ba0a88SBarry Smith #endif 48783287d42SBarry Smith 48883287d42SBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 48983287d42SBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 49083287d42SBarry Smith 491c7272abeSHong Zhang /* destroy list of free space and other temporary array(s) */ 492c7272abeSHong Zhang ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 493c7272abeSHong Zhang ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 494c7272abeSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 495c7272abeSHong Zhang ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 49683287d42SBarry Smith 49783287d42SBarry Smith /* put together the new matrix */ 498719d5645SBarry Smith ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 499719d5645SBarry Smith ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 500719d5645SBarry Smith b = (Mat_SeqBAIJ*)(B)->data; 501e6b907acSBarry Smith b->free_a = PETSC_TRUE; 502e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 503c7272abeSHong Zhang b->singlemalloc = PETSC_FALSE; 504c7272abeSHong Zhang ierr = PetscMalloc((bi[n]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 505c7272abeSHong Zhang b->j = bj; 506c7272abeSHong Zhang b->i = bi; 507c7272abeSHong Zhang b->diag = bdiag; 5087f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 50983287d42SBarry Smith b->ilen = 0; 51083287d42SBarry Smith b->imax = 0; 51183287d42SBarry Smith b->row = isrow; 51283287d42SBarry Smith b->col = iscol; 513d3d32019SBarry Smith b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 51483287d42SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 51583287d42SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 51683287d42SBarry Smith b->icol = isicol; 51787828ca2SBarry Smith ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 518c7272abeSHong Zhang ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr); 51983287d42SBarry Smith 520c7272abeSHong Zhang b->maxnz = b->nz = bi[n] ; 521c7272abeSHong Zhang (B)->factor = MAT_FACTOR_LU; 522719d5645SBarry Smith (B)->info.factor_mallocs = reallocs; 523719d5645SBarry Smith (B)->info.fill_ratio_given = f; 524c7272abeSHong Zhang 52583287d42SBarry Smith if (ai[n] != 0) { 526c7272abeSHong Zhang (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 52783287d42SBarry Smith } else { 528719d5645SBarry Smith (B)->info.fill_ratio_needed = 0.0; 52983287d42SBarry Smith } 530c7272abeSHong Zhang 531db4efbfdSBarry Smith ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 532db4efbfdSBarry Smith ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 5337d18ce8fSMatthew Knepley both_identity = (PetscTruth) (row_identity && col_identity); 53441df41f0SMatthew Knepley ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr); 53583287d42SBarry Smith PetscFunctionReturn(0); 53683287d42SBarry Smith } 537db4efbfdSBarry Smith 538