1be1d678aSKris Buschelman #define PETSCMAT_DLL 2be1d678aSKris Buschelman 383287d42SBarry Smith /* 483287d42SBarry Smith Factorization code for BAIJ format. 583287d42SBarry Smith */ 67c4f633dSBarry Smith #include "../src/mat/impls/baij/seq/baij.h" 7c60f0209SBarry Smith #include "../src/mat/blockinvert.h" 883287d42SBarry Smith 9db4efbfdSBarry Smith #undef __FUNCT__ 104dd39f65SShri Abhyankar #define __FUNCT__ "MatSeqBAIJSetNumericFactorization" 11db4efbfdSBarry Smith /* 12db4efbfdSBarry Smith This is used to set the numeric factorization for both LU and ILU symbolic factorization 13db4efbfdSBarry Smith */ 144dd39f65SShri Abhyankar PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscTruth natural) 15ae3d28f0SHong Zhang { 16ae3d28f0SHong Zhang PetscFunctionBegin; 176929473cSShri Abhyankar if(natural){ 186929473cSShri Abhyankar switch (fact->rmap->bs){ 19*048b5e81SShri Abhyankar case 1: 20*048b5e81SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 21*048b5e81SShri Abhyankar break; 226929473cSShri Abhyankar case 2: 234dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 246929473cSShri Abhyankar break; 256929473cSShri Abhyankar case 3: 264dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 276929473cSShri Abhyankar break; 286929473cSShri Abhyankar case 4: 294dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 306929473cSShri Abhyankar break; 316929473cSShri Abhyankar case 5: 324dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 336929473cSShri Abhyankar break; 346929473cSShri Abhyankar case 6: 354dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 366929473cSShri Abhyankar break; 376929473cSShri Abhyankar case 7: 384dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 396929473cSShri Abhyankar break; 4029a97285SShri Abhyankar case 15: 4129a97285SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering; 4229a97285SShri Abhyankar break; 436929473cSShri Abhyankar default: 444dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 456929473cSShri Abhyankar break; 466929473cSShri Abhyankar } 476929473cSShri Abhyankar } 486929473cSShri Abhyankar else{ 49ae3d28f0SHong Zhang switch (fact->rmap->bs){ 50*048b5e81SShri Abhyankar case 1: 51*048b5e81SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 52*048b5e81SShri Abhyankar break; 53ae3d28f0SHong Zhang case 2: 544dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 55ae3d28f0SHong Zhang break; 56ae3d28f0SHong Zhang case 3: 574dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 58ae3d28f0SHong Zhang break; 59ae3d28f0SHong Zhang case 4: 604dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 61ae3d28f0SHong Zhang break; 62ae3d28f0SHong Zhang case 5: 634dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 64ae3d28f0SHong Zhang break; 65ae3d28f0SHong Zhang case 6: 664dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 67ae3d28f0SHong Zhang break; 68ae3d28f0SHong Zhang case 7: 694dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 70ae3d28f0SHong Zhang break; 71ae3d28f0SHong Zhang default: 724dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 73ae3d28f0SHong Zhang break; 74ae3d28f0SHong Zhang } 756929473cSShri Abhyankar } 76ae3d28f0SHong Zhang PetscFunctionReturn(0); 77ae3d28f0SHong Zhang } 78ae3d28f0SHong Zhang 798b1456e3SHong Zhang PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscTruth natural) 80db4efbfdSBarry Smith { 81db4efbfdSBarry Smith PetscFunctionBegin; 82db4efbfdSBarry Smith if (natural) { 83db4efbfdSBarry Smith switch (inA->rmap->bs) { 84db4efbfdSBarry Smith case 1: 8506e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 86db4efbfdSBarry Smith break; 87db4efbfdSBarry Smith case 2: 8806e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace; 89db4efbfdSBarry Smith break; 90db4efbfdSBarry Smith case 3: 9106e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace; 92db4efbfdSBarry Smith break; 93db4efbfdSBarry Smith case 4: 9465460251SBarry Smith #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 95db4efbfdSBarry Smith { 96db4efbfdSBarry Smith PetscTruth sse_enabled_local; 97db4efbfdSBarry Smith PetscErrorCode ierr; 98db4efbfdSBarry Smith ierr = PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);CHKERRQ(ierr); 99db4efbfdSBarry Smith if (sse_enabled_local) { 100db4efbfdSBarry Smith # if defined(PETSC_HAVE_SSE) 101db4efbfdSBarry Smith int i,*AJ=a->j,nz=a->nz,n=a->mbs; 102db4efbfdSBarry Smith if (n==(unsigned short)n) { 103db4efbfdSBarry Smith unsigned short *aj=(unsigned short *)AJ; 104db4efbfdSBarry Smith for (i=0;i<nz;i++) { 105db4efbfdSBarry Smith aj[i] = (unsigned short)AJ[i]; 106db4efbfdSBarry Smith } 107db4efbfdSBarry Smith inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj; 108db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj; 109db4efbfdSBarry Smith ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");CHKERRQ(ierr); 110db4efbfdSBarry Smith } else { 111db4efbfdSBarry Smith /* Scale the column indices for easier indexing in MatSolve. */ 112db4efbfdSBarry Smith /* for (i=0;i<nz;i++) { */ 113db4efbfdSBarry Smith /* AJ[i] = AJ[i]*4; */ 114db4efbfdSBarry Smith /* } */ 115db4efbfdSBarry Smith inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE; 116db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 117db4efbfdSBarry Smith ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");CHKERRQ(ierr); 118db4efbfdSBarry Smith } 119db4efbfdSBarry Smith # else 120db4efbfdSBarry Smith /* This should never be reached. If so, problem in PetscSSEIsEnabled. */ 121e32f2f54SBarry Smith SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSE Hardware unavailable"); 122db4efbfdSBarry Smith # endif 123db4efbfdSBarry Smith } else { 12406e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 125db4efbfdSBarry Smith } 126db4efbfdSBarry Smith } 127db4efbfdSBarry Smith #else 12806e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 129db4efbfdSBarry Smith #endif 130db4efbfdSBarry Smith break; 131db4efbfdSBarry Smith case 5: 13206e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace; 133db4efbfdSBarry Smith break; 134db4efbfdSBarry Smith case 6: 13506e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace; 136db4efbfdSBarry Smith break; 137db4efbfdSBarry Smith case 7: 13806e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace; 139db4efbfdSBarry Smith break; 140db4efbfdSBarry Smith default: 14106e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 142db4efbfdSBarry Smith break; 143db4efbfdSBarry Smith } 144db4efbfdSBarry Smith } else { 145db4efbfdSBarry Smith switch (inA->rmap->bs) { 146db4efbfdSBarry Smith case 1: 14706e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 148db4efbfdSBarry Smith break; 149db4efbfdSBarry Smith case 2: 15006e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace; 151db4efbfdSBarry Smith break; 152db4efbfdSBarry Smith case 3: 15306e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace; 154db4efbfdSBarry Smith break; 155db4efbfdSBarry Smith case 4: 15606e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace; 157db4efbfdSBarry Smith break; 158db4efbfdSBarry Smith case 5: 15906e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace; 160db4efbfdSBarry Smith break; 161db4efbfdSBarry Smith case 6: 16206e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace; 163db4efbfdSBarry Smith break; 164db4efbfdSBarry Smith case 7: 16506e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace; 166db4efbfdSBarry Smith break; 167db4efbfdSBarry Smith default: 16806e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 169db4efbfdSBarry Smith break; 170db4efbfdSBarry Smith } 171db4efbfdSBarry Smith } 172db4efbfdSBarry Smith PetscFunctionReturn(0); 173db4efbfdSBarry Smith } 174db4efbfdSBarry Smith 17583287d42SBarry Smith /* 17683287d42SBarry Smith The symbolic factorization code is identical to that for AIJ format, 17783287d42SBarry Smith except for very small changes since this is now a SeqBAIJ datastructure. 17883287d42SBarry Smith NOT good code reuse. 17983287d42SBarry Smith */ 180c7272abeSHong Zhang #include "petscbt.h" 181c7272abeSHong Zhang #include "../src/mat/utils/freespace.h" 182c7272abeSHong Zhang 1834a2ae208SSatish Balay #undef __FUNCT__ 1844dd39f65SShri Abhyankar #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ" 1854dd39f65SShri Abhyankar PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 18683287d42SBarry Smith { 18783287d42SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 188c7272abeSHong Zhang PetscInt n=a->mbs,bs = A->rmap->bs,bs2=a->bs2; 189c7272abeSHong Zhang PetscTruth row_identity,col_identity,both_identity; 19083287d42SBarry Smith IS isicol; 1916849ba73SBarry Smith PetscErrorCode ierr; 192c7272abeSHong Zhang const PetscInt *r,*ic; 193c7272abeSHong Zhang PetscInt i,*ai=a->i,*aj=a->j; 194c7272abeSHong Zhang PetscInt *bi,*bj,*ajtmp; 195c7272abeSHong Zhang PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 196c7272abeSHong Zhang PetscReal f; 197c7272abeSHong Zhang PetscInt nlnk,*lnk,k,**bi_ptr; 198c7272abeSHong Zhang PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 199c7272abeSHong Zhang PetscBT lnkbt; 20083287d42SBarry Smith 20183287d42SBarry Smith PetscFunctionBegin; 202e32f2f54SBarry Smith if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 20383287d42SBarry Smith ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 20483287d42SBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 20583287d42SBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 20683287d42SBarry Smith 207bc74c81fSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 208bc74c81fSJed Brown ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 209bc74c81fSJed Brown ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 210a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 211b2b2dd24SShri Abhyankar 212b2b2dd24SShri Abhyankar /* linked list for storing column indices of the active row */ 213b2b2dd24SShri Abhyankar nlnk = n + 1; 214b2b2dd24SShri Abhyankar ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 215b2b2dd24SShri Abhyankar 216b2b2dd24SShri Abhyankar ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 217b2b2dd24SShri Abhyankar 218b2b2dd24SShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 219b2b2dd24SShri Abhyankar f = info->fill; 220b2b2dd24SShri Abhyankar ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 221b2b2dd24SShri Abhyankar current_space = free_space; 222b2b2dd24SShri Abhyankar 223b2b2dd24SShri Abhyankar for (i=0; i<n; i++) { 224b2b2dd24SShri Abhyankar /* copy previous fill into linked list */ 225b2b2dd24SShri Abhyankar nzi = 0; 226b2b2dd24SShri Abhyankar nnz = ai[r[i]+1] - ai[r[i]]; 227e32f2f54SBarry Smith if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 228b2b2dd24SShri Abhyankar ajtmp = aj + ai[r[i]]; 229b2b2dd24SShri Abhyankar ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 230b2b2dd24SShri Abhyankar nzi += nlnk; 231b2b2dd24SShri Abhyankar 232b2b2dd24SShri Abhyankar /* add pivot rows into linked list */ 233b2b2dd24SShri Abhyankar row = lnk[n]; 234b2b2dd24SShri Abhyankar while (row < i) { 235b2b2dd24SShri Abhyankar nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 236b2b2dd24SShri Abhyankar ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 237b2b2dd24SShri Abhyankar ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 238b2b2dd24SShri Abhyankar nzi += nlnk; 239b2b2dd24SShri Abhyankar row = lnk[row]; 240b2b2dd24SShri Abhyankar } 241b2b2dd24SShri Abhyankar bi[i+1] = bi[i] + nzi; 242b2b2dd24SShri Abhyankar im[i] = nzi; 243b2b2dd24SShri Abhyankar 244b2b2dd24SShri Abhyankar /* mark bdiag */ 245b2b2dd24SShri Abhyankar nzbd = 0; 246b2b2dd24SShri Abhyankar nnz = nzi; 247b2b2dd24SShri Abhyankar k = lnk[n]; 248b2b2dd24SShri Abhyankar while (nnz-- && k < i){ 249b2b2dd24SShri Abhyankar nzbd++; 250b2b2dd24SShri Abhyankar k = lnk[k]; 251b2b2dd24SShri Abhyankar } 2522ce24eb6SHong Zhang bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 253b2b2dd24SShri Abhyankar 254b2b2dd24SShri Abhyankar /* if free space is not available, make more free space */ 255b2b2dd24SShri Abhyankar if (current_space->local_remaining<nzi) { 256b2b2dd24SShri Abhyankar nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 257b2b2dd24SShri Abhyankar ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 258b2b2dd24SShri Abhyankar reallocs++; 259b2b2dd24SShri Abhyankar } 260b2b2dd24SShri Abhyankar 261b2b2dd24SShri Abhyankar /* copy data into free space, then initialize lnk */ 262b2b2dd24SShri Abhyankar ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 263b2b2dd24SShri Abhyankar bi_ptr[i] = current_space->array; 264b2b2dd24SShri Abhyankar current_space->array += nzi; 265b2b2dd24SShri Abhyankar current_space->local_used += nzi; 266b2b2dd24SShri Abhyankar current_space->local_remaining -= nzi; 267b2b2dd24SShri Abhyankar } 268b2b2dd24SShri Abhyankar #if defined(PETSC_USE_INFO) 269b2b2dd24SShri Abhyankar if (ai[n] != 0) { 270b2b2dd24SShri Abhyankar PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 271b2b2dd24SShri Abhyankar ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 272b2b2dd24SShri Abhyankar ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 273b2b2dd24SShri Abhyankar ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 274b2b2dd24SShri Abhyankar ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 275b2b2dd24SShri Abhyankar } else { 276b2b2dd24SShri Abhyankar ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 277b2b2dd24SShri Abhyankar } 278b2b2dd24SShri Abhyankar #endif 279b2b2dd24SShri Abhyankar 280b2b2dd24SShri Abhyankar ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 281b2b2dd24SShri Abhyankar ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 282b2b2dd24SShri Abhyankar 283b2b2dd24SShri Abhyankar /* destroy list of free space and other temporary array(s) */ 284b2b2dd24SShri Abhyankar ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 2852ce24eb6SHong Zhang ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 286b2b2dd24SShri Abhyankar ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 287b2b2dd24SShri Abhyankar ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 288b2b2dd24SShri Abhyankar 289b2b2dd24SShri Abhyankar /* put together the new matrix */ 290b2b2dd24SShri Abhyankar ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 291b2b2dd24SShri Abhyankar ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 292b2b2dd24SShri Abhyankar b = (Mat_SeqBAIJ*)(B)->data; 293b2b2dd24SShri Abhyankar b->free_a = PETSC_TRUE; 294b2b2dd24SShri Abhyankar b->free_ij = PETSC_TRUE; 295b2b2dd24SShri Abhyankar b->singlemalloc = PETSC_FALSE; 296b2b2dd24SShri Abhyankar ierr = PetscMalloc((bdiag[0]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 297b2b2dd24SShri Abhyankar b->j = bj; 298b2b2dd24SShri Abhyankar b->i = bi; 299b2b2dd24SShri Abhyankar b->diag = bdiag; 300b2b2dd24SShri Abhyankar b->free_diag = PETSC_TRUE; 301b2b2dd24SShri Abhyankar b->ilen = 0; 302b2b2dd24SShri Abhyankar b->imax = 0; 303b2b2dd24SShri Abhyankar b->row = isrow; 304b2b2dd24SShri Abhyankar b->col = iscol; 305b2b2dd24SShri Abhyankar b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 306b2b2dd24SShri Abhyankar ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 307b2b2dd24SShri Abhyankar ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 308b2b2dd24SShri Abhyankar b->icol = isicol; 309b2b2dd24SShri Abhyankar ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 310b2b2dd24SShri Abhyankar ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr); 311b2b2dd24SShri Abhyankar 312b2b2dd24SShri Abhyankar b->maxnz = b->nz = bdiag[0]+1; 313d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 314b2b2dd24SShri Abhyankar B->info.factor_mallocs = reallocs; 315b2b2dd24SShri Abhyankar B->info.fill_ratio_given = f; 316b2b2dd24SShri Abhyankar 317b2b2dd24SShri Abhyankar if (ai[n] != 0) { 318b2b2dd24SShri Abhyankar B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 319b2b2dd24SShri Abhyankar } else { 320b2b2dd24SShri Abhyankar B->info.fill_ratio_needed = 0.0; 321b2b2dd24SShri Abhyankar } 322b2b2dd24SShri Abhyankar 323b2b2dd24SShri Abhyankar ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 324b2b2dd24SShri Abhyankar ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 325b2b2dd24SShri Abhyankar both_identity = (PetscTruth) (row_identity && col_identity); 3264dd39f65SShri Abhyankar ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr); 327b2b2dd24SShri Abhyankar PetscFunctionReturn(0); 328b2b2dd24SShri Abhyankar } 329b2b2dd24SShri Abhyankar 330b2b2dd24SShri Abhyankar #undef __FUNCT__ 33106e38f1dSHong Zhang #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ_inplace" 33206e38f1dSHong Zhang PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 333faca2338SShri Abhyankar { 334faca2338SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 335faca2338SShri Abhyankar PetscInt n=a->mbs,bs = A->rmap->bs,bs2=a->bs2; 336faca2338SShri Abhyankar PetscTruth row_identity,col_identity,both_identity; 337faca2338SShri Abhyankar IS isicol; 338faca2338SShri Abhyankar PetscErrorCode ierr; 339faca2338SShri Abhyankar const PetscInt *r,*ic; 340faca2338SShri Abhyankar PetscInt i,*ai=a->i,*aj=a->j; 341faca2338SShri Abhyankar PetscInt *bi,*bj,*ajtmp; 342faca2338SShri Abhyankar PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 343faca2338SShri Abhyankar PetscReal f; 344faca2338SShri Abhyankar PetscInt nlnk,*lnk,k,**bi_ptr; 345faca2338SShri Abhyankar PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 346faca2338SShri Abhyankar PetscBT lnkbt; 347faca2338SShri Abhyankar 348faca2338SShri Abhyankar PetscFunctionBegin; 349e32f2f54SBarry Smith if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 350faca2338SShri Abhyankar ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 351faca2338SShri Abhyankar ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 352faca2338SShri Abhyankar ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 353faca2338SShri Abhyankar 354bc74c81fSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 355bc74c81fSJed Brown ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 356bc74c81fSJed Brown ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 357bc74c81fSJed Brown 358a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 359c7272abeSHong Zhang 360c7272abeSHong Zhang /* linked list for storing column indices of the active row */ 361c7272abeSHong Zhang nlnk = n + 1; 362c7272abeSHong Zhang ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 363c7272abeSHong Zhang 364c7272abeSHong Zhang ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 365c7272abeSHong Zhang 366c7272abeSHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 367c7272abeSHong Zhang f = info->fill; 368c7272abeSHong Zhang ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 369c7272abeSHong Zhang current_space = free_space; 37083287d42SBarry Smith 37183287d42SBarry Smith for (i=0; i<n; i++) { 372c7272abeSHong Zhang /* copy previous fill into linked list */ 37383287d42SBarry Smith nzi = 0; 374c7272abeSHong Zhang nnz = ai[r[i]+1] - ai[r[i]]; 375e32f2f54SBarry Smith if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 376c7272abeSHong Zhang ajtmp = aj + ai[r[i]]; 377c7272abeSHong Zhang ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 378c7272abeSHong Zhang nzi += nlnk; 379c7272abeSHong Zhang 380c7272abeSHong Zhang /* add pivot rows into linked list */ 381c7272abeSHong Zhang row = lnk[n]; 382c7272abeSHong Zhang while (row < i) { 383c7272abeSHong Zhang nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 384c7272abeSHong Zhang ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 385c7272abeSHong Zhang ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 386c7272abeSHong Zhang nzi += nlnk; 387c7272abeSHong Zhang row = lnk[row]; 38883287d42SBarry Smith } 389c7272abeSHong Zhang bi[i+1] = bi[i] + nzi; 390c7272abeSHong Zhang im[i] = nzi; 391c7272abeSHong Zhang 392c7272abeSHong Zhang /* mark bdiag */ 393c7272abeSHong Zhang nzbd = 0; 394c7272abeSHong Zhang nnz = nzi; 395c7272abeSHong Zhang k = lnk[n]; 396c7272abeSHong Zhang while (nnz-- && k < i){ 397c7272abeSHong Zhang nzbd++; 398c7272abeSHong Zhang k = lnk[k]; 399c7272abeSHong Zhang } 400c7272abeSHong Zhang bdiag[i] = bi[i] + nzbd; 401c7272abeSHong Zhang 402c7272abeSHong Zhang /* if free space is not available, make more free space */ 403c7272abeSHong Zhang if (current_space->local_remaining<nzi) { 404c7272abeSHong Zhang nnz = (n - i)*nzi; /* estimated and max additional space needed */ 405c7272abeSHong Zhang ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 406c7272abeSHong Zhang reallocs++; 40783287d42SBarry Smith } 40883287d42SBarry Smith 409c7272abeSHong Zhang /* copy data into free space, then initialize lnk */ 410c7272abeSHong Zhang ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 411c7272abeSHong Zhang bi_ptr[i] = current_space->array; 412c7272abeSHong Zhang current_space->array += nzi; 413c7272abeSHong Zhang current_space->local_used += nzi; 414c7272abeSHong Zhang current_space->local_remaining -= nzi; 415c7272abeSHong Zhang } 4166cf91177SBarry Smith #if defined(PETSC_USE_INFO) 41783287d42SBarry Smith if (ai[n] != 0) { 418c7272abeSHong Zhang PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 419ae15b995SBarry Smith ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 420ae15b995SBarry Smith ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 421ae15b995SBarry Smith ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 422ae15b995SBarry Smith ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 42383287d42SBarry Smith } else { 424c7272abeSHong Zhang ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 42583287d42SBarry Smith } 42663ba0a88SBarry Smith #endif 42783287d42SBarry Smith 42883287d42SBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 42983287d42SBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 43083287d42SBarry Smith 431c7272abeSHong Zhang /* destroy list of free space and other temporary array(s) */ 432c7272abeSHong Zhang ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 433c7272abeSHong Zhang ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 434c7272abeSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 435c7272abeSHong Zhang ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 43683287d42SBarry Smith 43783287d42SBarry Smith /* put together the new matrix */ 438719d5645SBarry Smith ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 439719d5645SBarry Smith ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 440719d5645SBarry Smith b = (Mat_SeqBAIJ*)(B)->data; 441e6b907acSBarry Smith b->free_a = PETSC_TRUE; 442e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 443c7272abeSHong Zhang b->singlemalloc = PETSC_FALSE; 444c7272abeSHong Zhang ierr = PetscMalloc((bi[n]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 445c7272abeSHong Zhang b->j = bj; 446c7272abeSHong Zhang b->i = bi; 447c7272abeSHong Zhang b->diag = bdiag; 4487f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 44983287d42SBarry Smith b->ilen = 0; 45083287d42SBarry Smith b->imax = 0; 45183287d42SBarry Smith b->row = isrow; 45283287d42SBarry Smith b->col = iscol; 453d3d32019SBarry Smith b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 45483287d42SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 45583287d42SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 45683287d42SBarry Smith b->icol = isicol; 45787828ca2SBarry Smith ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 458c7272abeSHong Zhang ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr); 45983287d42SBarry Smith 460c7272abeSHong Zhang b->maxnz = b->nz = bi[n] ; 461d5f3da31SBarry Smith (B)->factortype = MAT_FACTOR_LU; 462719d5645SBarry Smith (B)->info.factor_mallocs = reallocs; 463719d5645SBarry Smith (B)->info.fill_ratio_given = f; 464c7272abeSHong Zhang 46583287d42SBarry Smith if (ai[n] != 0) { 466c7272abeSHong Zhang (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 46783287d42SBarry Smith } else { 468719d5645SBarry Smith (B)->info.fill_ratio_needed = 0.0; 46983287d42SBarry Smith } 470c7272abeSHong Zhang 471db4efbfdSBarry Smith ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 472db4efbfdSBarry Smith ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 4737d18ce8fSMatthew Knepley both_identity = (PetscTruth) (row_identity && col_identity); 4748b1456e3SHong Zhang ierr = MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);CHKERRQ(ierr); 47583287d42SBarry Smith PetscFunctionReturn(0); 47683287d42SBarry Smith } 477db4efbfdSBarry Smith 478