1be1d678aSKris Buschelman 283287d42SBarry Smith /* 383287d42SBarry Smith Factorization code for BAIJ format. 483287d42SBarry Smith */ 5c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> 6af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 783287d42SBarry Smith 8db4efbfdSBarry Smith /* 9db4efbfdSBarry Smith This is used to set the numeric factorization for both LU and ILU symbolic factorization 10db4efbfdSBarry Smith */ 11ace3abfcSBarry Smith PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscBool natural) 12ae3d28f0SHong Zhang { 13ae3d28f0SHong Zhang PetscFunctionBegin; 146929473cSShri Abhyankar if (natural) { 156929473cSShri Abhyankar switch (fact->rmap->bs) { 16048b5e81SShri Abhyankar case 1: 17048b5e81SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 18048b5e81SShri Abhyankar break; 196929473cSShri Abhyankar case 2: 204dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 216929473cSShri Abhyankar break; 226929473cSShri Abhyankar case 3: 234dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 246929473cSShri Abhyankar break; 256929473cSShri Abhyankar case 4: 264dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 276929473cSShri Abhyankar break; 286929473cSShri Abhyankar case 5: 294dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 306929473cSShri Abhyankar break; 316929473cSShri Abhyankar case 6: 324dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 336929473cSShri Abhyankar break; 346929473cSShri Abhyankar case 7: 354dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 366929473cSShri Abhyankar break; 3796e086a2SDaniel Kokron case 9: 38*5f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3996e086a2SDaniel Kokron fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering; 40aee5c371SBarry Smith #else 41aee5c371SBarry Smith fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 42aee5c371SBarry Smith #endif 4396e086a2SDaniel Kokron break; 4429a97285SShri Abhyankar case 15: 4529a97285SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering; 4629a97285SShri Abhyankar break; 476929473cSShri Abhyankar default: 484dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 496929473cSShri Abhyankar break; 506929473cSShri Abhyankar } 516ba06ab7SHong Zhang } else { 52ae3d28f0SHong Zhang switch (fact->rmap->bs) { 53048b5e81SShri Abhyankar case 1: 54048b5e81SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 55048b5e81SShri Abhyankar break; 56ae3d28f0SHong Zhang case 2: 574dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 58ae3d28f0SHong Zhang break; 59ae3d28f0SHong Zhang case 3: 604dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 61ae3d28f0SHong Zhang break; 62ae3d28f0SHong Zhang case 4: 634dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 64ae3d28f0SHong Zhang break; 65ae3d28f0SHong Zhang case 5: 664dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 67ae3d28f0SHong Zhang break; 68ae3d28f0SHong Zhang case 6: 694dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 70ae3d28f0SHong Zhang break; 71ae3d28f0SHong Zhang case 7: 724dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 73ae3d28f0SHong Zhang break; 74ae3d28f0SHong Zhang default: 754dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 76ae3d28f0SHong Zhang break; 77ae3d28f0SHong Zhang } 786929473cSShri Abhyankar } 79ae3d28f0SHong Zhang PetscFunctionReturn(0); 80ae3d28f0SHong Zhang } 81ae3d28f0SHong Zhang 82ace3abfcSBarry Smith PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscBool natural) 83db4efbfdSBarry Smith { 84db4efbfdSBarry Smith PetscFunctionBegin; 85db4efbfdSBarry Smith if (natural) { 86db4efbfdSBarry Smith switch (inA->rmap->bs) { 87db4efbfdSBarry Smith case 1: 8806e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 89db4efbfdSBarry Smith break; 90db4efbfdSBarry Smith case 2: 9106e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace; 92db4efbfdSBarry Smith break; 93db4efbfdSBarry Smith case 3: 9406e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace; 95db4efbfdSBarry Smith break; 96db4efbfdSBarry Smith case 4: 97ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 98db4efbfdSBarry Smith { 99ace3abfcSBarry Smith PetscBool sse_enabled_local; 100db4efbfdSBarry Smith PetscErrorCode ierr; 1010298fd71SBarry Smith ierr = PetscSSEIsEnabled(inA->comm,&sse_enabled_local,NULL);CHKERRQ(ierr); 102db4efbfdSBarry Smith if (sse_enabled_local) { 103db4efbfdSBarry Smith # if defined(PETSC_HAVE_SSE) 104db4efbfdSBarry Smith int i,*AJ=a->j,nz=a->nz,n=a->mbs; 105db4efbfdSBarry Smith if (n==(unsigned short)n) { 106db4efbfdSBarry Smith unsigned short *aj=(unsigned short*)AJ; 10726fbe8dcSKarl Rupp for (i=0; i<nz; i++) aj[i] = (unsigned short)AJ[i]; 10826fbe8dcSKarl Rupp 109db4efbfdSBarry Smith inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj; 110db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj; 11126fbe8dcSKarl Rupp 112db4efbfdSBarry Smith ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");CHKERRQ(ierr); 113db4efbfdSBarry Smith } else { 114db4efbfdSBarry Smith /* Scale the column indices for easier indexing in MatSolve. */ 115db4efbfdSBarry Smith /* for (i=0;i<nz;i++) { */ 116db4efbfdSBarry Smith /* AJ[i] = AJ[i]*4; */ 117db4efbfdSBarry Smith /* } */ 118db4efbfdSBarry Smith inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE; 119db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 12026fbe8dcSKarl Rupp 121db4efbfdSBarry Smith ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");CHKERRQ(ierr); 122db4efbfdSBarry Smith } 123db4efbfdSBarry Smith # else 124db4efbfdSBarry Smith /* This should never be reached. If so, problem in PetscSSEIsEnabled. */ 125e32f2f54SBarry Smith SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSE Hardware unavailable"); 126db4efbfdSBarry Smith # endif 127db4efbfdSBarry Smith } else { 12806e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 129db4efbfdSBarry Smith } 130db4efbfdSBarry Smith } 131db4efbfdSBarry Smith #else 13206e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 133db4efbfdSBarry Smith #endif 134db4efbfdSBarry Smith break; 135db4efbfdSBarry Smith case 5: 13606e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace; 137db4efbfdSBarry Smith break; 138db4efbfdSBarry Smith case 6: 13906e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace; 140db4efbfdSBarry Smith break; 141db4efbfdSBarry Smith case 7: 14206e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace; 143db4efbfdSBarry Smith break; 144db4efbfdSBarry Smith default: 14506e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 146db4efbfdSBarry Smith break; 147db4efbfdSBarry Smith } 148db4efbfdSBarry Smith } else { 149db4efbfdSBarry Smith switch (inA->rmap->bs) { 150db4efbfdSBarry Smith case 1: 15106e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 152db4efbfdSBarry Smith break; 153db4efbfdSBarry Smith case 2: 15406e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace; 155db4efbfdSBarry Smith break; 156db4efbfdSBarry Smith case 3: 15706e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace; 158db4efbfdSBarry Smith break; 159db4efbfdSBarry Smith case 4: 16006e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace; 161db4efbfdSBarry Smith break; 162db4efbfdSBarry Smith case 5: 16306e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace; 164db4efbfdSBarry Smith break; 165db4efbfdSBarry Smith case 6: 16606e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace; 167db4efbfdSBarry Smith break; 168db4efbfdSBarry Smith case 7: 16906e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace; 170db4efbfdSBarry Smith break; 171db4efbfdSBarry Smith default: 17206e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 173db4efbfdSBarry Smith break; 174db4efbfdSBarry Smith } 175db4efbfdSBarry Smith } 176db4efbfdSBarry Smith PetscFunctionReturn(0); 177db4efbfdSBarry Smith } 178db4efbfdSBarry Smith 17983287d42SBarry Smith /* 18083287d42SBarry Smith The symbolic factorization code is identical to that for AIJ format, 18183287d42SBarry Smith except for very small changes since this is now a SeqBAIJ datastructure. 18283287d42SBarry Smith NOT good code reuse. 18383287d42SBarry Smith */ 184c6db04a5SJed Brown #include <petscbt.h> 185c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 186c7272abeSHong Zhang 1874dd39f65SShri Abhyankar PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 18883287d42SBarry Smith { 18983287d42SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 190c7272abeSHong Zhang PetscInt n =a->mbs,bs = A->rmap->bs,bs2=a->bs2; 191ace3abfcSBarry Smith PetscBool row_identity,col_identity,both_identity; 19283287d42SBarry Smith IS isicol; 1936849ba73SBarry Smith PetscErrorCode ierr; 194c7272abeSHong Zhang const PetscInt *r,*ic; 195c7272abeSHong Zhang PetscInt i,*ai=a->i,*aj=a->j; 196c7272abeSHong Zhang PetscInt *bi,*bj,*ajtmp; 197c7272abeSHong Zhang PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 198c7272abeSHong Zhang PetscReal f; 199c7272abeSHong Zhang PetscInt nlnk,*lnk,k,**bi_ptr; 2000298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 201c7272abeSHong Zhang PetscBT lnkbt; 202ece542f9SHong Zhang PetscBool missing; 20383287d42SBarry Smith 20483287d42SBarry Smith PetscFunctionBegin; 205e32f2f54SBarry Smith if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 206ece542f9SHong Zhang ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 207ece542f9SHong Zhang if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 208ece542f9SHong Zhang 2096ba06ab7SHong Zhang if (bs>1) { /* check shifttype */ 210f23aa3ddSBarry Smith if (info->shifttype == MAT_SHIFT_NONZERO || info->shifttype == MAT_SHIFT_POSITIVE_DEFINITE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix"); 2116ba06ab7SHong Zhang } 2126ba06ab7SHong Zhang 21383287d42SBarry Smith ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 21483287d42SBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 21583287d42SBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 21683287d42SBarry Smith 217bc74c81fSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 218854ce69bSBarry Smith ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 219854ce69bSBarry Smith ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 220a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 221b2b2dd24SShri Abhyankar 222b2b2dd24SShri Abhyankar /* linked list for storing column indices of the active row */ 223b2b2dd24SShri Abhyankar nlnk = n + 1; 224b2b2dd24SShri Abhyankar ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 225b2b2dd24SShri Abhyankar 226dcca6d9dSJed Brown ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 227b2b2dd24SShri Abhyankar 228b2b2dd24SShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 229b2b2dd24SShri Abhyankar f = info->fill; 230f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 23126fbe8dcSKarl Rupp 232b2b2dd24SShri Abhyankar current_space = free_space; 233b2b2dd24SShri Abhyankar 234b2b2dd24SShri Abhyankar for (i=0; i<n; i++) { 235b2b2dd24SShri Abhyankar /* copy previous fill into linked list */ 236b2b2dd24SShri Abhyankar nzi = 0; 237b2b2dd24SShri Abhyankar nnz = ai[r[i]+1] - ai[r[i]]; 238b2b2dd24SShri Abhyankar ajtmp = aj + ai[r[i]]; 239b2b2dd24SShri Abhyankar ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 240b2b2dd24SShri Abhyankar nzi += nlnk; 241b2b2dd24SShri Abhyankar 242b2b2dd24SShri Abhyankar /* add pivot rows into linked list */ 243b2b2dd24SShri Abhyankar row = lnk[n]; 244b2b2dd24SShri Abhyankar while (row < i) { 245b2b2dd24SShri Abhyankar nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 246b2b2dd24SShri Abhyankar ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 247b2b2dd24SShri Abhyankar ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 248b2b2dd24SShri Abhyankar nzi += nlnk; 249b2b2dd24SShri Abhyankar row = lnk[row]; 250b2b2dd24SShri Abhyankar } 251b2b2dd24SShri Abhyankar bi[i+1] = bi[i] + nzi; 252b2b2dd24SShri Abhyankar im[i] = nzi; 253b2b2dd24SShri Abhyankar 254b2b2dd24SShri Abhyankar /* mark bdiag */ 255b2b2dd24SShri Abhyankar nzbd = 0; 256b2b2dd24SShri Abhyankar nnz = nzi; 257b2b2dd24SShri Abhyankar k = lnk[n]; 258b2b2dd24SShri Abhyankar while (nnz-- && k < i) { 259b2b2dd24SShri Abhyankar nzbd++; 260b2b2dd24SShri Abhyankar k = lnk[k]; 261b2b2dd24SShri Abhyankar } 2622ce24eb6SHong Zhang bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 263b2b2dd24SShri Abhyankar 264b2b2dd24SShri Abhyankar /* if free space is not available, make more free space */ 265b2b2dd24SShri Abhyankar if (current_space->local_remaining<nzi) { 266f91af8c7SBarry Smith nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n - i,nzi)); /* estimated and max additional space needed */ 267b2b2dd24SShri Abhyankar ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 268b2b2dd24SShri Abhyankar reallocs++; 269b2b2dd24SShri Abhyankar } 270b2b2dd24SShri Abhyankar 271b2b2dd24SShri Abhyankar /* copy data into free space, then initialize lnk */ 272b2b2dd24SShri Abhyankar ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 27326fbe8dcSKarl Rupp 274b2b2dd24SShri Abhyankar bi_ptr[i] = current_space->array; 275b2b2dd24SShri Abhyankar current_space->array += nzi; 276b2b2dd24SShri Abhyankar current_space->local_used += nzi; 277b2b2dd24SShri Abhyankar current_space->local_remaining -= nzi; 278b2b2dd24SShri Abhyankar } 279b2b2dd24SShri Abhyankar 280b2b2dd24SShri Abhyankar ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 281b2b2dd24SShri Abhyankar ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 282b2b2dd24SShri Abhyankar 2839263d837SHong Zhang /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 284854ce69bSBarry Smith ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 2852ce24eb6SHong Zhang ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 286b2b2dd24SShri Abhyankar ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 287b2b2dd24SShri Abhyankar ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 288b2b2dd24SShri Abhyankar 289b2b2dd24SShri Abhyankar /* put together the new matrix */ 290367daffbSBarry Smith ierr = MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 2913bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 292b2b2dd24SShri Abhyankar b = (Mat_SeqBAIJ*)(B)->data; 29326fbe8dcSKarl Rupp 294b2b2dd24SShri Abhyankar b->free_a = PETSC_TRUE; 295b2b2dd24SShri Abhyankar b->free_ij = PETSC_TRUE; 296b2b2dd24SShri Abhyankar b->singlemalloc = PETSC_FALSE; 29726fbe8dcSKarl Rupp 298854ce69bSBarry Smith ierr = PetscMalloc1((bdiag[0]+1)*bs2,&b->a);CHKERRQ(ierr); 299b2b2dd24SShri Abhyankar b->j = bj; 300b2b2dd24SShri Abhyankar b->i = bi; 301b2b2dd24SShri Abhyankar b->diag = bdiag; 302b2b2dd24SShri Abhyankar b->free_diag = PETSC_TRUE; 303b2b2dd24SShri Abhyankar b->ilen = 0; 304b2b2dd24SShri Abhyankar b->imax = 0; 305b2b2dd24SShri Abhyankar b->row = isrow; 306b2b2dd24SShri Abhyankar b->col = iscol; 307b2b2dd24SShri Abhyankar b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 30826fbe8dcSKarl Rupp 309b2b2dd24SShri Abhyankar ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 310b2b2dd24SShri Abhyankar ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 311b2b2dd24SShri Abhyankar b->icol = isicol; 312854ce69bSBarry Smith ierr = PetscMalloc1(bs*n+bs,&b->solve_work);CHKERRQ(ierr); 3133bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr); 314b2b2dd24SShri Abhyankar 315b2b2dd24SShri Abhyankar b->maxnz = b->nz = bdiag[0]+1; 31626fbe8dcSKarl Rupp 317d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 318b2b2dd24SShri Abhyankar B->info.factor_mallocs = reallocs; 319b2b2dd24SShri Abhyankar B->info.fill_ratio_given = f; 320b2b2dd24SShri Abhyankar 321b2b2dd24SShri Abhyankar if (ai[n] != 0) { 322b2b2dd24SShri Abhyankar B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 323b2b2dd24SShri Abhyankar } else { 324b2b2dd24SShri Abhyankar B->info.fill_ratio_needed = 0.0; 325b2b2dd24SShri Abhyankar } 3269263d837SHong Zhang #if defined(PETSC_USE_INFO) 3279263d837SHong Zhang if (ai[n] != 0) { 3289263d837SHong Zhang PetscReal af = B->info.fill_ratio_needed; 32957622a8eSBarry Smith ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 33057622a8eSBarry Smith ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 33157622a8eSBarry Smith ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 3329263d837SHong Zhang ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 3339263d837SHong Zhang } else { 3349263d837SHong Zhang ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 3359263d837SHong Zhang } 3369263d837SHong Zhang #endif 337b2b2dd24SShri Abhyankar 338b2b2dd24SShri Abhyankar ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 339b2b2dd24SShri Abhyankar ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 34026fbe8dcSKarl Rupp 341ace3abfcSBarry Smith both_identity = (PetscBool) (row_identity && col_identity); 34226fbe8dcSKarl Rupp 3434dd39f65SShri Abhyankar ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr); 344b2b2dd24SShri Abhyankar PetscFunctionReturn(0); 345b2b2dd24SShri Abhyankar } 346b2b2dd24SShri Abhyankar 34706e38f1dSHong Zhang PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 348faca2338SShri Abhyankar { 349faca2338SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 350faca2338SShri Abhyankar PetscInt n =a->mbs,bs = A->rmap->bs,bs2=a->bs2; 351ace3abfcSBarry Smith PetscBool row_identity,col_identity,both_identity; 352faca2338SShri Abhyankar IS isicol; 353faca2338SShri Abhyankar PetscErrorCode ierr; 354faca2338SShri Abhyankar const PetscInt *r,*ic; 355faca2338SShri Abhyankar PetscInt i,*ai=a->i,*aj=a->j; 356faca2338SShri Abhyankar PetscInt *bi,*bj,*ajtmp; 357faca2338SShri Abhyankar PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 358faca2338SShri Abhyankar PetscReal f; 359faca2338SShri Abhyankar PetscInt nlnk,*lnk,k,**bi_ptr; 3600298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 361faca2338SShri Abhyankar PetscBT lnkbt; 362ece542f9SHong Zhang PetscBool missing; 363faca2338SShri Abhyankar 364faca2338SShri Abhyankar PetscFunctionBegin; 365e32f2f54SBarry Smith if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 366ece542f9SHong Zhang ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 367ece542f9SHong Zhang if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 368ece542f9SHong Zhang 369faca2338SShri Abhyankar ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 370faca2338SShri Abhyankar ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 371faca2338SShri Abhyankar ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 372faca2338SShri Abhyankar 373bc74c81fSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 374854ce69bSBarry Smith ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); 375854ce69bSBarry Smith ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); 376bc74c81fSJed Brown 377a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 378c7272abeSHong Zhang 379c7272abeSHong Zhang /* linked list for storing column indices of the active row */ 380c7272abeSHong Zhang nlnk = n + 1; 381c7272abeSHong Zhang ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 382c7272abeSHong Zhang 383dcca6d9dSJed Brown ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 384c7272abeSHong Zhang 385c7272abeSHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 386c7272abeSHong Zhang f = info->fill; 387f91af8c7SBarry Smith ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); 388c7272abeSHong Zhang current_space = free_space; 38983287d42SBarry Smith 39083287d42SBarry Smith for (i=0; i<n; i++) { 391c7272abeSHong Zhang /* copy previous fill into linked list */ 39283287d42SBarry Smith nzi = 0; 393c7272abeSHong Zhang nnz = ai[r[i]+1] - ai[r[i]]; 394c7272abeSHong Zhang ajtmp = aj + ai[r[i]]; 395c7272abeSHong Zhang ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 396c7272abeSHong Zhang nzi += nlnk; 397c7272abeSHong Zhang 398c7272abeSHong Zhang /* add pivot rows into linked list */ 399c7272abeSHong Zhang row = lnk[n]; 400c7272abeSHong Zhang while (row < i) { 401c7272abeSHong Zhang nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 402c7272abeSHong Zhang ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 403c7272abeSHong Zhang ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 404c7272abeSHong Zhang nzi += nlnk; 405c7272abeSHong Zhang row = lnk[row]; 40683287d42SBarry Smith } 407c7272abeSHong Zhang bi[i+1] = bi[i] + nzi; 408c7272abeSHong Zhang im[i] = nzi; 409c7272abeSHong Zhang 410c7272abeSHong Zhang /* mark bdiag */ 411c7272abeSHong Zhang nzbd = 0; 412c7272abeSHong Zhang nnz = nzi; 413c7272abeSHong Zhang k = lnk[n]; 414c7272abeSHong Zhang while (nnz-- && k < i) { 415c7272abeSHong Zhang nzbd++; 416c7272abeSHong Zhang k = lnk[k]; 417c7272abeSHong Zhang } 418c7272abeSHong Zhang bdiag[i] = bi[i] + nzbd; 419c7272abeSHong Zhang 420c7272abeSHong Zhang /* if free space is not available, make more free space */ 421c7272abeSHong Zhang if (current_space->local_remaining<nzi) { 422f91af8c7SBarry Smith nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */ 423c7272abeSHong Zhang ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 424c7272abeSHong Zhang reallocs++; 42583287d42SBarry Smith } 42683287d42SBarry Smith 427c7272abeSHong Zhang /* copy data into free space, then initialize lnk */ 428c7272abeSHong Zhang ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 42926fbe8dcSKarl Rupp 430c7272abeSHong Zhang bi_ptr[i] = current_space->array; 431c7272abeSHong Zhang current_space->array += nzi; 432c7272abeSHong Zhang current_space->local_used += nzi; 433c7272abeSHong Zhang current_space->local_remaining -= nzi; 434c7272abeSHong Zhang } 4356cf91177SBarry Smith #if defined(PETSC_USE_INFO) 43683287d42SBarry Smith if (ai[n] != 0) { 437c7272abeSHong Zhang PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 43857622a8eSBarry Smith ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 43957622a8eSBarry Smith ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 44057622a8eSBarry Smith ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 441ae15b995SBarry Smith ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 44283287d42SBarry Smith } else { 443c7272abeSHong Zhang ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 44483287d42SBarry Smith } 44563ba0a88SBarry Smith #endif 44683287d42SBarry Smith 44783287d42SBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 44883287d42SBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 44983287d42SBarry Smith 450c7272abeSHong Zhang /* destroy list of free space and other temporary array(s) */ 451854ce69bSBarry Smith ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); 452c7272abeSHong Zhang ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 453c7272abeSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 454c7272abeSHong Zhang ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 45583287d42SBarry Smith 45683287d42SBarry Smith /* put together the new matrix */ 457367daffbSBarry Smith ierr = MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 4583bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 459719d5645SBarry Smith b = (Mat_SeqBAIJ*)(B)->data; 46026fbe8dcSKarl Rupp 461e6b907acSBarry Smith b->free_a = PETSC_TRUE; 462e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 463c7272abeSHong Zhang b->singlemalloc = PETSC_FALSE; 46426fbe8dcSKarl Rupp 465854ce69bSBarry Smith ierr = PetscMalloc1((bi[n]+1)*bs2,&b->a);CHKERRQ(ierr); 466c7272abeSHong Zhang b->j = bj; 467c7272abeSHong Zhang b->i = bi; 468c7272abeSHong Zhang b->diag = bdiag; 4697f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 47083287d42SBarry Smith b->ilen = 0; 47183287d42SBarry Smith b->imax = 0; 47283287d42SBarry Smith b->row = isrow; 47383287d42SBarry Smith b->col = iscol; 474d3d32019SBarry Smith b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 47526fbe8dcSKarl Rupp 47683287d42SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 47783287d42SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 47883287d42SBarry Smith b->icol = isicol; 47926fbe8dcSKarl Rupp 480854ce69bSBarry Smith ierr = PetscMalloc1(bs*n+bs,&b->solve_work);CHKERRQ(ierr); 4813bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr); 48283287d42SBarry Smith 483c7272abeSHong Zhang b->maxnz = b->nz = bi[n]; 48426fbe8dcSKarl Rupp 485d5f3da31SBarry Smith (B)->factortype = MAT_FACTOR_LU; 486719d5645SBarry Smith (B)->info.factor_mallocs = reallocs; 487719d5645SBarry Smith (B)->info.fill_ratio_given = f; 488c7272abeSHong Zhang 48983287d42SBarry Smith if (ai[n] != 0) { 490c7272abeSHong Zhang (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 49183287d42SBarry Smith } else { 492719d5645SBarry Smith (B)->info.fill_ratio_needed = 0.0; 49383287d42SBarry Smith } 494c7272abeSHong Zhang 495db4efbfdSBarry Smith ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 496db4efbfdSBarry Smith ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 49726fbe8dcSKarl Rupp 498ace3abfcSBarry Smith both_identity = (PetscBool) (row_identity && col_identity); 49926fbe8dcSKarl Rupp 5008b1456e3SHong Zhang ierr = MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);CHKERRQ(ierr); 50183287d42SBarry Smith PetscFunctionReturn(0); 50283287d42SBarry Smith } 503db4efbfdSBarry Smith 504