1be1d678aSKris Buschelman #define PETSCMAT_DLL 2be1d678aSKris Buschelman 383287d42SBarry Smith /* 483287d42SBarry Smith Factorization code for BAIJ format. 583287d42SBarry Smith */ 67c4f633dSBarry Smith #include "../src/mat/impls/baij/seq/baij.h" 7c60f0209SBarry Smith #include "../src/mat/blockinvert.h" 883287d42SBarry Smith 9db4efbfdSBarry Smith #undef __FUNCT__ 10db4efbfdSBarry Smith #define __FUNCT__ "MatSeqBAIJSetNumericFactorization" 11db4efbfdSBarry Smith /* 12db4efbfdSBarry Smith This is used to set the numeric factorization for both LU and ILU symbolic factorization 13db4efbfdSBarry Smith */ 14db4efbfdSBarry Smith PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat inA,PetscTruth natural) 15db4efbfdSBarry Smith { 16db4efbfdSBarry Smith PetscFunctionBegin; 17db4efbfdSBarry Smith if (natural) { 18db4efbfdSBarry Smith switch (inA->rmap->bs) { 19db4efbfdSBarry Smith case 1: 20db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 21db4efbfdSBarry Smith break; 22db4efbfdSBarry Smith case 2: 23db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 24db4efbfdSBarry Smith break; 25db4efbfdSBarry Smith case 3: 26db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 27db4efbfdSBarry Smith break; 28db4efbfdSBarry Smith case 4: 2965460251SBarry Smith #if defined(PETSC_USE_SCALAR_MAT_SINGLE) 30db4efbfdSBarry Smith { 31db4efbfdSBarry Smith PetscTruth sse_enabled_local; 32db4efbfdSBarry Smith PetscErrorCode ierr; 33db4efbfdSBarry Smith ierr = PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);CHKERRQ(ierr); 34db4efbfdSBarry Smith if (sse_enabled_local) { 35db4efbfdSBarry Smith # if defined(PETSC_HAVE_SSE) 36db4efbfdSBarry Smith int i,*AJ=a->j,nz=a->nz,n=a->mbs; 37db4efbfdSBarry Smith if (n==(unsigned short)n) { 38db4efbfdSBarry Smith unsigned short *aj=(unsigned short *)AJ; 39db4efbfdSBarry Smith for (i=0;i<nz;i++) { 40db4efbfdSBarry Smith aj[i] = (unsigned short)AJ[i]; 41db4efbfdSBarry Smith } 42db4efbfdSBarry Smith inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj; 43db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj; 44db4efbfdSBarry Smith ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");CHKERRQ(ierr); 45db4efbfdSBarry Smith } else { 46db4efbfdSBarry Smith /* Scale the column indices for easier indexing in MatSolve. */ 47db4efbfdSBarry Smith /* for (i=0;i<nz;i++) { */ 48db4efbfdSBarry Smith /* AJ[i] = AJ[i]*4; */ 49db4efbfdSBarry Smith /* } */ 50db4efbfdSBarry Smith inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE; 51db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 52db4efbfdSBarry Smith ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");CHKERRQ(ierr); 53db4efbfdSBarry Smith } 54db4efbfdSBarry Smith # else 55db4efbfdSBarry Smith /* This should never be reached. If so, problem in PetscSSEIsEnabled. */ 56db4efbfdSBarry Smith SETERRQ(PETSC_ERR_SUP,"SSE Hardware unavailable"); 57db4efbfdSBarry Smith # endif 58db4efbfdSBarry Smith } else { 59db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 60db4efbfdSBarry Smith } 61db4efbfdSBarry Smith } 62db4efbfdSBarry Smith #else 63db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 64db4efbfdSBarry Smith #endif 65db4efbfdSBarry Smith break; 66db4efbfdSBarry Smith case 5: 67db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 68db4efbfdSBarry Smith break; 69db4efbfdSBarry Smith case 6: 70db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 71db4efbfdSBarry Smith break; 72db4efbfdSBarry Smith case 7: 73db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 74db4efbfdSBarry Smith break; 75db4efbfdSBarry Smith default: 76db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 77db4efbfdSBarry Smith break; 78db4efbfdSBarry Smith } 79db4efbfdSBarry Smith } else { 80db4efbfdSBarry Smith switch (inA->rmap->bs) { 81db4efbfdSBarry Smith case 1: 82db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 83db4efbfdSBarry Smith break; 84db4efbfdSBarry Smith case 2: 85db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 86db4efbfdSBarry Smith break; 87db4efbfdSBarry Smith case 3: 88db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 89db4efbfdSBarry Smith break; 90db4efbfdSBarry Smith case 4: 91db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 92db4efbfdSBarry Smith break; 93db4efbfdSBarry Smith case 5: 94db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 95db4efbfdSBarry Smith break; 96db4efbfdSBarry Smith case 6: 97db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 98db4efbfdSBarry Smith break; 99db4efbfdSBarry Smith case 7: 100db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 101db4efbfdSBarry Smith break; 102db4efbfdSBarry Smith default: 103db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 104db4efbfdSBarry Smith break; 105db4efbfdSBarry Smith } 106db4efbfdSBarry Smith } 107db4efbfdSBarry Smith PetscFunctionReturn(0); 108db4efbfdSBarry Smith } 109db4efbfdSBarry Smith 11083287d42SBarry Smith /* 11183287d42SBarry Smith The symbolic factorization code is identical to that for AIJ format, 11283287d42SBarry Smith except for very small changes since this is now a SeqBAIJ datastructure. 11383287d42SBarry Smith NOT good code reuse. 11483287d42SBarry Smith */ 115c7272abeSHong Zhang #include "petscbt.h" 116c7272abeSHong Zhang #include "../src/mat/utils/freespace.h" 117c7272abeSHong Zhang 1184a2ae208SSatish Balay #undef __FUNCT__ 119faca2338SShri Abhyankar #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ_newdatastruct" 120faca2338SShri Abhyankar PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_newdatastruct(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 12183287d42SBarry Smith { 12283287d42SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 123c7272abeSHong Zhang PetscInt n=a->mbs,bs = A->rmap->bs,bs2=a->bs2; 124c7272abeSHong Zhang PetscTruth row_identity,col_identity,both_identity; 12583287d42SBarry Smith IS isicol; 1266849ba73SBarry Smith PetscErrorCode ierr; 127c7272abeSHong Zhang const PetscInt *r,*ic; 128c7272abeSHong Zhang PetscInt i,*ai=a->i,*aj=a->j; 129c7272abeSHong Zhang PetscInt *bi,*bj,*ajtmp; 130c7272abeSHong Zhang PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 131c7272abeSHong Zhang PetscReal f; 132c7272abeSHong Zhang PetscInt nlnk,*lnk,k,**bi_ptr; 133c7272abeSHong Zhang PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 134c7272abeSHong Zhang PetscBT lnkbt; 13583287d42SBarry Smith 13683287d42SBarry Smith PetscFunctionBegin; 137d0f46423SBarry Smith if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 13883287d42SBarry Smith ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 13983287d42SBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 14083287d42SBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 14183287d42SBarry Smith 142*a6df321fSShri Abhyankar /* get new row and diagonal pointers */ 143*a6df321fSShri Abhyankar ierr = PetscMalloc2(n+1,PetscInt,&bi,n+1,PetscInt,&bdiag);CHKERRQ(ierr); 144*a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 145b2b2dd24SShri Abhyankar 146b2b2dd24SShri Abhyankar /* linked list for storing column indices of the active row */ 147b2b2dd24SShri Abhyankar nlnk = n + 1; 148b2b2dd24SShri Abhyankar ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 149b2b2dd24SShri Abhyankar 150b2b2dd24SShri Abhyankar ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 151b2b2dd24SShri Abhyankar 152b2b2dd24SShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 153b2b2dd24SShri Abhyankar f = info->fill; 154b2b2dd24SShri Abhyankar ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 155b2b2dd24SShri Abhyankar current_space = free_space; 156b2b2dd24SShri Abhyankar 157b2b2dd24SShri Abhyankar for (i=0; i<n; i++) { 158b2b2dd24SShri Abhyankar /* copy previous fill into linked list */ 159b2b2dd24SShri Abhyankar nzi = 0; 160b2b2dd24SShri Abhyankar nnz = ai[r[i]+1] - ai[r[i]]; 161b2b2dd24SShri Abhyankar if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 162b2b2dd24SShri Abhyankar ajtmp = aj + ai[r[i]]; 163b2b2dd24SShri Abhyankar ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 164b2b2dd24SShri Abhyankar nzi += nlnk; 165b2b2dd24SShri Abhyankar 166b2b2dd24SShri Abhyankar /* add pivot rows into linked list */ 167b2b2dd24SShri Abhyankar row = lnk[n]; 168b2b2dd24SShri Abhyankar while (row < i) { 169b2b2dd24SShri Abhyankar nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 170b2b2dd24SShri Abhyankar ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 171b2b2dd24SShri Abhyankar ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 172b2b2dd24SShri Abhyankar nzi += nlnk; 173b2b2dd24SShri Abhyankar row = lnk[row]; 174b2b2dd24SShri Abhyankar } 175b2b2dd24SShri Abhyankar bi[i+1] = bi[i] + nzi; 176b2b2dd24SShri Abhyankar im[i] = nzi; 177b2b2dd24SShri Abhyankar 178b2b2dd24SShri Abhyankar /* mark bdiag */ 179b2b2dd24SShri Abhyankar nzbd = 0; 180b2b2dd24SShri Abhyankar nnz = nzi; 181b2b2dd24SShri Abhyankar k = lnk[n]; 182b2b2dd24SShri Abhyankar while (nnz-- && k < i){ 183b2b2dd24SShri Abhyankar nzbd++; 184b2b2dd24SShri Abhyankar k = lnk[k]; 185b2b2dd24SShri Abhyankar } 186b2b2dd24SShri Abhyankar bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU_v2() */ 187b2b2dd24SShri Abhyankar 188b2b2dd24SShri Abhyankar /* if free space is not available, make more free space */ 189b2b2dd24SShri Abhyankar if (current_space->local_remaining<nzi) { 190b2b2dd24SShri Abhyankar nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 191b2b2dd24SShri Abhyankar ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 192b2b2dd24SShri Abhyankar reallocs++; 193b2b2dd24SShri Abhyankar } 194b2b2dd24SShri Abhyankar 195b2b2dd24SShri Abhyankar /* copy data into free space, then initialize lnk */ 196b2b2dd24SShri Abhyankar ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 197b2b2dd24SShri Abhyankar bi_ptr[i] = current_space->array; 198b2b2dd24SShri Abhyankar current_space->array += nzi; 199b2b2dd24SShri Abhyankar current_space->local_used += nzi; 200b2b2dd24SShri Abhyankar current_space->local_remaining -= nzi; 201b2b2dd24SShri Abhyankar } 202b2b2dd24SShri Abhyankar #if defined(PETSC_USE_INFO) 203b2b2dd24SShri Abhyankar if (ai[n] != 0) { 204b2b2dd24SShri Abhyankar PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 205b2b2dd24SShri Abhyankar ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 206b2b2dd24SShri Abhyankar ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 207b2b2dd24SShri Abhyankar ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 208b2b2dd24SShri Abhyankar ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 209b2b2dd24SShri Abhyankar } else { 210b2b2dd24SShri Abhyankar ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 211b2b2dd24SShri Abhyankar } 212b2b2dd24SShri Abhyankar #endif 213b2b2dd24SShri Abhyankar 214b2b2dd24SShri Abhyankar ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 215b2b2dd24SShri Abhyankar ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 216b2b2dd24SShri Abhyankar 217b2b2dd24SShri Abhyankar /* destroy list of free space and other temporary array(s) */ 218b2b2dd24SShri Abhyankar ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 219b2b2dd24SShri Abhyankar ierr = PetscFreeSpaceContiguous_LU_v2(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 220b2b2dd24SShri Abhyankar ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 221b2b2dd24SShri Abhyankar ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 222b2b2dd24SShri Abhyankar 223b2b2dd24SShri Abhyankar /* put together the new matrix */ 224b2b2dd24SShri Abhyankar ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 225b2b2dd24SShri Abhyankar ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 226b2b2dd24SShri Abhyankar b = (Mat_SeqBAIJ*)(B)->data; 227b2b2dd24SShri Abhyankar b->free_a = PETSC_TRUE; 228b2b2dd24SShri Abhyankar b->free_ij = PETSC_TRUE; 229b2b2dd24SShri Abhyankar b->singlemalloc = PETSC_FALSE; 230b2b2dd24SShri Abhyankar ierr = PetscMalloc((bdiag[0]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 231b2b2dd24SShri Abhyankar b->j = bj; 232b2b2dd24SShri Abhyankar b->i = bi; 233b2b2dd24SShri Abhyankar b->diag = bdiag; 234b2b2dd24SShri Abhyankar b->free_diag = PETSC_TRUE; 235b2b2dd24SShri Abhyankar b->ilen = 0; 236b2b2dd24SShri Abhyankar b->imax = 0; 237b2b2dd24SShri Abhyankar b->row = isrow; 238b2b2dd24SShri Abhyankar b->col = iscol; 239b2b2dd24SShri Abhyankar b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 240b2b2dd24SShri Abhyankar ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 241b2b2dd24SShri Abhyankar ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 242b2b2dd24SShri Abhyankar b->icol = isicol; 243b2b2dd24SShri Abhyankar ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 244b2b2dd24SShri Abhyankar ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr); 245b2b2dd24SShri Abhyankar 246b2b2dd24SShri Abhyankar b->maxnz = b->nz = bdiag[0]+1; 247b2b2dd24SShri Abhyankar B->factor = MAT_FACTOR_LU; 248b2b2dd24SShri Abhyankar B->info.factor_mallocs = reallocs; 249b2b2dd24SShri Abhyankar B->info.fill_ratio_given = f; 250b2b2dd24SShri Abhyankar 251b2b2dd24SShri Abhyankar if (ai[n] != 0) { 252b2b2dd24SShri Abhyankar B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 253b2b2dd24SShri Abhyankar } else { 254b2b2dd24SShri Abhyankar B->info.fill_ratio_needed = 0.0; 255b2b2dd24SShri Abhyankar } 256b2b2dd24SShri Abhyankar 257b2b2dd24SShri Abhyankar ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 258b2b2dd24SShri Abhyankar ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 259b2b2dd24SShri Abhyankar both_identity = (PetscTruth) (row_identity && col_identity); 260b2b2dd24SShri Abhyankar if(both_identity){ 261b2b2dd24SShri Abhyankar switch(bs){ 262b2b2dd24SShri Abhyankar case 2: 263c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct; 264b2b2dd24SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering_newdatastruct_v2; 265b2b2dd24SShri Abhyankar break; 266b2b2dd24SShri Abhyankar case 3: 267c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_newdatastruct; 268b2b2dd24SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct_v2; 269b2b2dd24SShri Abhyankar break; 270b2b2dd24SShri Abhyankar case 4: 271c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_newdatastruct; 272b2b2dd24SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering_newdatastruct_v2; 273b2b2dd24SShri Abhyankar break; 274b2b2dd24SShri Abhyankar case 5: 275c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_newdatastruct; 27653cca76cSShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering_newdatastruct_v2; 277b2b2dd24SShri Abhyankar break; 278b2b2dd24SShri Abhyankar case 6: 279c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_newdatastruct; 28053cca76cSShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering_newdatastruct_v2; 281b2b2dd24SShri Abhyankar break; 282b2b2dd24SShri Abhyankar case 7: 283c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_newdatastruct; 28453cca76cSShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering_newdatastruct_v2; 285b2b2dd24SShri Abhyankar break; 286b2b2dd24SShri Abhyankar default: 287c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct; 2881a83e813SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct_v2; 289b2b2dd24SShri Abhyankar } 290b2b2dd24SShri Abhyankar } 291b2b2dd24SShri Abhyankar else { 292b2b2dd24SShri Abhyankar switch(bs){ 293b2b2dd24SShri Abhyankar case 2: 294c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_newdatastruct; 2950c4413a7SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_2_newdatastruct_v2; 296b2b2dd24SShri Abhyankar break; 297b2b2dd24SShri Abhyankar case 3: 298c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_newdatastruct; 2990c4413a7SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_3_newdatastruct_v2; 300b2b2dd24SShri Abhyankar break; 301b2b2dd24SShri Abhyankar case 4: 302c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_newdatastruct; 30378bb4007SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_4_newdatastruct_v2; 304b2b2dd24SShri Abhyankar break; 305b2b2dd24SShri Abhyankar case 5: 306c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_newdatastruct; 30778bb4007SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_5_newdatastruct_v2; 308b2b2dd24SShri Abhyankar break; 309b2b2dd24SShri Abhyankar case 6: 310c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_newdatastruct; 3116506fda5SShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_6_newdatastruct_v2; 312b2b2dd24SShri Abhyankar break; 313b2b2dd24SShri Abhyankar case 7: 314c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_newdatastruct; 31535aa4fcfSShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_7_newdatastruct_v2; 316b2b2dd24SShri Abhyankar break; 317b2b2dd24SShri Abhyankar default: 318c0c7eb62SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct; 31935aa4fcfSShri Abhyankar B->ops->solve = MatSolve_SeqBAIJ_N_newdatastruct_v2; 320b2b2dd24SShri Abhyankar } 321b2b2dd24SShri Abhyankar } 322b2b2dd24SShri Abhyankar /* ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr); */ 323b2b2dd24SShri Abhyankar PetscFunctionReturn(0); 324b2b2dd24SShri Abhyankar } 325b2b2dd24SShri Abhyankar 326b2b2dd24SShri Abhyankar #undef __FUNCT__ 327faca2338SShri Abhyankar #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ" 328faca2338SShri Abhyankar PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 329faca2338SShri Abhyankar { 330faca2338SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 331faca2338SShri Abhyankar PetscInt n=a->mbs,bs = A->rmap->bs,bs2=a->bs2; 332faca2338SShri Abhyankar PetscTruth row_identity,col_identity,both_identity; 333faca2338SShri Abhyankar IS isicol; 334faca2338SShri Abhyankar PetscErrorCode ierr; 335faca2338SShri Abhyankar const PetscInt *r,*ic; 336faca2338SShri Abhyankar PetscInt i,*ai=a->i,*aj=a->j; 337faca2338SShri Abhyankar PetscInt *bi,*bj,*ajtmp; 338faca2338SShri Abhyankar PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 339faca2338SShri Abhyankar PetscReal f; 340faca2338SShri Abhyankar PetscInt nlnk,*lnk,k,**bi_ptr; 341faca2338SShri Abhyankar PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 342faca2338SShri Abhyankar PetscBT lnkbt; 343faca2338SShri Abhyankar PetscTruth newdatastruct=PETSC_FALSE; 344faca2338SShri Abhyankar 345faca2338SShri Abhyankar PetscFunctionBegin; 346faca2338SShri Abhyankar ierr = PetscOptionsGetTruth(PETSC_NULL,"-lu_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr); 347faca2338SShri Abhyankar if(newdatastruct){ 348faca2338SShri Abhyankar ierr = MatLUFactorSymbolic_SeqBAIJ_newdatastruct(B,A,isrow,iscol,info);CHKERRQ(ierr); 349faca2338SShri Abhyankar PetscFunctionReturn(0); 350faca2338SShri Abhyankar } 351faca2338SShri Abhyankar 352faca2338SShri Abhyankar if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 353faca2338SShri Abhyankar ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 354faca2338SShri Abhyankar ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 355faca2338SShri Abhyankar ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 356faca2338SShri Abhyankar 357faca2338SShri Abhyankar /* get new row pointers */ 358*a6df321fSShri Abhyankar ierr = PetscMalloc2(n+1,PetscInt,&bi,n+1,PetscInt,&bdiag);CHKERRQ(ierr); 359*a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 360c7272abeSHong Zhang 361c7272abeSHong Zhang /* linked list for storing column indices of the active row */ 362c7272abeSHong Zhang nlnk = n + 1; 363c7272abeSHong Zhang ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 364c7272abeSHong Zhang 365c7272abeSHong Zhang ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 366c7272abeSHong Zhang 367c7272abeSHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 368c7272abeSHong Zhang f = info->fill; 369c7272abeSHong Zhang ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 370c7272abeSHong Zhang current_space = free_space; 37183287d42SBarry Smith 37283287d42SBarry Smith for (i=0; i<n; i++) { 373c7272abeSHong Zhang /* copy previous fill into linked list */ 37483287d42SBarry Smith nzi = 0; 375c7272abeSHong Zhang nnz = ai[r[i]+1] - ai[r[i]]; 376c7272abeSHong Zhang if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 377c7272abeSHong Zhang ajtmp = aj + ai[r[i]]; 378c7272abeSHong Zhang ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 379c7272abeSHong Zhang nzi += nlnk; 380c7272abeSHong Zhang 381c7272abeSHong Zhang /* add pivot rows into linked list */ 382c7272abeSHong Zhang row = lnk[n]; 383c7272abeSHong Zhang while (row < i) { 384c7272abeSHong Zhang nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 385c7272abeSHong Zhang ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 386c7272abeSHong Zhang ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 387c7272abeSHong Zhang nzi += nlnk; 388c7272abeSHong Zhang row = lnk[row]; 38983287d42SBarry Smith } 390c7272abeSHong Zhang bi[i+1] = bi[i] + nzi; 391c7272abeSHong Zhang im[i] = nzi; 392c7272abeSHong Zhang 393c7272abeSHong Zhang /* mark bdiag */ 394c7272abeSHong Zhang nzbd = 0; 395c7272abeSHong Zhang nnz = nzi; 396c7272abeSHong Zhang k = lnk[n]; 397c7272abeSHong Zhang while (nnz-- && k < i){ 398c7272abeSHong Zhang nzbd++; 399c7272abeSHong Zhang k = lnk[k]; 400c7272abeSHong Zhang } 401c7272abeSHong Zhang bdiag[i] = bi[i] + nzbd; 402c7272abeSHong Zhang 403c7272abeSHong Zhang /* if free space is not available, make more free space */ 404c7272abeSHong Zhang if (current_space->local_remaining<nzi) { 405c7272abeSHong Zhang nnz = (n - i)*nzi; /* estimated and max additional space needed */ 406c7272abeSHong Zhang ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 407c7272abeSHong Zhang reallocs++; 40883287d42SBarry Smith } 40983287d42SBarry Smith 410c7272abeSHong Zhang /* copy data into free space, then initialize lnk */ 411c7272abeSHong Zhang ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 412c7272abeSHong Zhang bi_ptr[i] = current_space->array; 413c7272abeSHong Zhang current_space->array += nzi; 414c7272abeSHong Zhang current_space->local_used += nzi; 415c7272abeSHong Zhang current_space->local_remaining -= nzi; 416c7272abeSHong Zhang } 4176cf91177SBarry Smith #if defined(PETSC_USE_INFO) 41883287d42SBarry Smith if (ai[n] != 0) { 419c7272abeSHong Zhang PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 420ae15b995SBarry Smith ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 421ae15b995SBarry Smith ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 422ae15b995SBarry Smith ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 423ae15b995SBarry Smith ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 42483287d42SBarry Smith } else { 425c7272abeSHong Zhang ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 42683287d42SBarry Smith } 42763ba0a88SBarry Smith #endif 42883287d42SBarry Smith 42983287d42SBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 43083287d42SBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 43183287d42SBarry Smith 432c7272abeSHong Zhang /* destroy list of free space and other temporary array(s) */ 433c7272abeSHong Zhang ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 434c7272abeSHong Zhang ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 435c7272abeSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 436c7272abeSHong Zhang ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 43783287d42SBarry Smith 43883287d42SBarry Smith /* put together the new matrix */ 439719d5645SBarry Smith ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 440719d5645SBarry Smith ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 441719d5645SBarry Smith b = (Mat_SeqBAIJ*)(B)->data; 442e6b907acSBarry Smith b->free_a = PETSC_TRUE; 443e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 444c7272abeSHong Zhang b->singlemalloc = PETSC_FALSE; 445c7272abeSHong Zhang ierr = PetscMalloc((bi[n]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 446c7272abeSHong Zhang b->j = bj; 447c7272abeSHong Zhang b->i = bi; 448c7272abeSHong Zhang b->diag = bdiag; 4497f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 45083287d42SBarry Smith b->ilen = 0; 45183287d42SBarry Smith b->imax = 0; 45283287d42SBarry Smith b->row = isrow; 45383287d42SBarry Smith b->col = iscol; 454d3d32019SBarry Smith b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 45583287d42SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 45683287d42SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 45783287d42SBarry Smith b->icol = isicol; 45887828ca2SBarry Smith ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 459c7272abeSHong Zhang ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr); 46083287d42SBarry Smith 461c7272abeSHong Zhang b->maxnz = b->nz = bi[n] ; 462c7272abeSHong Zhang (B)->factor = MAT_FACTOR_LU; 463719d5645SBarry Smith (B)->info.factor_mallocs = reallocs; 464719d5645SBarry Smith (B)->info.fill_ratio_given = f; 465c7272abeSHong Zhang 46683287d42SBarry Smith if (ai[n] != 0) { 467c7272abeSHong Zhang (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 46883287d42SBarry Smith } else { 469719d5645SBarry Smith (B)->info.fill_ratio_needed = 0.0; 47083287d42SBarry Smith } 471c7272abeSHong Zhang 472db4efbfdSBarry Smith ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 473db4efbfdSBarry Smith ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 4747d18ce8fSMatthew Knepley both_identity = (PetscTruth) (row_identity && col_identity); 47541df41f0SMatthew Knepley ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr); 47683287d42SBarry Smith PetscFunctionReturn(0); 47783287d42SBarry Smith } 478db4efbfdSBarry Smith 479