1be1d678aSKris Buschelman #define PETSCMAT_DLL 2289bc588SBarry Smith 3b377110cSBarry Smith 47c4f633dSBarry Smith #include "../src/mat/impls/aij/seq/aij.h" 5783ef271SHong Zhang #include "../src/mat/impls/sbaij/seq/sbaij.h" 61393bc97SHong Zhang #include "petscbt.h" 77c4f633dSBarry Smith #include "../src/mat/utils/freespace.h" 83b2fbd54SBarry Smith 9f6a26d55SMatthew Knepley EXTERN_C_BEGIN 104a2ae208SSatish Balay #undef __FUNCT__ 114a2ae208SSatish Balay #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 128fa24674SBarry Smith /* 138fa24674SBarry Smith Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 148fa24674SBarry Smith */ 15dfbe8321SBarry Smith PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 163b2fbd54SBarry Smith { 178fa24674SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 188fa24674SBarry Smith PetscErrorCode ierr; 19889a8dedSBarry Smith PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 208fa24674SBarry Smith const PetscInt *ai = a->i, *aj = a->j; 218fa24674SBarry Smith const PetscScalar *aa = a->a; 228fa24674SBarry Smith PetscTruth *done; 23889a8dedSBarry Smith PetscReal best,past = 0,future; 243a40ed3dSBarry Smith 258fa24674SBarry Smith PetscFunctionBegin; 268fa24674SBarry Smith /* pick initial row */ 278fa24674SBarry Smith best = -1; 288fa24674SBarry Smith for (i=0; i<n; i++) { 298fa24674SBarry Smith future = 0; 308fa24674SBarry Smith for (j=ai[i]; j<ai[i+1]; j++) { 318fa24674SBarry Smith if (aj[j] != i) future += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]); 328fa24674SBarry Smith } 338fa24674SBarry Smith if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 348fa24674SBarry Smith if (past/future > best) { 358fa24674SBarry Smith best = past/future; 368fa24674SBarry Smith current = i; 378fa24674SBarry Smith } 388fa24674SBarry Smith } 398fa24674SBarry Smith 408fa24674SBarry Smith ierr = PetscMalloc(n*sizeof(PetscTruth),&done);CHKERRQ(ierr); 418fa24674SBarry Smith ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr); 428fa24674SBarry Smith ierr = PetscMemzero(done,n*sizeof(PetscTruth));CHKERRQ(ierr); 438fa24674SBarry Smith order[0] = current; 448fa24674SBarry Smith for (i=0; i<n-1; i++) { 458fa24674SBarry Smith done[current] = PETSC_TRUE; 468fa24674SBarry Smith best = -1; 478fa24674SBarry Smith /* loop over all neighbors of current pivot */ 488fa24674SBarry Smith for (j=ai[current]; j<ai[current+1]; j++) { 498fa24674SBarry Smith jj = aj[j]; 508fa24674SBarry Smith if (done[jj]) continue; 518fa24674SBarry Smith /* loop over columns of potential next row computing weights for below and above diagonal */ 528fa24674SBarry Smith past = future = 0.0; 538fa24674SBarry Smith for (k=ai[jj]; k<ai[jj+1]; k++) { 548fa24674SBarry Smith kk = aj[k]; 558fa24674SBarry Smith if (done[kk]) past += PetscAbsScalar(aa[k]); 568fa24674SBarry Smith else if (kk != jj) future += PetscAbsScalar(aa[k]); 578fa24674SBarry Smith } 588fa24674SBarry Smith if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 598fa24674SBarry Smith if (past/future > best) { 608fa24674SBarry Smith best = past/future; 618fa24674SBarry Smith newcurrent = jj; 628fa24674SBarry Smith } 638fa24674SBarry Smith } 648fa24674SBarry Smith if (best == -1) { /* no neighbors to select from so select best of all that remain */ 658fa24674SBarry Smith best = -1; 668fa24674SBarry Smith for (k=0; k<n; k++) { 678fa24674SBarry Smith if (done[k]) continue; 688fa24674SBarry Smith future = 0; 698fa24674SBarry Smith past = 0; 708fa24674SBarry Smith for (j=ai[k]; j<ai[k+1]; j++) { 718fa24674SBarry Smith kk = aj[j]; 728fa24674SBarry Smith if (done[kk]) past += PetscAbsScalar(aa[j]); 738fa24674SBarry Smith else if (kk != k) future += PetscAbsScalar(aa[j]); 748fa24674SBarry Smith } 758fa24674SBarry Smith if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 768fa24674SBarry Smith if (past/future > best) { 778fa24674SBarry Smith best = past/future; 788fa24674SBarry Smith newcurrent = k; 798fa24674SBarry Smith } 808fa24674SBarry Smith } 818fa24674SBarry Smith } 828fa24674SBarry Smith if (current == newcurrent) SETERRQ(PETSC_ERR_PLIB,"newcurrent cannot be current"); 838fa24674SBarry Smith current = newcurrent; 848fa24674SBarry Smith order[i+1] = current; 858fa24674SBarry Smith } 868fa24674SBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,irow);CHKERRQ(ierr); 878fa24674SBarry Smith *icol = *irow; 888fa24674SBarry Smith ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 898fa24674SBarry Smith ierr = PetscFree(done);CHKERRQ(ierr); 908fa24674SBarry Smith ierr = PetscFree(order);CHKERRQ(ierr); 91d64ed03dSBarry Smith PetscFunctionReturn(0); 923b2fbd54SBarry Smith } 93f6a26d55SMatthew Knepley EXTERN_C_END 943b2fbd54SBarry Smith 95e631078cSBarry Smith EXTERN_C_BEGIN 964a2ae208SSatish Balay #undef __FUNCT__ 97db4efbfdSBarry Smith #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 98db4efbfdSBarry Smith PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 99db4efbfdSBarry Smith { 100db4efbfdSBarry Smith PetscFunctionBegin; 101db4efbfdSBarry Smith *flg = PETSC_TRUE; 102db4efbfdSBarry Smith PetscFunctionReturn(0); 103db4efbfdSBarry Smith } 104db4efbfdSBarry Smith EXTERN_C_END 105db4efbfdSBarry Smith 106db4efbfdSBarry Smith EXTERN_C_BEGIN 107db4efbfdSBarry Smith #undef __FUNCT__ 108b24902e0SBarry Smith #define __FUNCT__ "MatGetFactor_seqaij_petsc" 109b24902e0SBarry Smith PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 110b24902e0SBarry Smith { 111d0f46423SBarry Smith PetscInt n = A->rmap->n; 112b24902e0SBarry Smith PetscErrorCode ierr; 113b24902e0SBarry Smith 114b24902e0SBarry Smith PetscFunctionBegin; 115b24902e0SBarry Smith ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 116b24902e0SBarry Smith ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 117599ef60dSHong Zhang if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){ 118b24902e0SBarry Smith ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 119db4efbfdSBarry Smith (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 12035bd34faSBarry Smith (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 1211da05e5aSHong Zhang (*B)->ops->iludtfactor = MatILUDTFactor_SeqAIJ; 122b24902e0SBarry Smith } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 123b24902e0SBarry Smith ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 124b24902e0SBarry Smith ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1255c9eb25fSBarry Smith (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 1265c9eb25fSBarry Smith (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 127b24902e0SBarry Smith } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 128db4efbfdSBarry Smith (*B)->factor = ftype; 129b24902e0SBarry Smith PetscFunctionReturn(0); 130b24902e0SBarry Smith } 131e631078cSBarry Smith EXTERN_C_END 132b24902e0SBarry Smith 133b24902e0SBarry Smith #undef __FUNCT__ 134b9617806SBarry Smith #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 1350481f469SBarry Smith PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 136289bc588SBarry Smith { 137416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 138289bc588SBarry Smith IS isicol; 1396849ba73SBarry Smith PetscErrorCode ierr; 1405d0c19d7SBarry Smith const PetscInt *r,*ic; 1415d0c19d7SBarry Smith PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 1421393bc97SHong Zhang PetscInt *bi,*bj,*ajtmp; 1431393bc97SHong Zhang PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 1449e046878SBarry Smith PetscReal f; 1454875ba38SHong Zhang PetscInt nlnk,*lnk,k,**bi_ptr; 146a1a86e44SBarry Smith PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1471393bc97SHong Zhang PetscBT lnkbt; 148ce3d78c0SShri Abhyankar PetscTruth newdatastruct=PETSC_FALSE; 149289bc588SBarry Smith 1503a40ed3dSBarry Smith PetscFunctionBegin; 151ce3d78c0SShri Abhyankar ierr = PetscOptionsGetTruth(PETSC_NULL,"-lu_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr); 152ce3d78c0SShri Abhyankar if(newdatastruct){ 153ce3d78c0SShri Abhyankar ierr = MatLUFactorSymbolic_SeqAIJ_newdatastruct(B,A,isrow,iscol,info);CHKERRQ(ierr); 154ce3d78c0SShri Abhyankar PetscFunctionReturn(0); 155ce3d78c0SShri Abhyankar } 156ce3d78c0SShri Abhyankar 157d0f46423SBarry Smith if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 1584c49b128SBarry Smith ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 159ac121b3dSBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 160ac121b3dSBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 161289bc588SBarry Smith 162289bc588SBarry Smith /* get new row pointers */ 1631393bc97SHong Zhang ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1641393bc97SHong Zhang bi[0] = 0; 1651393bc97SHong Zhang 1661393bc97SHong Zhang /* bdiag is location of diagonal in factor */ 1671393bc97SHong Zhang ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1681393bc97SHong Zhang bdiag[0] = 0; 1691393bc97SHong Zhang 1701393bc97SHong Zhang /* linked list for storing column indices of the active row */ 1711393bc97SHong Zhang nlnk = n + 1; 1721393bc97SHong Zhang ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1731393bc97SHong Zhang 17435baf27eSSatish Balay ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 1751393bc97SHong Zhang 1761393bc97SHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 177d3d32019SBarry Smith f = info->fill; 178a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1791393bc97SHong Zhang current_space = free_space; 180289bc588SBarry Smith 181289bc588SBarry Smith for (i=0; i<n; i++) { 1821393bc97SHong Zhang /* copy previous fill into linked list */ 183289bc588SBarry Smith nzi = 0; 1841393bc97SHong Zhang nnz = ai[r[i]+1] - ai[r[i]]; 1853b4a8b6dSBarry Smith if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1861393bc97SHong Zhang ajtmp = aj + ai[r[i]]; 187afcc9904SHong Zhang ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1881393bc97SHong Zhang nzi += nlnk; 1891393bc97SHong Zhang 1901393bc97SHong Zhang /* add pivot rows into linked list */ 1911393bc97SHong Zhang row = lnk[n]; 1921393bc97SHong Zhang while (row < i) { 193d1170560SHong Zhang nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 194d1170560SHong Zhang ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 195d1170560SHong Zhang ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 1961393bc97SHong Zhang nzi += nlnk; 1971393bc97SHong Zhang row = lnk[row]; 198289bc588SBarry Smith } 1991393bc97SHong Zhang bi[i+1] = bi[i] + nzi; 2001393bc97SHong Zhang im[i] = nzi; 2011393bc97SHong Zhang 2021393bc97SHong Zhang /* mark bdiag */ 2031393bc97SHong Zhang nzbd = 0; 2041393bc97SHong Zhang nnz = nzi; 2051393bc97SHong Zhang k = lnk[n]; 2061393bc97SHong Zhang while (nnz-- && k < i){ 2071393bc97SHong Zhang nzbd++; 2081393bc97SHong Zhang k = lnk[k]; 2091393bc97SHong Zhang } 2101393bc97SHong Zhang bdiag[i] = bi[i] + nzbd; 2111393bc97SHong Zhang 2121393bc97SHong Zhang /* if free space is not available, make more free space */ 2131393bc97SHong Zhang if (current_space->local_remaining<nzi) { 2141393bc97SHong Zhang nnz = (n - i)*nzi; /* estimated and max additional space needed */ 215a1a86e44SBarry Smith ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 2161393bc97SHong Zhang reallocs++; 2171393bc97SHong Zhang } 2181393bc97SHong Zhang 2191393bc97SHong Zhang /* copy data into free space, then initialize lnk */ 2201393bc97SHong Zhang ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2211393bc97SHong Zhang bi_ptr[i] = current_space->array; 2221393bc97SHong Zhang current_space->array += nzi; 2231393bc97SHong Zhang current_space->local_used += nzi; 2241393bc97SHong Zhang current_space->local_remaining -= nzi; 225289bc588SBarry Smith } 2266cf91177SBarry Smith #if defined(PETSC_USE_INFO) 227464e8d44SSatish Balay if (ai[n] != 0) { 2281393bc97SHong Zhang PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 229ae15b995SBarry Smith ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 230ae15b995SBarry Smith ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 231ae15b995SBarry Smith ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 232ae15b995SBarry Smith ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 23305bf559cSSatish Balay } else { 234ae15b995SBarry Smith ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 23505bf559cSSatish Balay } 23663ba0a88SBarry Smith #endif 2372fb3ed76SBarry Smith 238898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 239898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2401987afe7SBarry Smith 2411393bc97SHong Zhang /* destroy list of free space and other temporary array(s) */ 2421393bc97SHong Zhang ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 243a1a86e44SBarry Smith ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 2441393bc97SHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 24535baf27eSSatish Balay ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 246289bc588SBarry Smith 247289bc588SBarry Smith /* put together the new matrix */ 248719d5645SBarry Smith ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 249719d5645SBarry Smith ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 250719d5645SBarry Smith b = (Mat_SeqAIJ*)(B)->data; 251e6b907acSBarry Smith b->free_a = PETSC_TRUE; 252e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 2537c922b88SBarry Smith b->singlemalloc = PETSC_FALSE; 2541393bc97SHong Zhang ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 2551393bc97SHong Zhang b->j = bj; 2561393bc97SHong Zhang b->i = bi; 2571393bc97SHong Zhang b->diag = bdiag; 258416022c9SBarry Smith b->ilen = 0; 259416022c9SBarry Smith b->imax = 0; 260416022c9SBarry Smith b->row = isrow; 261416022c9SBarry Smith b->col = iscol; 262c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 263c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 26482bf6240SBarry Smith b->icol = isicol; 26587828ca2SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2661393bc97SHong Zhang 2671393bc97SHong Zhang /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 268719d5645SBarry Smith ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2691393bc97SHong Zhang b->maxnz = b->nz = bi[n] ; 2707fd98bd6SLois Curfman McInnes 271719d5645SBarry Smith (B)->factor = MAT_FACTOR_LU; 272719d5645SBarry Smith (B)->info.factor_mallocs = reallocs; 273719d5645SBarry Smith (B)->info.fill_ratio_given = f; 274703deb49SSatish Balay 2759f7d0b68SHong Zhang if (ai[n]) { 276719d5645SBarry Smith (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 277eea2913aSSatish Balay } else { 278719d5645SBarry Smith (B)->info.fill_ratio_needed = 0.0; 279eea2913aSSatish Balay } 280719d5645SBarry Smith (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 281719d5645SBarry Smith (B)->ops->solve = MatSolve_SeqAIJ; 282719d5645SBarry Smith (B)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 283db4efbfdSBarry Smith /* switch to inodes if appropriate */ 284719d5645SBarry Smith ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr); 2853a40ed3dSBarry Smith PetscFunctionReturn(0); 286289bc588SBarry Smith } 2871393bc97SHong Zhang 288ce3d78c0SShri Abhyankar #undef __FUNCT__ 289ce3d78c0SShri Abhyankar #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_newdatastruct" 290ce3d78c0SShri Abhyankar PetscErrorCode MatLUFactorSymbolic_SeqAIJ_newdatastruct(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 291ce3d78c0SShri Abhyankar { 292ce3d78c0SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 293ce3d78c0SShri Abhyankar IS isicol; 294ce3d78c0SShri Abhyankar PetscErrorCode ierr; 295ce3d78c0SShri Abhyankar const PetscInt *r,*ic; 296ce3d78c0SShri Abhyankar PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 297ce3d78c0SShri Abhyankar PetscInt *bi,*bj,*ajtmp; 298ce3d78c0SShri Abhyankar PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 299ce3d78c0SShri Abhyankar PetscReal f; 300ce3d78c0SShri Abhyankar PetscInt nlnk,*lnk,k,**bi_ptr; 301ce3d78c0SShri Abhyankar PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 302ce3d78c0SShri Abhyankar PetscBT lnkbt; 303ce3d78c0SShri Abhyankar 304ce3d78c0SShri Abhyankar PetscFunctionBegin; 305ce3d78c0SShri Abhyankar if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 306ce3d78c0SShri Abhyankar ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 307ce3d78c0SShri Abhyankar ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 308ce3d78c0SShri Abhyankar ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 309ce3d78c0SShri Abhyankar 310ce3d78c0SShri Abhyankar /* get new row pointers */ 311ce3d78c0SShri Abhyankar ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 312ce3d78c0SShri Abhyankar bi[0] = 0; 313ce3d78c0SShri Abhyankar 314ce3d78c0SShri Abhyankar /* bdiag is location of diagonal in factor */ 315ce3d78c0SShri Abhyankar ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 316ce3d78c0SShri Abhyankar bdiag[0] = 0; 317ce3d78c0SShri Abhyankar 318ce3d78c0SShri Abhyankar /* linked list for storing column indices of the active row */ 319ce3d78c0SShri Abhyankar nlnk = n + 1; 320ce3d78c0SShri Abhyankar ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 321ce3d78c0SShri Abhyankar 322ce3d78c0SShri Abhyankar ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 323ce3d78c0SShri Abhyankar 324ce3d78c0SShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 325ce3d78c0SShri Abhyankar f = info->fill; 326ce3d78c0SShri Abhyankar ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 327ce3d78c0SShri Abhyankar current_space = free_space; 328ce3d78c0SShri Abhyankar 329ce3d78c0SShri Abhyankar for (i=0; i<n; i++) { 330ce3d78c0SShri Abhyankar /* copy previous fill into linked list */ 331ce3d78c0SShri Abhyankar nzi = 0; 332ce3d78c0SShri Abhyankar nnz = ai[r[i]+1] - ai[r[i]]; 333ce3d78c0SShri Abhyankar if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 334ce3d78c0SShri Abhyankar ajtmp = aj + ai[r[i]]; 335ce3d78c0SShri Abhyankar ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 336ce3d78c0SShri Abhyankar nzi += nlnk; 337ce3d78c0SShri Abhyankar 338ce3d78c0SShri Abhyankar /* add pivot rows into linked list */ 339ce3d78c0SShri Abhyankar row = lnk[n]; 340ce3d78c0SShri Abhyankar while (row < i){ 34170319c84SHong Zhang nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 342ce3d78c0SShri Abhyankar ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 343ce3d78c0SShri Abhyankar ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 344ce3d78c0SShri Abhyankar nzi += nlnk; 345ce3d78c0SShri Abhyankar row = lnk[row]; 346ce3d78c0SShri Abhyankar } 347ce3d78c0SShri Abhyankar bi[i+1] = bi[i] + nzi; 348ce3d78c0SShri Abhyankar im[i] = nzi; 349ce3d78c0SShri Abhyankar 350ce3d78c0SShri Abhyankar /* mark bdiag */ 351ce3d78c0SShri Abhyankar nzbd = 0; 352ce3d78c0SShri Abhyankar nnz = nzi; 353ce3d78c0SShri Abhyankar k = lnk[n]; 354ce3d78c0SShri Abhyankar while (nnz-- && k < i){ 355ce3d78c0SShri Abhyankar nzbd++; 356ce3d78c0SShri Abhyankar k = lnk[k]; 357ce3d78c0SShri Abhyankar } 358783ef271SHong Zhang bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 359ce3d78c0SShri Abhyankar 360ce3d78c0SShri Abhyankar /* if free space is not available, make more free space */ 361ce3d78c0SShri Abhyankar if (current_space->local_remaining<nzi) { 362ce3d78c0SShri Abhyankar nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 363ce3d78c0SShri Abhyankar ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 364ce3d78c0SShri Abhyankar reallocs++; 365ce3d78c0SShri Abhyankar } 366ce3d78c0SShri Abhyankar 367ce3d78c0SShri Abhyankar /* copy data into free space, then initialize lnk */ 368ce3d78c0SShri Abhyankar ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 369ce3d78c0SShri Abhyankar bi_ptr[i] = current_space->array; 370ce3d78c0SShri Abhyankar current_space->array += nzi; 371ce3d78c0SShri Abhyankar current_space->local_used += nzi; 372ce3d78c0SShri Abhyankar current_space->local_remaining -= nzi; 373ce3d78c0SShri Abhyankar } 374ce3d78c0SShri Abhyankar #if defined(PETSC_USE_INFO) 375ce3d78c0SShri Abhyankar if (ai[n] != 0) { 376ce3d78c0SShri Abhyankar PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 377ce3d78c0SShri Abhyankar ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 378ce3d78c0SShri Abhyankar ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 379ce3d78c0SShri Abhyankar ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 380ce3d78c0SShri Abhyankar ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 381ce3d78c0SShri Abhyankar } else { 382ce3d78c0SShri Abhyankar ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 383ce3d78c0SShri Abhyankar } 384ce3d78c0SShri Abhyankar #endif 385ce3d78c0SShri Abhyankar 386ce3d78c0SShri Abhyankar ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 387ce3d78c0SShri Abhyankar ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 388ce3d78c0SShri Abhyankar 389ce3d78c0SShri Abhyankar /* destroy list of free space and other temporary array(s) */ 390ce3d78c0SShri Abhyankar ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 391783ef271SHong Zhang ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 392ce3d78c0SShri Abhyankar ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 393ce3d78c0SShri Abhyankar ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 394ce3d78c0SShri Abhyankar 395ce3d78c0SShri Abhyankar /* put together the new matrix */ 396ce3d78c0SShri Abhyankar ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 397ce3d78c0SShri Abhyankar ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 398ce3d78c0SShri Abhyankar b = (Mat_SeqAIJ*)(B)->data; 399ce3d78c0SShri Abhyankar b->free_a = PETSC_TRUE; 400ce3d78c0SShri Abhyankar b->free_ij = PETSC_TRUE; 401ce3d78c0SShri Abhyankar b->singlemalloc = PETSC_FALSE; 402ce3d78c0SShri Abhyankar ierr = PetscMalloc((bi[2*n+1])*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 403ce3d78c0SShri Abhyankar b->j = bj; 404ce3d78c0SShri Abhyankar b->i = bi; 405ce3d78c0SShri Abhyankar b->diag = bdiag; 406ce3d78c0SShri Abhyankar b->ilen = 0; 407ce3d78c0SShri Abhyankar b->imax = 0; 408ce3d78c0SShri Abhyankar b->row = isrow; 409ce3d78c0SShri Abhyankar b->col = iscol; 410ce3d78c0SShri Abhyankar ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 411ce3d78c0SShri Abhyankar ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 412ce3d78c0SShri Abhyankar b->icol = isicol; 413ce3d78c0SShri Abhyankar ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 414ce3d78c0SShri Abhyankar 415ce3d78c0SShri Abhyankar /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 416ce3d78c0SShri Abhyankar ierr = PetscLogObjectMemory(B,bi[2*n+1]*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 417ce3d78c0SShri Abhyankar b->maxnz = b->nz = bi[2*n+1] ; 418ce3d78c0SShri Abhyankar 419ce3d78c0SShri Abhyankar (B)->factor = MAT_FACTOR_LU; 420ce3d78c0SShri Abhyankar (B)->info.factor_mallocs = reallocs; 421ce3d78c0SShri Abhyankar (B)->info.fill_ratio_given = f; 422ce3d78c0SShri Abhyankar 423ce3d78c0SShri Abhyankar if (ai[n]) { 424894a2336SHong Zhang (B)->info.fill_ratio_needed = ((PetscReal)bi[2*n+1])/((PetscReal)ai[n]); 425ce3d78c0SShri Abhyankar } else { 426ce3d78c0SShri Abhyankar (B)->info.fill_ratio_needed = 0.0; 427ce3d78c0SShri Abhyankar } 428ce3d78c0SShri Abhyankar (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_newdatastruct; 429ce3d78c0SShri Abhyankar PetscFunctionReturn(0); 430ce3d78c0SShri Abhyankar } 431ce3d78c0SShri Abhyankar 4325b5bc046SBarry Smith /* 4335b5bc046SBarry Smith Trouble in factorization, should we dump the original matrix? 4345b5bc046SBarry Smith */ 4355b5bc046SBarry Smith #undef __FUNCT__ 4365b5bc046SBarry Smith #define __FUNCT__ "MatFactorDumpMatrix" 4375b5bc046SBarry Smith PetscErrorCode MatFactorDumpMatrix(Mat A) 4385b5bc046SBarry Smith { 4395b5bc046SBarry Smith PetscErrorCode ierr; 44090d69ab7SBarry Smith PetscTruth flg = PETSC_FALSE; 4415b5bc046SBarry Smith 4425b5bc046SBarry Smith PetscFunctionBegin; 44390d69ab7SBarry Smith ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 4445b5bc046SBarry Smith if (flg) { 4455b5bc046SBarry Smith PetscViewer viewer; 4465b5bc046SBarry Smith char filename[PETSC_MAX_PATH_LEN]; 4475b5bc046SBarry Smith 4485b5bc046SBarry Smith ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 4497adad957SLisandro Dalcin ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 4505b5bc046SBarry Smith ierr = MatView(A,viewer);CHKERRQ(ierr); 4515b5bc046SBarry Smith ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 4525b5bc046SBarry Smith } 4535b5bc046SBarry Smith PetscFunctionReturn(0); 4545b5bc046SBarry Smith } 4555b5bc046SBarry Smith 456db4efbfdSBarry Smith extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec); 457db4efbfdSBarry Smith 4580a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */ 459894a2336SHong Zhang extern PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_newdatastruct(Mat,Vec,Vec); 460894a2336SHong Zhang extern PetscErrorCode MatSolve_SeqAIJ_newdatastruct(Mat,Vec,Vec); 461c9c72f8fSHong Zhang 462c9c72f8fSHong Zhang #undef __FUNCT__ 463c9c72f8fSHong Zhang #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_newdatastruct" 464c9c72f8fSHong Zhang PetscErrorCode MatLUFactorNumeric_SeqAIJ_newdatastruct(Mat B,Mat A,const MatFactorInfo *info) 465c9c72f8fSHong Zhang { 466c9c72f8fSHong Zhang Mat C=B; 467c9c72f8fSHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 468c9c72f8fSHong Zhang IS isrow = b->row,isicol = b->icol; 469c9c72f8fSHong Zhang PetscErrorCode ierr; 470c9c72f8fSHong Zhang const PetscInt *r,*ic,*ics; 471c9c72f8fSHong Zhang PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 472c9c72f8fSHong Zhang PetscInt *ajtmp,*bjtmp,nz,nzL,row,*bdiag=b->diag,*pj; 473c9c72f8fSHong Zhang MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 474c9c72f8fSHong Zhang PetscTruth row_identity,col_identity; 475c9c72f8fSHong Zhang 476d4ad06f7SHong Zhang LUShift_Ctx sctx; 477d4ad06f7SHong Zhang PetscInt *ddiag,newshift; 478d4ad06f7SHong Zhang PetscReal rs; 479d4ad06f7SHong Zhang MatScalar d; 480d4ad06f7SHong Zhang 481c9c72f8fSHong Zhang PetscFunctionBegin; 482d4ad06f7SHong Zhang /* ZeropivotSetUp(): initialize shift context sctx */ 483d4ad06f7SHong Zhang sctx.nshift = 0; 484d4ad06f7SHong Zhang sctx.nshift_max = 0; 485d4ad06f7SHong Zhang sctx.shift_top = 0.0; 486d4ad06f7SHong Zhang sctx.shift_lo = 0.0; 487d4ad06f7SHong Zhang sctx.shift_hi = 0.0; 488d4ad06f7SHong Zhang sctx.shift_fraction = 0.0; 489d4ad06f7SHong Zhang sctx.shift_amount = 0.0; 490d4ad06f7SHong Zhang 491d4ad06f7SHong Zhang /* if both shift schemes are chosen by user, only use info->shiftpd */ 492d4ad06f7SHong Zhang if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 493d4ad06f7SHong Zhang ddiag = a->diag; 494d4ad06f7SHong Zhang sctx.shift_top = info->zeropivot; 495d4ad06f7SHong Zhang for (i=0; i<n; i++) { 496d4ad06f7SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 497d4ad06f7SHong Zhang d = (aa)[ddiag[i]]; 498d4ad06f7SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 499d4ad06f7SHong Zhang v = aa+ai[i]; 500d4ad06f7SHong Zhang nz = ai[i+1] - ai[i]; 501d4ad06f7SHong Zhang for (j=0; j<nz; j++) 502d4ad06f7SHong Zhang rs += PetscAbsScalar(v[j]); 503d4ad06f7SHong Zhang if (rs>sctx.shift_top) sctx.shift_top = rs; 504d4ad06f7SHong Zhang } 505d4ad06f7SHong Zhang sctx.shift_top *= 1.1; 506d4ad06f7SHong Zhang sctx.nshift_max = 5; 507d4ad06f7SHong Zhang sctx.shift_lo = 0.; 508d4ad06f7SHong Zhang sctx.shift_hi = 1.; 509d4ad06f7SHong Zhang } 510d4ad06f7SHong Zhang 511c9c72f8fSHong Zhang ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 512c9c72f8fSHong Zhang ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 513c9c72f8fSHong Zhang ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 514c9c72f8fSHong Zhang ics = ic; 515c9c72f8fSHong Zhang 516d4ad06f7SHong Zhang do { 517d4ad06f7SHong Zhang sctx.lushift = PETSC_FALSE; 518c9c72f8fSHong Zhang for (i=0; i<n; i++){ 519c9c72f8fSHong Zhang /* zero rtmp */ 520c9c72f8fSHong Zhang /* L part */ 521c9c72f8fSHong Zhang nz = bi[i+1] - bi[i]; 522c9c72f8fSHong Zhang bjtmp = bj + bi[i]; 523c9c72f8fSHong Zhang for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 524c9c72f8fSHong Zhang 525c9c72f8fSHong Zhang /* U part */ 5268fc3a347SHong Zhang nz = bi[2*n-i+1] - bi[2*n-i]; 5278fc3a347SHong Zhang bjtmp = bj + bi[2*n-i]; 528c9c72f8fSHong Zhang for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 529c9c72f8fSHong Zhang 530c9c72f8fSHong Zhang /* load in initial (unfactored row) */ 531c9c72f8fSHong Zhang nz = ai[r[i]+1] - ai[r[i]]; 532c9c72f8fSHong Zhang ajtmp = aj + ai[r[i]]; 533c9c72f8fSHong Zhang v = aa + ai[r[i]]; 534c9c72f8fSHong Zhang for (j=0; j<nz; j++) { 535c9c72f8fSHong Zhang rtmp[ics[ajtmp[j]]] = v[j]; 536c9c72f8fSHong Zhang } 537d4ad06f7SHong Zhang /* ZeropivotApply() */ 538d4ad06f7SHong Zhang rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 539c9c72f8fSHong Zhang 540c9c72f8fSHong Zhang /* elimination */ 541c9c72f8fSHong Zhang bjtmp = bj + bi[i]; 542c9c72f8fSHong Zhang row = *bjtmp++; 543c9c72f8fSHong Zhang nzL = bi[i+1] - bi[i]; 544c9c72f8fSHong Zhang k = 0; 545c9c72f8fSHong Zhang while (k < nzL) { 546c9c72f8fSHong Zhang pc = rtmp + row; 547c9c72f8fSHong Zhang if (*pc != 0.0) { 548c9c72f8fSHong Zhang pv = b->a + bdiag[row]; 549c9c72f8fSHong Zhang multiplier = *pc * (*pv); 550c9c72f8fSHong Zhang *pc = multiplier; 5518fc3a347SHong Zhang pj = b->j + bi[2*n-row]; /* begining of U(row,:) */ 5528fc3a347SHong Zhang pv = b->a + bi[2*n-row]; 5538fc3a347SHong Zhang nz = bi[2*n-row+1] - bi[2*n-row] - 1; /* num of entries in U(row,:), excluding diag */ 554c9c72f8fSHong Zhang for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 555c9c72f8fSHong Zhang ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 556c9c72f8fSHong Zhang } 557c9c72f8fSHong Zhang row = *bjtmp++; k++; 558c9c72f8fSHong Zhang } 559c9c72f8fSHong Zhang 560c9c72f8fSHong Zhang /* finished row so stick it into b->a */ 561d4ad06f7SHong Zhang rs = 0.0; 562c9c72f8fSHong Zhang /* L part */ 563c9c72f8fSHong Zhang pv = b->a + bi[i] ; 564c9c72f8fSHong Zhang pj = b->j + bi[i] ; 565c9c72f8fSHong Zhang nz = bi[i+1] - bi[i]; 566c9c72f8fSHong Zhang for (j=0; j<nz; j++) { 567d4ad06f7SHong Zhang pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 568c9c72f8fSHong Zhang } 569c9c72f8fSHong Zhang 570c9c72f8fSHong Zhang /* U part */ 5718fc3a347SHong Zhang pv = b->a + bi[2*n-i]; 5728fc3a347SHong Zhang pj = b->j + bi[2*n-i]; 5738fc3a347SHong Zhang nz = bi[2*n-i+1] - bi[2*n-i] - 1; 574d4ad06f7SHong Zhang for (j=0; j<nz; j++) { 575d4ad06f7SHong Zhang pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 576c9c72f8fSHong Zhang } 577d4ad06f7SHong Zhang 578d4ad06f7SHong Zhang /* ZeropivotCheck() */ 579d4ad06f7SHong Zhang sctx.rs = rs; 580d4ad06f7SHong Zhang sctx.pv = rtmp[i]; 581d4ad06f7SHong Zhang ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 582d4ad06f7SHong Zhang if (newshift == 1) break; 583d4ad06f7SHong Zhang 584d4ad06f7SHong Zhang /* Mark diagonal and invert diagonal for simplier triangular solves */ 585d4ad06f7SHong Zhang pv = b->a + bdiag[i]; 586d4ad06f7SHong Zhang *pv = 1.0/rtmp[i]; 587d4ad06f7SHong Zhang 588d4ad06f7SHong Zhang } /* endof for (i=0; i<n; i++){ */ 589d4ad06f7SHong Zhang 590d4ad06f7SHong Zhang /* ZeropivotRefine() */ 591d4ad06f7SHong Zhang if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){ 592d4ad06f7SHong Zhang /* 593d4ad06f7SHong Zhang * if no shift in this attempt & shifting & started shifting & can refine, 594d4ad06f7SHong Zhang * then try lower shift 595d4ad06f7SHong Zhang */ 596d4ad06f7SHong Zhang sctx.shift_hi = sctx.shift_fraction; 597d4ad06f7SHong Zhang sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 598d4ad06f7SHong Zhang sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 599d4ad06f7SHong Zhang sctx.lushift = PETSC_TRUE; 600d4ad06f7SHong Zhang sctx.nshift++; 601d4ad06f7SHong Zhang } 602d4ad06f7SHong Zhang } while (sctx.lushift); 603d4ad06f7SHong Zhang 604c9c72f8fSHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 605c9c72f8fSHong Zhang ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 606c9c72f8fSHong Zhang ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 607c9c72f8fSHong Zhang 608c9c72f8fSHong Zhang ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 609c9c72f8fSHong Zhang ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 610c9c72f8fSHong Zhang if (row_identity && col_identity) { 611894a2336SHong Zhang C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_newdatastruct; 612c9c72f8fSHong Zhang } else { 613894a2336SHong Zhang C->ops->solve = MatSolve_SeqAIJ_newdatastruct; 614c9c72f8fSHong Zhang } 615c9c72f8fSHong Zhang 616c9c72f8fSHong Zhang C->ops->solveadd = 0; 617c9c72f8fSHong Zhang C->ops->solvetranspose = 0; 618c9c72f8fSHong Zhang C->ops->solvetransposeadd = 0; 619c9c72f8fSHong Zhang C->ops->matsolve = 0; 620c9c72f8fSHong Zhang C->assembled = PETSC_TRUE; 621c9c72f8fSHong Zhang C->preallocated = PETSC_TRUE; 622c9c72f8fSHong Zhang ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 623d4ad06f7SHong Zhang 624d4ad06f7SHong Zhang /* ZeropivotView() */ 625d4ad06f7SHong Zhang if (sctx.nshift){ 626d4ad06f7SHong Zhang if (info->shiftpd) { 627d4ad06f7SHong Zhang ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 628d4ad06f7SHong Zhang } else if (info->shiftnz) { 629d4ad06f7SHong Zhang ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 630d4ad06f7SHong Zhang } 631d4ad06f7SHong Zhang } 632c9c72f8fSHong Zhang PetscFunctionReturn(0); 633c9c72f8fSHong Zhang } 634c9c72f8fSHong Zhang 635*813a1f2bSShri Abhyankar /* ----------------------------------------------------------- */ 636*813a1f2bSShri Abhyankar extern PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_newdatastruct_v2(Mat,Vec,Vec); 637*813a1f2bSShri Abhyankar /* extern PetscErrorCode MatSolve_SeqAIJ_newdatastruct(Mat,Vec,Vec); */ 638*813a1f2bSShri Abhyankar 639*813a1f2bSShri Abhyankar #undef __FUNCT__ 640*813a1f2bSShri Abhyankar #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_newdatastruct_v2" 641*813a1f2bSShri Abhyankar PetscErrorCode MatLUFactorNumeric_SeqAIJ_newdatastruct_v2(Mat B,Mat A,const MatFactorInfo *info) 642*813a1f2bSShri Abhyankar { 643*813a1f2bSShri Abhyankar Mat C=B; 644*813a1f2bSShri Abhyankar Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 645*813a1f2bSShri Abhyankar IS isrow = b->row,isicol = b->icol; 646*813a1f2bSShri Abhyankar PetscErrorCode ierr; 647*813a1f2bSShri Abhyankar const PetscInt *r,*ic,*ics; 648*813a1f2bSShri Abhyankar PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 649*813a1f2bSShri Abhyankar PetscInt *ajtmp,*bjtmp,nz,nzL,row,*bdiag=b->diag,*pj; 650*813a1f2bSShri Abhyankar MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 651*813a1f2bSShri Abhyankar PetscTruth row_identity,col_identity; 652*813a1f2bSShri Abhyankar 653*813a1f2bSShri Abhyankar LUShift_Ctx sctx; 654*813a1f2bSShri Abhyankar PetscInt *ddiag,newshift; 655*813a1f2bSShri Abhyankar PetscReal rs; 656*813a1f2bSShri Abhyankar MatScalar d; 657*813a1f2bSShri Abhyankar 658*813a1f2bSShri Abhyankar PetscFunctionBegin; 659*813a1f2bSShri Abhyankar /* ZeropivotSetUp(): initialize shift context sctx */ 660*813a1f2bSShri Abhyankar sctx.nshift = 0; 661*813a1f2bSShri Abhyankar sctx.nshift_max = 0; 662*813a1f2bSShri Abhyankar sctx.shift_top = 0.0; 663*813a1f2bSShri Abhyankar sctx.shift_lo = 0.0; 664*813a1f2bSShri Abhyankar sctx.shift_hi = 0.0; 665*813a1f2bSShri Abhyankar sctx.shift_fraction = 0.0; 666*813a1f2bSShri Abhyankar sctx.shift_amount = 0.0; 667*813a1f2bSShri Abhyankar 668*813a1f2bSShri Abhyankar /* if both shift schemes are chosen by user, only use info->shiftpd */ 669*813a1f2bSShri Abhyankar if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 670*813a1f2bSShri Abhyankar ddiag = a->diag; 671*813a1f2bSShri Abhyankar sctx.shift_top = info->zeropivot; 672*813a1f2bSShri Abhyankar for (i=0; i<n; i++) { 673*813a1f2bSShri Abhyankar /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 674*813a1f2bSShri Abhyankar d = (aa)[ddiag[i]]; 675*813a1f2bSShri Abhyankar rs = -PetscAbsScalar(d) - PetscRealPart(d); 676*813a1f2bSShri Abhyankar v = aa+ai[i]; 677*813a1f2bSShri Abhyankar nz = ai[i+1] - ai[i]; 678*813a1f2bSShri Abhyankar for (j=0; j<nz; j++) 679*813a1f2bSShri Abhyankar rs += PetscAbsScalar(v[j]); 680*813a1f2bSShri Abhyankar if (rs>sctx.shift_top) sctx.shift_top = rs; 681*813a1f2bSShri Abhyankar } 682*813a1f2bSShri Abhyankar sctx.shift_top *= 1.1; 683*813a1f2bSShri Abhyankar sctx.nshift_max = 5; 684*813a1f2bSShri Abhyankar sctx.shift_lo = 0.; 685*813a1f2bSShri Abhyankar sctx.shift_hi = 1.; 686*813a1f2bSShri Abhyankar } 687*813a1f2bSShri Abhyankar 688*813a1f2bSShri Abhyankar ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 689*813a1f2bSShri Abhyankar ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 690*813a1f2bSShri Abhyankar ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 691*813a1f2bSShri Abhyankar ics = ic; 692*813a1f2bSShri Abhyankar 693*813a1f2bSShri Abhyankar do { 694*813a1f2bSShri Abhyankar sctx.lushift = PETSC_FALSE; 695*813a1f2bSShri Abhyankar for (i=0; i<n; i++){ 696*813a1f2bSShri Abhyankar /* zero rtmp */ 697*813a1f2bSShri Abhyankar /* L part */ 698*813a1f2bSShri Abhyankar nz = bi[i+1] - bi[i]; 699*813a1f2bSShri Abhyankar bjtmp = bj + bi[i]; 700*813a1f2bSShri Abhyankar for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 701*813a1f2bSShri Abhyankar 702*813a1f2bSShri Abhyankar /* U part */ 703*813a1f2bSShri Abhyankar nz = bi[2*n-i+1] - bi[2*n-i]; 704*813a1f2bSShri Abhyankar bjtmp = bj + bi[2*n-i]; 705*813a1f2bSShri Abhyankar for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 706*813a1f2bSShri Abhyankar 707*813a1f2bSShri Abhyankar /* load in initial (unfactored row) */ 708*813a1f2bSShri Abhyankar nz = ai[r[i]+1] - ai[r[i]]; 709*813a1f2bSShri Abhyankar ajtmp = aj + ai[r[i]]; 710*813a1f2bSShri Abhyankar v = aa + ai[r[i]]; 711*813a1f2bSShri Abhyankar for (j=0; j<nz; j++) { 712*813a1f2bSShri Abhyankar rtmp[ics[ajtmp[j]]] = v[j]; 713*813a1f2bSShri Abhyankar } 714*813a1f2bSShri Abhyankar /* ZeropivotApply() */ 715*813a1f2bSShri Abhyankar rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 716*813a1f2bSShri Abhyankar 717*813a1f2bSShri Abhyankar /* elimination */ 718*813a1f2bSShri Abhyankar bjtmp = bj + bi[i]; 719*813a1f2bSShri Abhyankar row = *bjtmp++; 720*813a1f2bSShri Abhyankar nzL = bi[i+1] - bi[i]; 721*813a1f2bSShri Abhyankar k = 0; 722*813a1f2bSShri Abhyankar while (k < nzL) { 723*813a1f2bSShri Abhyankar pc = rtmp + row; 724*813a1f2bSShri Abhyankar if (*pc != 0.0) { 725*813a1f2bSShri Abhyankar pv = b->a + bdiag[row]; 726*813a1f2bSShri Abhyankar multiplier = *pc * (*pv); 727*813a1f2bSShri Abhyankar *pc = multiplier; 728*813a1f2bSShri Abhyankar pj = b->j + bi[2*n-row]; /* begining of U(row,:) */ 729*813a1f2bSShri Abhyankar pv = b->a + bi[2*n-row]; 730*813a1f2bSShri Abhyankar nz = bi[2*n-row+1] - bi[2*n-row] - 1; /* num of entries in U(row,:), excluding diag */ 731*813a1f2bSShri Abhyankar for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 732*813a1f2bSShri Abhyankar ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 733*813a1f2bSShri Abhyankar } 734*813a1f2bSShri Abhyankar row = *bjtmp++; k++; 735*813a1f2bSShri Abhyankar } 736*813a1f2bSShri Abhyankar 737*813a1f2bSShri Abhyankar /* finished row so stick it into b->a */ 738*813a1f2bSShri Abhyankar rs = 0.0; 739*813a1f2bSShri Abhyankar /* L part */ 740*813a1f2bSShri Abhyankar pv = b->a + bi[i] ; 741*813a1f2bSShri Abhyankar pj = b->j + bi[i] ; 742*813a1f2bSShri Abhyankar nz = bi[i+1] - bi[i]; 743*813a1f2bSShri Abhyankar for (j=0; j<nz; j++) { 744*813a1f2bSShri Abhyankar pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 745*813a1f2bSShri Abhyankar } 746*813a1f2bSShri Abhyankar 747*813a1f2bSShri Abhyankar /* U part */ 748*813a1f2bSShri Abhyankar pv = b->a + bi[2*n-i]; 749*813a1f2bSShri Abhyankar pj = b->j + bi[2*n-i]; 750*813a1f2bSShri Abhyankar nz = bi[2*n-i+1] - bi[2*n-i] - 1; 751*813a1f2bSShri Abhyankar for (j=0; j<nz; j++) { 752*813a1f2bSShri Abhyankar pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 753*813a1f2bSShri Abhyankar } 754*813a1f2bSShri Abhyankar 755*813a1f2bSShri Abhyankar /* ZeropivotCheck() */ 756*813a1f2bSShri Abhyankar sctx.rs = rs; 757*813a1f2bSShri Abhyankar sctx.pv = rtmp[i]; 758*813a1f2bSShri Abhyankar ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 759*813a1f2bSShri Abhyankar if (newshift == 1) break; 760*813a1f2bSShri Abhyankar 761*813a1f2bSShri Abhyankar /* Mark diagonal and invert diagonal for simplier triangular solves */ 762*813a1f2bSShri Abhyankar pv = b->a + bdiag[i]; 763*813a1f2bSShri Abhyankar *pv = 1.0/rtmp[i]; 764*813a1f2bSShri Abhyankar 765*813a1f2bSShri Abhyankar } /* endof for (i=0; i<n; i++){ */ 766*813a1f2bSShri Abhyankar 767*813a1f2bSShri Abhyankar /* ZeropivotRefine() */ 768*813a1f2bSShri Abhyankar if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){ 769*813a1f2bSShri Abhyankar /* 770*813a1f2bSShri Abhyankar * if no shift in this attempt & shifting & started shifting & can refine, 771*813a1f2bSShri Abhyankar * then try lower shift 772*813a1f2bSShri Abhyankar */ 773*813a1f2bSShri Abhyankar sctx.shift_hi = sctx.shift_fraction; 774*813a1f2bSShri Abhyankar sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 775*813a1f2bSShri Abhyankar sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 776*813a1f2bSShri Abhyankar sctx.lushift = PETSC_TRUE; 777*813a1f2bSShri Abhyankar sctx.nshift++; 778*813a1f2bSShri Abhyankar } 779*813a1f2bSShri Abhyankar } while (sctx.lushift); 780*813a1f2bSShri Abhyankar 781*813a1f2bSShri Abhyankar ierr = PetscFree(rtmp);CHKERRQ(ierr); 782*813a1f2bSShri Abhyankar ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 783*813a1f2bSShri Abhyankar ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 784*813a1f2bSShri Abhyankar 785*813a1f2bSShri Abhyankar ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 786*813a1f2bSShri Abhyankar ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 787*813a1f2bSShri Abhyankar if (row_identity && col_identity) { 788*813a1f2bSShri Abhyankar C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_newdatastruct; 789*813a1f2bSShri Abhyankar } else { 790*813a1f2bSShri Abhyankar C->ops->solve = MatSolve_SeqAIJ_newdatastruct; 791*813a1f2bSShri Abhyankar } 792*813a1f2bSShri Abhyankar 793*813a1f2bSShri Abhyankar C->ops->solveadd = 0; 794*813a1f2bSShri Abhyankar C->ops->solvetranspose = 0; 795*813a1f2bSShri Abhyankar C->ops->solvetransposeadd = 0; 796*813a1f2bSShri Abhyankar C->ops->matsolve = 0; 797*813a1f2bSShri Abhyankar C->assembled = PETSC_TRUE; 798*813a1f2bSShri Abhyankar C->preallocated = PETSC_TRUE; 799*813a1f2bSShri Abhyankar ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 800*813a1f2bSShri Abhyankar 801*813a1f2bSShri Abhyankar /* ZeropivotView() */ 802*813a1f2bSShri Abhyankar if (sctx.nshift){ 803*813a1f2bSShri Abhyankar if (info->shiftpd) { 804*813a1f2bSShri Abhyankar ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 805*813a1f2bSShri Abhyankar } else if (info->shiftnz) { 806*813a1f2bSShri Abhyankar ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 807*813a1f2bSShri Abhyankar } 808*813a1f2bSShri Abhyankar } 809*813a1f2bSShri Abhyankar PetscFunctionReturn(0); 810*813a1f2bSShri Abhyankar } 811*813a1f2bSShri Abhyankar 8124a2ae208SSatish Balay #undef __FUNCT__ 8134a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 8140481f469SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 815289bc588SBarry Smith { 816719d5645SBarry Smith Mat C=B; 817416022c9SBarry Smith Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 81882bf6240SBarry Smith IS isrow = b->row,isicol = b->icol; 8196849ba73SBarry Smith PetscErrorCode ierr; 8205d0c19d7SBarry Smith const PetscInt *r,*ic,*ics; 821d278edc7SBarry Smith PetscInt nz,row,i,j,n=A->rmap->n,diag; 822d278edc7SBarry Smith const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 823d278edc7SBarry Smith const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 824d278edc7SBarry Smith MatScalar *pv,*rtmp,*pc,multiplier,d; 825d278edc7SBarry Smith const MatScalar *v,*aa=a->a; 82633f9893dSHong Zhang PetscReal rs=0.0; 827b3bf805bSHong Zhang LUShift_Ctx sctx; 82842898933SHong Zhang PetscInt newshift,*ddiag; 829289bc588SBarry Smith 8303a40ed3dSBarry Smith PetscFunctionBegin; 83178b31e54SBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 83278b31e54SBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 833b78a477dSHong Zhang ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 8349f7d0b68SHong Zhang ics = ic; 835289bc588SBarry Smith 836d4ad06f7SHong Zhang /* initialize shift context sctx */ 837d4ad06f7SHong Zhang sctx.nshift = 0; 838ada7143aSSatish Balay sctx.nshift_max = 0; 839d4ad06f7SHong Zhang sctx.shift_top = 0.0; 840d4ad06f7SHong Zhang sctx.shift_lo = 0.0; 841d4ad06f7SHong Zhang sctx.shift_hi = 0.0; 842d4ad06f7SHong Zhang sctx.shift_fraction = 0.0; 843d4ad06f7SHong Zhang sctx.shift_amount = 0.0; 844ada7143aSSatish Balay 845118f5789SBarry Smith /* if both shift schemes are chosen by user, only use info->shiftpd */ 8461a890ab1SHong Zhang if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 84742898933SHong Zhang ddiag = a->diag; 8489f7d0b68SHong Zhang sctx.shift_top = info->zeropivot; 8496cc28720Svictorle for (i=0; i<n; i++) { 8509f95998fSHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 8519f7d0b68SHong Zhang d = (aa)[ddiag[i]]; 8521a890ab1SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 8539f7d0b68SHong Zhang v = aa+ai[i]; 8549f7d0b68SHong Zhang nz = ai[i+1] - ai[i]; 855e24cfe64SHong Zhang for (j=0; j<nz; j++) 8561a890ab1SHong Zhang rs += PetscAbsScalar(v[j]); 8571a890ab1SHong Zhang if (rs>sctx.shift_top) sctx.shift_top = rs; 8586cc28720Svictorle } 859118f5789SBarry Smith sctx.shift_top *= 1.1; 860d2276718SHong Zhang sctx.nshift_max = 5; 861d2276718SHong Zhang sctx.shift_lo = 0.; 862d2276718SHong Zhang sctx.shift_hi = 1.; 863a0a356efSHong Zhang } 864a0a356efSHong Zhang 865085256b3SBarry Smith do { 866d2276718SHong Zhang sctx.lushift = PETSC_FALSE; 867289bc588SBarry Smith for (i=0; i<n; i++){ 868b3bf805bSHong Zhang nz = bi[i+1] - bi[i]; 869b3bf805bSHong Zhang bjtmp = bj + bi[i]; 8709f7d0b68SHong Zhang for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 871289bc588SBarry Smith 872289bc588SBarry Smith /* load in initial (unfactored row) */ 8739f7d0b68SHong Zhang nz = ai[r[i]+1] - ai[r[i]]; 8749f7d0b68SHong Zhang ajtmp = aj + ai[r[i]]; 8759f7d0b68SHong Zhang v = aa + ai[r[i]]; 876085256b3SBarry Smith for (j=0; j<nz; j++) { 877b3bf805bSHong Zhang rtmp[ics[ajtmp[j]]] = v[j]; 878085256b3SBarry Smith } 879d2276718SHong Zhang rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 880bd1bc851SBarry Smith /* if (sctx.shift_amount > 0.0) printf("row %d, shift %g\n",i,sctx.shift_amount); */ 881289bc588SBarry Smith 882b3bf805bSHong Zhang row = *bjtmp++; 883f2582111SSatish Balay while (row < i) { 8848c37ef55SBarry Smith pc = rtmp + row; 8858c37ef55SBarry Smith if (*pc != 0.0) { 886010a20caSHong Zhang pv = b->a + diag_offset[row]; 887010a20caSHong Zhang pj = b->j + diag_offset[row] + 1; 88835aab85fSBarry Smith multiplier = *pc / *pv++; 8898c37ef55SBarry Smith *pc = multiplier; 890b3bf805bSHong Zhang nz = bi[row+1] - diag_offset[row] - 1; 8919f7d0b68SHong Zhang for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 892dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 893289bc588SBarry Smith } 894b3bf805bSHong Zhang row = *bjtmp++; 895289bc588SBarry Smith } 896416022c9SBarry Smith /* finished row so stick it into b->a */ 897b3bf805bSHong Zhang pv = b->a + bi[i] ; 898b3bf805bSHong Zhang pj = b->j + bi[i] ; 899b3bf805bSHong Zhang nz = bi[i+1] - bi[i]; 900b3bf805bSHong Zhang diag = diag_offset[i] - bi[i]; 901e57ff13aSBarry Smith rs = 0.0; 902d3d32019SBarry Smith for (j=0; j<nz; j++) { 9039f7d0b68SHong Zhang pv[j] = rtmp[pj[j]]; 9049f7d0b68SHong Zhang rs += PetscAbsScalar(pv[j]); 905d3d32019SBarry Smith } 906e57ff13aSBarry Smith rs -= PetscAbsScalar(pv[diag]); 907d2276718SHong Zhang 908b3bf805bSHong Zhang /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 909d2276718SHong Zhang sctx.rs = rs; 910d2276718SHong Zhang sctx.pv = pv[diag]; 911c6b1f410SHong Zhang ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 9125b5bc046SBarry Smith if (newshift == 1) break; 91335aab85fSBarry Smith } 914d2276718SHong Zhang 9150481f469SBarry Smith if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 9166cc28720Svictorle /* 9176c037d1bSvictorle * if no shift in this attempt & shifting & started shifting & can refine, 9186cc28720Svictorle * then try lower shift 9196cc28720Svictorle */ 9200481f469SBarry Smith sctx.shift_hi = sctx.shift_fraction; 9210481f469SBarry Smith sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 9220481f469SBarry Smith sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 923d2276718SHong Zhang sctx.lushift = PETSC_TRUE; 924d2276718SHong Zhang sctx.nshift++; 9256cc28720Svictorle } 926d2276718SHong Zhang } while (sctx.lushift); 9270f11f9aeSSatish Balay 92835aab85fSBarry Smith /* invert diagonal entries for simplier triangular solves */ 92935aab85fSBarry Smith for (i=0; i<n; i++) { 930010a20caSHong Zhang b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 93135aab85fSBarry Smith } 932606d414cSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 93378b31e54SBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 93478b31e54SBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 935db4efbfdSBarry Smith if (b->inode.use) { 936db4efbfdSBarry Smith C->ops->solve = MatSolve_Inode; 937db4efbfdSBarry Smith } else { 9388d8c7c43SBarry Smith PetscTruth row_identity, col_identity; 9398d8c7c43SBarry Smith ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 9408d8c7c43SBarry Smith ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 9418d8c7c43SBarry Smith if (row_identity && col_identity) { 9428d8c7c43SBarry Smith C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 9438d8c7c43SBarry Smith } else { 944db4efbfdSBarry Smith C->ops->solve = MatSolve_SeqAIJ; 945db4efbfdSBarry Smith } 9468d8c7c43SBarry Smith } 947db4efbfdSBarry Smith C->ops->solveadd = MatSolveAdd_SeqAIJ; 948db4efbfdSBarry Smith C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 949db4efbfdSBarry Smith C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 950de26497bSBarry Smith C->ops->matsolve = MatMatSolve_SeqAIJ; 951c456f294SBarry Smith C->assembled = PETSC_TRUE; 9525c9eb25fSBarry Smith C->preallocated = PETSC_TRUE; 953d0f46423SBarry Smith ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 954d2276718SHong Zhang if (sctx.nshift){ 955e738e422SBarry Smith if (info->shiftpd) { 9560481f469SBarry Smith ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 957e738e422SBarry Smith } else if (info->shiftnz) { 958e738e422SBarry Smith ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 9596cc28720Svictorle } 9600697b6caSHong Zhang } 9613a40ed3dSBarry Smith PetscFunctionReturn(0); 962289bc588SBarry Smith } 9630889b5dcSvictorle 964137fb511SHong Zhang /* 965137fb511SHong Zhang This routine implements inplace ILU(0) with row or/and column permutations. 966137fb511SHong Zhang Input: 967137fb511SHong Zhang A - original matrix 968137fb511SHong Zhang Output; 969137fb511SHong Zhang A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 970137fb511SHong Zhang a->j (col index) is permuted by the inverse of colperm, then sorted 971137fb511SHong Zhang a->a reordered accordingly with a->j 972137fb511SHong Zhang a->diag (ptr to diagonal elements) is updated. 973137fb511SHong Zhang */ 974137fb511SHong Zhang #undef __FUNCT__ 975137fb511SHong Zhang #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 9760481f469SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 977137fb511SHong Zhang { 978b051339dSHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 979b051339dSHong Zhang IS isrow = a->row,isicol = a->icol; 980137fb511SHong Zhang PetscErrorCode ierr; 9815d0c19d7SBarry Smith const PetscInt *r,*ic,*ics; 9825d0c19d7SBarry Smith PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 9835d0c19d7SBarry Smith PetscInt *ajtmp,nz,row; 984b051339dSHong Zhang PetscInt *diag = a->diag,nbdiag,*pj; 985a77337e4SBarry Smith PetscScalar *rtmp,*pc,multiplier,d; 986a77337e4SBarry Smith MatScalar *v,*pv; 987137fb511SHong Zhang PetscReal rs; 988137fb511SHong Zhang LUShift_Ctx sctx; 989b051339dSHong Zhang PetscInt newshift; 990137fb511SHong Zhang 991137fb511SHong Zhang PetscFunctionBegin; 992719d5645SBarry Smith if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 993137fb511SHong Zhang ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 994137fb511SHong Zhang ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 995137fb511SHong Zhang ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 996137fb511SHong Zhang ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 997b051339dSHong Zhang ics = ic; 998137fb511SHong Zhang 999137fb511SHong Zhang sctx.shift_top = 0; 1000137fb511SHong Zhang sctx.nshift_max = 0; 1001137fb511SHong Zhang sctx.shift_lo = 0; 1002137fb511SHong Zhang sctx.shift_hi = 0; 10030481f469SBarry Smith sctx.shift_fraction = 0; 1004137fb511SHong Zhang 1005137fb511SHong Zhang /* if both shift schemes are chosen by user, only use info->shiftpd */ 1006137fb511SHong Zhang if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 1007137fb511SHong Zhang sctx.shift_top = 0; 1008137fb511SHong Zhang for (i=0; i<n; i++) { 1009137fb511SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 1010b051339dSHong Zhang d = (a->a)[diag[i]]; 1011137fb511SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 1012b051339dSHong Zhang v = a->a+ai[i]; 1013b051339dSHong Zhang nz = ai[i+1] - ai[i]; 1014137fb511SHong Zhang for (j=0; j<nz; j++) 1015137fb511SHong Zhang rs += PetscAbsScalar(v[j]); 1016137fb511SHong Zhang if (rs>sctx.shift_top) sctx.shift_top = rs; 1017137fb511SHong Zhang } 1018137fb511SHong Zhang if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 1019137fb511SHong Zhang sctx.shift_top *= 1.1; 1020137fb511SHong Zhang sctx.nshift_max = 5; 1021137fb511SHong Zhang sctx.shift_lo = 0.; 1022137fb511SHong Zhang sctx.shift_hi = 1.; 1023137fb511SHong Zhang } 1024137fb511SHong Zhang 1025137fb511SHong Zhang sctx.shift_amount = 0; 1026137fb511SHong Zhang sctx.nshift = 0; 1027137fb511SHong Zhang do { 1028137fb511SHong Zhang sctx.lushift = PETSC_FALSE; 1029137fb511SHong Zhang for (i=0; i<n; i++){ 1030b051339dSHong Zhang /* load in initial unfactored row */ 1031b051339dSHong Zhang nz = ai[r[i]+1] - ai[r[i]]; 1032b051339dSHong Zhang ajtmp = aj + ai[r[i]]; 1033b051339dSHong Zhang v = a->a + ai[r[i]]; 1034137fb511SHong Zhang /* sort permuted ajtmp and values v accordingly */ 1035b051339dSHong Zhang for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 1036137fb511SHong Zhang ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 1037137fb511SHong Zhang 1038b051339dSHong Zhang diag[r[i]] = ai[r[i]]; 1039137fb511SHong Zhang for (j=0; j<nz; j++) { 1040137fb511SHong Zhang rtmp[ajtmp[j]] = v[j]; 1041b051339dSHong Zhang if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 1042137fb511SHong Zhang } 1043137fb511SHong Zhang rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 1044137fb511SHong Zhang 1045b051339dSHong Zhang row = *ajtmp++; 1046137fb511SHong Zhang while (row < i) { 1047137fb511SHong Zhang pc = rtmp + row; 1048137fb511SHong Zhang if (*pc != 0.0) { 1049b051339dSHong Zhang pv = a->a + diag[r[row]]; 1050b051339dSHong Zhang pj = aj + diag[r[row]] + 1; 1051137fb511SHong Zhang 1052137fb511SHong Zhang multiplier = *pc / *pv++; 1053137fb511SHong Zhang *pc = multiplier; 1054b051339dSHong Zhang nz = ai[r[row]+1] - diag[r[row]] - 1; 1055b051339dSHong Zhang for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 1056dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 1057137fb511SHong Zhang } 1058b051339dSHong Zhang row = *ajtmp++; 1059137fb511SHong Zhang } 1060b051339dSHong Zhang /* finished row so overwrite it onto a->a */ 1061b051339dSHong Zhang pv = a->a + ai[r[i]] ; 1062b051339dSHong Zhang pj = aj + ai[r[i]] ; 1063b051339dSHong Zhang nz = ai[r[i]+1] - ai[r[i]]; 1064b051339dSHong Zhang nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 1065137fb511SHong Zhang 1066137fb511SHong Zhang rs = 0.0; 1067137fb511SHong Zhang for (j=0; j<nz; j++) { 1068b051339dSHong Zhang pv[j] = rtmp[pj[j]]; 1069b051339dSHong Zhang if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 1070137fb511SHong Zhang } 1071137fb511SHong Zhang 1072137fb511SHong Zhang /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 1073137fb511SHong Zhang sctx.rs = rs; 1074b051339dSHong Zhang sctx.pv = pv[nbdiag]; 1075c6b1f410SHong Zhang ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 1076137fb511SHong Zhang if (newshift == 1) break; 1077137fb511SHong Zhang } 1078137fb511SHong Zhang 10790481f469SBarry Smith if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 1080137fb511SHong Zhang /* 1081137fb511SHong Zhang * if no shift in this attempt & shifting & started shifting & can refine, 1082137fb511SHong Zhang * then try lower shift 1083137fb511SHong Zhang */ 10840481f469SBarry Smith sctx.shift_hi = sctx.shift_fraction; 10850481f469SBarry Smith sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 10860481f469SBarry Smith sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 1087137fb511SHong Zhang sctx.lushift = PETSC_TRUE; 1088137fb511SHong Zhang sctx.nshift++; 1089137fb511SHong Zhang } 1090137fb511SHong Zhang } while (sctx.lushift); 1091137fb511SHong Zhang 1092137fb511SHong Zhang /* invert diagonal entries for simplier triangular solves */ 1093137fb511SHong Zhang for (i=0; i<n; i++) { 1094b051339dSHong Zhang a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 1095137fb511SHong Zhang } 1096137fb511SHong Zhang 1097137fb511SHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 1098137fb511SHong Zhang ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1099137fb511SHong Zhang ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1100b051339dSHong Zhang A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 1101db4efbfdSBarry Smith A->ops->solveadd = MatSolveAdd_SeqAIJ; 1102db4efbfdSBarry Smith A->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 1103db4efbfdSBarry Smith A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 1104b051339dSHong Zhang A->assembled = PETSC_TRUE; 11055c9eb25fSBarry Smith A->preallocated = PETSC_TRUE; 1106d0f46423SBarry Smith ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 1107137fb511SHong Zhang if (sctx.nshift){ 1108e738e422SBarry Smith if (info->shiftpd) { 11090481f469SBarry Smith ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 1110e738e422SBarry Smith } else if (info->shiftnz) { 1111e738e422SBarry Smith ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1112137fb511SHong Zhang } 1113137fb511SHong Zhang } 1114137fb511SHong Zhang PetscFunctionReturn(0); 1115137fb511SHong Zhang } 1116137fb511SHong Zhang 11170a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */ 11184a2ae208SSatish Balay #undef __FUNCT__ 11194a2ae208SSatish Balay #define __FUNCT__ "MatLUFactor_SeqAIJ" 11200481f469SBarry Smith PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 1121da3a660dSBarry Smith { 1122dfbe8321SBarry Smith PetscErrorCode ierr; 1123416022c9SBarry Smith Mat C; 1124416022c9SBarry Smith 11253a40ed3dSBarry Smith PetscFunctionBegin; 112643244d56SBarry Smith ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 1127719d5645SBarry Smith ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 1128719d5645SBarry Smith ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 1129db4efbfdSBarry Smith A->ops->solve = C->ops->solve; 1130db4efbfdSBarry Smith A->ops->solvetranspose = C->ops->solvetranspose; 1131273d9f13SBarry Smith ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 113252e6d16bSBarry Smith ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 11333a40ed3dSBarry Smith PetscFunctionReturn(0); 1134da3a660dSBarry Smith } 11350a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */ 11361d098868SHong Zhang 1137889a8dedSBarry Smith 11384a2ae208SSatish Balay #undef __FUNCT__ 11394a2ae208SSatish Balay #define __FUNCT__ "MatSolve_SeqAIJ" 1140dfbe8321SBarry Smith PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 11418c37ef55SBarry Smith { 1142416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1143416022c9SBarry Smith IS iscol = a->col,isrow = a->row; 11446849ba73SBarry Smith PetscErrorCode ierr; 11455d0c19d7SBarry Smith PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 11465d0c19d7SBarry Smith PetscInt nz; 11475d0c19d7SBarry Smith const PetscInt *rout,*cout,*r,*c; 1148dd6ea824SBarry Smith PetscScalar *x,*tmp,*tmps,sum; 1149d9fead3dSBarry Smith const PetscScalar *b; 1150dd6ea824SBarry Smith const MatScalar *aa = a->a,*v; 11518c37ef55SBarry Smith 11523a40ed3dSBarry Smith PetscFunctionBegin; 11533a40ed3dSBarry Smith if (!n) PetscFunctionReturn(0); 115491bf9adeSBarry Smith 1155d9fead3dSBarry Smith ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 11561ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1157416022c9SBarry Smith tmp = a->solve_work; 11588c37ef55SBarry Smith 11593b2fbd54SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 11603b2fbd54SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 11618c37ef55SBarry Smith 11628c37ef55SBarry Smith /* forward solve the lower triangular */ 11638c37ef55SBarry Smith tmp[0] = b[*r++]; 1164010a20caSHong Zhang tmps = tmp; 11658c37ef55SBarry Smith for (i=1; i<n; i++) { 1166010a20caSHong Zhang v = aa + ai[i] ; 1167010a20caSHong Zhang vi = aj + ai[i] ; 116853e7425aSBarry Smith nz = a->diag[i] - ai[i]; 116953e7425aSBarry Smith sum = b[*r++]; 1170003131ecSBarry Smith PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 11718c37ef55SBarry Smith tmp[i] = sum; 11728c37ef55SBarry Smith } 11738c37ef55SBarry Smith 11748c37ef55SBarry Smith /* backward solve the upper triangular */ 11758c37ef55SBarry Smith for (i=n-1; i>=0; i--){ 1176010a20caSHong Zhang v = aa + a->diag[i] + 1; 1177010a20caSHong Zhang vi = aj + a->diag[i] + 1; 1178416022c9SBarry Smith nz = ai[i+1] - a->diag[i] - 1; 11798c37ef55SBarry Smith sum = tmp[i]; 1180003131ecSBarry Smith PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1181010a20caSHong Zhang x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 11828c37ef55SBarry Smith } 11838c37ef55SBarry Smith 11843b2fbd54SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 11853b2fbd54SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1186d9fead3dSBarry Smith ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 11871ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1188dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 11893a40ed3dSBarry Smith PetscFunctionReturn(0); 11908c37ef55SBarry Smith } 1191026e39d0SSatish Balay 11922bebee5dSHong Zhang #undef __FUNCT__ 11932bebee5dSHong Zhang #define __FUNCT__ "MatMatSolve_SeqAIJ" 11942bebee5dSHong Zhang PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 11952bebee5dSHong Zhang { 11962bebee5dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 11972bebee5dSHong Zhang IS iscol = a->col,isrow = a->row; 11982bebee5dSHong Zhang PetscErrorCode ierr; 11995d0c19d7SBarry Smith PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 12005d0c19d7SBarry Smith PetscInt nz,neq; 12015d0c19d7SBarry Smith const PetscInt *rout,*cout,*r,*c; 1202dd6ea824SBarry Smith PetscScalar *x,*b,*tmp,*tmps,sum; 1203dd6ea824SBarry Smith const MatScalar *aa = a->a,*v; 1204db4efbfdSBarry Smith PetscTruth bisdense,xisdense; 12052bebee5dSHong Zhang 12062bebee5dSHong Zhang PetscFunctionBegin; 12072bebee5dSHong Zhang if (!n) PetscFunctionReturn(0); 12082bebee5dSHong Zhang 1209db4efbfdSBarry Smith ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1210db4efbfdSBarry Smith if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1211db4efbfdSBarry Smith ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1212db4efbfdSBarry Smith if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1213db4efbfdSBarry Smith 12142bebee5dSHong Zhang ierr = MatGetArray(B,&b);CHKERRQ(ierr); 12152bebee5dSHong Zhang ierr = MatGetArray(X,&x);CHKERRQ(ierr); 12162bebee5dSHong Zhang 12172bebee5dSHong Zhang tmp = a->solve_work; 12182bebee5dSHong Zhang ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 12192bebee5dSHong Zhang ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 12202bebee5dSHong Zhang 1221a36b22ccSSatish Balay for (neq=0; neq<B->cmap->n; neq++){ 12222bebee5dSHong Zhang /* forward solve the lower triangular */ 12232bebee5dSHong Zhang tmp[0] = b[r[0]]; 12242bebee5dSHong Zhang tmps = tmp; 12252bebee5dSHong Zhang for (i=1; i<n; i++) { 12262bebee5dSHong Zhang v = aa + ai[i] ; 12272bebee5dSHong Zhang vi = aj + ai[i] ; 12282bebee5dSHong Zhang nz = a->diag[i] - ai[i]; 12292bebee5dSHong Zhang sum = b[r[i]]; 1230003131ecSBarry Smith PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 12312bebee5dSHong Zhang tmp[i] = sum; 12322bebee5dSHong Zhang } 12332bebee5dSHong Zhang /* backward solve the upper triangular */ 12342bebee5dSHong Zhang for (i=n-1; i>=0; i--){ 12352bebee5dSHong Zhang v = aa + a->diag[i] + 1; 12362bebee5dSHong Zhang vi = aj + a->diag[i] + 1; 12372bebee5dSHong Zhang nz = ai[i+1] - a->diag[i] - 1; 12382bebee5dSHong Zhang sum = tmp[i]; 1239003131ecSBarry Smith PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 12402bebee5dSHong Zhang x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 12412bebee5dSHong Zhang } 12422bebee5dSHong Zhang 12432bebee5dSHong Zhang b += n; 12442bebee5dSHong Zhang x += n; 12452bebee5dSHong Zhang } 12462bebee5dSHong Zhang ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 12472bebee5dSHong Zhang ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 12482bebee5dSHong Zhang ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 12492bebee5dSHong Zhang ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 1250dc0b31edSSatish Balay ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 12512bebee5dSHong Zhang PetscFunctionReturn(0); 12522bebee5dSHong Zhang } 12532bebee5dSHong Zhang 1254137fb511SHong Zhang #undef __FUNCT__ 1255137fb511SHong Zhang #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1256137fb511SHong Zhang PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1257137fb511SHong Zhang { 1258137fb511SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1259137fb511SHong Zhang IS iscol = a->col,isrow = a->row; 1260137fb511SHong Zhang PetscErrorCode ierr; 12615d0c19d7SBarry Smith const PetscInt *r,*c,*rout,*cout; 12625d0c19d7SBarry Smith PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 12635d0c19d7SBarry Smith PetscInt nz,row; 1264dd6ea824SBarry Smith PetscScalar *x,*b,*tmp,*tmps,sum; 1265dd6ea824SBarry Smith const MatScalar *aa = a->a,*v; 1266137fb511SHong Zhang 1267137fb511SHong Zhang PetscFunctionBegin; 1268137fb511SHong Zhang if (!n) PetscFunctionReturn(0); 1269137fb511SHong Zhang 1270137fb511SHong Zhang ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1271137fb511SHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1272137fb511SHong Zhang tmp = a->solve_work; 1273137fb511SHong Zhang 1274137fb511SHong Zhang ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1275137fb511SHong Zhang ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1276137fb511SHong Zhang 1277137fb511SHong Zhang /* forward solve the lower triangular */ 1278137fb511SHong Zhang tmp[0] = b[*r++]; 1279137fb511SHong Zhang tmps = tmp; 1280137fb511SHong Zhang for (row=1; row<n; row++) { 1281137fb511SHong Zhang i = rout[row]; /* permuted row */ 1282137fb511SHong Zhang v = aa + ai[i] ; 1283137fb511SHong Zhang vi = aj + ai[i] ; 1284137fb511SHong Zhang nz = a->diag[i] - ai[i]; 1285137fb511SHong Zhang sum = b[*r++]; 1286003131ecSBarry Smith PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1287137fb511SHong Zhang tmp[row] = sum; 1288137fb511SHong Zhang } 1289137fb511SHong Zhang 1290137fb511SHong Zhang /* backward solve the upper triangular */ 1291137fb511SHong Zhang for (row=n-1; row>=0; row--){ 1292137fb511SHong Zhang i = rout[row]; /* permuted row */ 1293137fb511SHong Zhang v = aa + a->diag[i] + 1; 1294137fb511SHong Zhang vi = aj + a->diag[i] + 1; 1295137fb511SHong Zhang nz = ai[i+1] - a->diag[i] - 1; 1296137fb511SHong Zhang sum = tmp[row]; 1297003131ecSBarry Smith PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1298137fb511SHong Zhang x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1299137fb511SHong Zhang } 1300137fb511SHong Zhang 1301137fb511SHong Zhang ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1302137fb511SHong Zhang ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1303137fb511SHong Zhang ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1304137fb511SHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1305dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1306137fb511SHong Zhang PetscFunctionReturn(0); 1307137fb511SHong Zhang } 1308137fb511SHong Zhang 1309930ae53cSBarry Smith /* ----------------------------------------------------------- */ 1310a4005a5dSBarry Smith #include "../src/mat/impls/aij/seq/ftn-kernels/fsolve.h" 13114a2ae208SSatish Balay #undef __FUNCT__ 13124a2ae208SSatish Balay #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 1313dfbe8321SBarry Smith PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 1314930ae53cSBarry Smith { 1315930ae53cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 13166849ba73SBarry Smith PetscErrorCode ierr; 1317d0f46423SBarry Smith PetscInt n = A->rmap->n; 1318b377110cSBarry Smith const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1319356650c2SBarry Smith PetscScalar *x; 1320356650c2SBarry Smith const PetscScalar *b; 1321dd6ea824SBarry Smith const MatScalar *aa = a->a; 1322aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1323356650c2SBarry Smith PetscInt adiag_i,i,nz,ai_i; 1324b377110cSBarry Smith const PetscInt *vi; 1325dd6ea824SBarry Smith const MatScalar *v; 1326dd6ea824SBarry Smith PetscScalar sum; 1327d85afc42SSatish Balay #endif 1328930ae53cSBarry Smith 13293a40ed3dSBarry Smith PetscFunctionBegin; 13303a40ed3dSBarry Smith if (!n) PetscFunctionReturn(0); 1331930ae53cSBarry Smith 1332356650c2SBarry Smith ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 13331ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1334930ae53cSBarry Smith 1335aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 13361c4feecaSSatish Balay fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 13371c4feecaSSatish Balay #else 1338930ae53cSBarry Smith /* forward solve the lower triangular */ 1339930ae53cSBarry Smith x[0] = b[0]; 1340930ae53cSBarry Smith for (i=1; i<n; i++) { 1341930ae53cSBarry Smith ai_i = ai[i]; 1342930ae53cSBarry Smith v = aa + ai_i; 1343930ae53cSBarry Smith vi = aj + ai_i; 1344930ae53cSBarry Smith nz = adiag[i] - ai_i; 1345930ae53cSBarry Smith sum = b[i]; 1346003131ecSBarry Smith PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1347930ae53cSBarry Smith x[i] = sum; 1348930ae53cSBarry Smith } 1349930ae53cSBarry Smith 1350930ae53cSBarry Smith /* backward solve the upper triangular */ 1351930ae53cSBarry Smith for (i=n-1; i>=0; i--){ 1352930ae53cSBarry Smith adiag_i = adiag[i]; 1353d708749eSBarry Smith v = aa + adiag_i + 1; 1354d708749eSBarry Smith vi = aj + adiag_i + 1; 1355930ae53cSBarry Smith nz = ai[i+1] - adiag_i - 1; 1356930ae53cSBarry Smith sum = x[i]; 1357003131ecSBarry Smith PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1358930ae53cSBarry Smith x[i] = sum*aa[adiag_i]; 1359930ae53cSBarry Smith } 13601c4feecaSSatish Balay #endif 1361dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1362356650c2SBarry Smith ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 13631ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 13643a40ed3dSBarry Smith PetscFunctionReturn(0); 1365930ae53cSBarry Smith } 1366930ae53cSBarry Smith 13674a2ae208SSatish Balay #undef __FUNCT__ 13684a2ae208SSatish Balay #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1369dfbe8321SBarry Smith PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1370da3a660dSBarry Smith { 1371416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1372416022c9SBarry Smith IS iscol = a->col,isrow = a->row; 13736849ba73SBarry Smith PetscErrorCode ierr; 137404fbf559SBarry Smith PetscInt i, n = A->rmap->n,j; 13755d0c19d7SBarry Smith PetscInt nz; 137604fbf559SBarry Smith const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 137704fbf559SBarry Smith PetscScalar *x,*tmp,sum; 137804fbf559SBarry Smith const PetscScalar *b; 1379dd6ea824SBarry Smith const MatScalar *aa = a->a,*v; 1380da3a660dSBarry Smith 13813a40ed3dSBarry Smith PetscFunctionBegin; 138278b31e54SBarry Smith if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1383da3a660dSBarry Smith 138404fbf559SBarry Smith ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 13851ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1386416022c9SBarry Smith tmp = a->solve_work; 1387da3a660dSBarry Smith 13883b2fbd54SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 13893b2fbd54SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1390da3a660dSBarry Smith 1391da3a660dSBarry Smith /* forward solve the lower triangular */ 1392da3a660dSBarry Smith tmp[0] = b[*r++]; 1393da3a660dSBarry Smith for (i=1; i<n; i++) { 1394010a20caSHong Zhang v = aa + ai[i] ; 1395010a20caSHong Zhang vi = aj + ai[i] ; 1396416022c9SBarry Smith nz = a->diag[i] - ai[i]; 1397da3a660dSBarry Smith sum = b[*r++]; 139804fbf559SBarry Smith for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1399da3a660dSBarry Smith tmp[i] = sum; 1400da3a660dSBarry Smith } 1401da3a660dSBarry Smith 1402da3a660dSBarry Smith /* backward solve the upper triangular */ 1403da3a660dSBarry Smith for (i=n-1; i>=0; i--){ 1404010a20caSHong Zhang v = aa + a->diag[i] + 1; 1405010a20caSHong Zhang vi = aj + a->diag[i] + 1; 1406416022c9SBarry Smith nz = ai[i+1] - a->diag[i] - 1; 1407da3a660dSBarry Smith sum = tmp[i]; 140804fbf559SBarry Smith for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1409010a20caSHong Zhang tmp[i] = sum*aa[a->diag[i]]; 1410da3a660dSBarry Smith x[*c--] += tmp[i]; 1411da3a660dSBarry Smith } 1412da3a660dSBarry Smith 14133b2fbd54SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 14143b2fbd54SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 141504fbf559SBarry Smith ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 14161ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1417dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1418898183ecSLois Curfman McInnes 14193a40ed3dSBarry Smith PetscFunctionReturn(0); 1420da3a660dSBarry Smith } 142104fbf559SBarry Smith 14224a2ae208SSatish Balay #undef __FUNCT__ 14234a2ae208SSatish Balay #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1424dfbe8321SBarry Smith PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1425da3a660dSBarry Smith { 1426416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 14272235e667SBarry Smith IS iscol = a->col,isrow = a->row; 14286849ba73SBarry Smith PetscErrorCode ierr; 142904fbf559SBarry Smith const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 143004fbf559SBarry Smith PetscInt i,n = A->rmap->n,j; 143104fbf559SBarry Smith PetscInt nz; 143204fbf559SBarry Smith PetscScalar *x,*tmp,s1; 1433dd6ea824SBarry Smith const MatScalar *aa = a->a,*v; 143404fbf559SBarry Smith const PetscScalar *b; 1435da3a660dSBarry Smith 14363a40ed3dSBarry Smith PetscFunctionBegin; 143704fbf559SBarry Smith ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 14381ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1439416022c9SBarry Smith tmp = a->solve_work; 1440da3a660dSBarry Smith 14412235e667SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 14422235e667SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1443da3a660dSBarry Smith 1444da3a660dSBarry Smith /* copy the b into temp work space according to permutation */ 14452235e667SBarry Smith for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1446da3a660dSBarry Smith 1447da3a660dSBarry Smith /* forward solve the U^T */ 1448da3a660dSBarry Smith for (i=0; i<n; i++) { 1449010a20caSHong Zhang v = aa + diag[i] ; 1450010a20caSHong Zhang vi = aj + diag[i] + 1; 1451f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 1452c38d4ed2SBarry Smith s1 = tmp[i]; 1453ef66eb69SBarry Smith s1 *= (*v++); /* multiply by inverse of diagonal entry */ 145404fbf559SBarry Smith for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1455c38d4ed2SBarry Smith tmp[i] = s1; 1456da3a660dSBarry Smith } 1457da3a660dSBarry Smith 1458da3a660dSBarry Smith /* backward solve the L^T */ 1459da3a660dSBarry Smith for (i=n-1; i>=0; i--){ 1460010a20caSHong Zhang v = aa + diag[i] - 1 ; 1461010a20caSHong Zhang vi = aj + diag[i] - 1 ; 1462f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 1463f1af5d2fSBarry Smith s1 = tmp[i]; 146404fbf559SBarry Smith for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1465da3a660dSBarry Smith } 1466da3a660dSBarry Smith 1467da3a660dSBarry Smith /* copy tmp into x according to permutation */ 1468591294e4SBarry Smith for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1469da3a660dSBarry Smith 14702235e667SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 14712235e667SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 147204fbf559SBarry Smith ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 14731ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1474da3a660dSBarry Smith 1475dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 14763a40ed3dSBarry Smith PetscFunctionReturn(0); 1477da3a660dSBarry Smith } 1478da3a660dSBarry Smith 14794a2ae208SSatish Balay #undef __FUNCT__ 14804a2ae208SSatish Balay #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1481dfbe8321SBarry Smith PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1482da3a660dSBarry Smith { 1483416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 14842235e667SBarry Smith IS iscol = a->col,isrow = a->row; 14856849ba73SBarry Smith PetscErrorCode ierr; 148604fbf559SBarry Smith const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 148704fbf559SBarry Smith PetscInt i,n = A->rmap->n,j; 148804fbf559SBarry Smith PetscInt nz; 148904fbf559SBarry Smith PetscScalar *x,*tmp,s1; 1490dd6ea824SBarry Smith const MatScalar *aa = a->a,*v; 149104fbf559SBarry Smith const PetscScalar *b; 14926abc6512SBarry Smith 14933a40ed3dSBarry Smith PetscFunctionBegin; 149423bc6035SMatthew Knepley if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 149504fbf559SBarry Smith ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 14961ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1497416022c9SBarry Smith tmp = a->solve_work; 14986abc6512SBarry Smith 14992235e667SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 15002235e667SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 15016abc6512SBarry Smith 15026abc6512SBarry Smith /* copy the b into temp work space according to permutation */ 15032235e667SBarry Smith for (i=0; i<n; i++) tmp[i] = b[c[i]]; 15046abc6512SBarry Smith 15056abc6512SBarry Smith /* forward solve the U^T */ 15066abc6512SBarry Smith for (i=0; i<n; i++) { 1507010a20caSHong Zhang v = aa + diag[i] ; 1508010a20caSHong Zhang vi = aj + diag[i] + 1; 1509f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 151004fbf559SBarry Smith s1 = tmp[i]; 151104fbf559SBarry Smith s1 *= (*v++); /* multiply by inverse of diagonal entry */ 151204fbf559SBarry Smith for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 151304fbf559SBarry Smith tmp[i] = s1; 15146abc6512SBarry Smith } 15156abc6512SBarry Smith 15166abc6512SBarry Smith /* backward solve the L^T */ 15176abc6512SBarry Smith for (i=n-1; i>=0; i--){ 1518010a20caSHong Zhang v = aa + diag[i] - 1 ; 1519010a20caSHong Zhang vi = aj + diag[i] - 1 ; 1520f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 152104fbf559SBarry Smith s1 = tmp[i]; 152204fbf559SBarry Smith for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 15236abc6512SBarry Smith } 15246abc6512SBarry Smith 15256abc6512SBarry Smith /* copy tmp into x according to permutation */ 15266abc6512SBarry Smith for (i=0; i<n; i++) x[r[i]] += tmp[i]; 15276abc6512SBarry Smith 15282235e667SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 15292235e667SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 153004fbf559SBarry Smith ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 15311ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 15326abc6512SBarry Smith 153304fbf559SBarry Smith ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 15343a40ed3dSBarry Smith PetscFunctionReturn(0); 1535da3a660dSBarry Smith } 153604fbf559SBarry Smith 15379e25ed09SBarry Smith /* ----------------------------------------------------------------*/ 15383c5fc038SBarry Smith EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1539f77e22a1SHong Zhang EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscTruth); 154008480c60SBarry Smith 15418fc3a347SHong Zhang /* 15428fc3a347SHong Zhang ilu(0) with natural ordering under new data structure. 15438fc3a347SHong Zhang Factored arrays bj and ba are stored as 15448fc3a347SHong Zhang L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 15458fc3a347SHong Zhang 15468fc3a347SHong Zhang bi=fact->i is an array of size 2n+2, in which 15478fc3a347SHong Zhang bi+ 15488fc3a347SHong Zhang bi[i] -> 1st entry of L(i,:),i=0,...,i-1 1549f77e22a1SHong Zhang bi[n] -> points to L(n-1,:)+1 15508fc3a347SHong Zhang bi[n+1] -> 1st entry of U(n-1,:) 15518fc3a347SHong Zhang bi[2n-i] -> 1st entry of U(i,:) 15528fc3a347SHong Zhang bi[2n-i+1] -> end of U(i,:)+1, the 1st entry of U(i-1,:) 1553f77e22a1SHong Zhang bi[2n] -> 1st entry of U(0,:) 1554f77e22a1SHong Zhang bi[2n+1] -> points to U(0,:)+1 15558fc3a347SHong Zhang 15568fc3a347SHong Zhang U(i,:) contains diag[i] as its last entry, i.e., 15578fc3a347SHong Zhang U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 15588fc3a347SHong Zhang */ 1559c9c72f8fSHong Zhang #undef __FUNCT__ 15608fc3a347SHong Zhang #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct" 15618fc3a347SHong Zhang PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1562c9c72f8fSHong Zhang { 1563c9c72f8fSHong Zhang 1564c9c72f8fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1565c9c72f8fSHong Zhang PetscErrorCode ierr; 1566c9c72f8fSHong Zhang PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 15678fc3a347SHong Zhang PetscInt i,j,nz,*bi,*bj,*bdiag; 1568c9c72f8fSHong Zhang 1569c9c72f8fSHong Zhang PetscFunctionBegin; 1570f77e22a1SHong Zhang ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1571c9c72f8fSHong Zhang b = (Mat_SeqAIJ*)(fact)->data; 1572c9c72f8fSHong Zhang 1573f77e22a1SHong Zhang /* allocate matrix arrays for new data structure */ 1574f77e22a1SHong Zhang ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,2*n+2,PetscInt,&b->i);CHKERRQ(ierr); 1575f77e22a1SHong Zhang ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(2*n+2)*sizeof(PetscInt));CHKERRQ(ierr); 1576f77e22a1SHong Zhang b->singlemalloc = PETSC_TRUE; 1577f77e22a1SHong Zhang if (!b->diag){ 15788fc3a347SHong Zhang ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr); 1579f77e22a1SHong Zhang } 1580c9c72f8fSHong Zhang bdiag = b->diag; 1581c9c72f8fSHong Zhang 1582c9c72f8fSHong Zhang if (n > 0) { 158316a2bf60SHong Zhang ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1584c9c72f8fSHong Zhang } 1585c9c72f8fSHong Zhang 1586c9c72f8fSHong Zhang /* set bi and bj with new data structure */ 1587c9c72f8fSHong Zhang bi = b->i; 1588c9c72f8fSHong Zhang bj = b->j; 1589c9c72f8fSHong Zhang 1590c9c72f8fSHong Zhang /* L part */ 1591c9c72f8fSHong Zhang bi[0] = 0; 1592c9c72f8fSHong Zhang for (i=0; i<n; i++){ 1593c9c72f8fSHong Zhang nz = adiag[i] - ai[i]; 1594c9c72f8fSHong Zhang bi[i+1] = bi[i] + nz; 1595c9c72f8fSHong Zhang aj = a->j + ai[i]; 1596c9c72f8fSHong Zhang for (j=0; j<nz; j++){ 1597c9c72f8fSHong Zhang *bj = aj[j]; bj++; 1598c9c72f8fSHong Zhang } 1599c9c72f8fSHong Zhang } 1600c9c72f8fSHong Zhang 1601c9c72f8fSHong Zhang /* U part */ 16028fc3a347SHong Zhang bi[n+1] = bi[n]; 1603c9c72f8fSHong Zhang for (i=n-1; i>=0; i--){ 1604c9c72f8fSHong Zhang nz = ai[i+1] - adiag[i] - 1; 16058fc3a347SHong Zhang bi[2*n-i+1] = bi[2*n-i] + nz + 1; 1606c9c72f8fSHong Zhang aj = a->j + adiag[i] + 1; 1607c9c72f8fSHong Zhang for (j=0; j<nz; j++){ 1608c9c72f8fSHong Zhang *bj = aj[j]; bj++; 1609c9c72f8fSHong Zhang } 1610c9c72f8fSHong Zhang /* diag[i] */ 1611c9c72f8fSHong Zhang *bj = i; bj++; 16128fc3a347SHong Zhang bdiag[i] = bi[2*n-i+1]-1; 1613c9c72f8fSHong Zhang } 1614c9c72f8fSHong Zhang PetscFunctionReturn(0); 1615c9c72f8fSHong Zhang } 1616c9c72f8fSHong Zhang 16174a2ae208SSatish Balay #undef __FUNCT__ 16186516239fSHong Zhang #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_newdatastruct" 16196516239fSHong Zhang PetscErrorCode MatILUFactorSymbolic_SeqAIJ_newdatastruct(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 16209e25ed09SBarry Smith { 1621416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 16229e25ed09SBarry Smith IS isicol; 16236849ba73SBarry Smith PetscErrorCode ierr; 16245d0c19d7SBarry Smith const PetscInt *r,*ic; 16255d0c19d7SBarry Smith PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 16265a8e39fbSHong Zhang PetscInt *bi,*cols,nnz,*cols_lvl; 1627052f0c41SBarry Smith PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 16285a8e39fbSHong Zhang PetscInt i,levels,diagonal_fill; 162977c4ece6SBarry Smith PetscTruth col_identity,row_identity; 1630329f5518SBarry Smith PetscReal f; 16315a8e39fbSHong Zhang PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 16325a8e39fbSHong Zhang PetscBT lnkbt; 16335a8e39fbSHong Zhang PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1634a1a86e44SBarry Smith PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1635a1a86e44SBarry Smith PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 163609f38230SBarry Smith PetscTruth missing; 16379e25ed09SBarry Smith 16383a40ed3dSBarry Smith PetscFunctionBegin; 1639d0f46423SBarry Smith if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 16406cf997b0SBarry Smith f = info->fill; 164138baddfdSBarry Smith levels = (PetscInt)info->levels; 164238baddfdSBarry Smith diagonal_fill = (PetscInt)info->diagonal_fill; 16434c49b128SBarry Smith ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 164482bf6240SBarry Smith 1645be0abb6dSBarry Smith ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1646be0abb6dSBarry Smith ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1647c9c72f8fSHong Zhang 16486516239fSHong Zhang if (!levels && row_identity && col_identity) { 16496516239fSHong Zhang /* special case: ilu(0) with natural ordering */ 16508fc3a347SHong Zhang ierr = MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1651c9c72f8fSHong Zhang (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_newdatastruct; 16526516239fSHong Zhang 16536516239fSHong Zhang fact->factor = MAT_FACTOR_ILU; 16546516239fSHong Zhang (fact)->info.factor_mallocs = 0; 16556516239fSHong Zhang (fact)->info.fill_ratio_given = info->fill; 16566516239fSHong Zhang (fact)->info.fill_ratio_needed = 1.0; 16576516239fSHong Zhang b = (Mat_SeqAIJ*)(fact)->data; 16586516239fSHong Zhang ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 16596516239fSHong Zhang if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 16606516239fSHong Zhang b->row = isrow; 16616516239fSHong Zhang b->col = iscol; 16626516239fSHong Zhang b->icol = isicol; 16636516239fSHong Zhang ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 16646516239fSHong Zhang ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 16656516239fSHong Zhang ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 16666516239fSHong Zhang /* ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); */ 16676516239fSHong Zhang PetscFunctionReturn(0); 16686516239fSHong Zhang } 16696516239fSHong Zhang 16706516239fSHong Zhang ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 16716516239fSHong Zhang ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 16726516239fSHong Zhang 16736516239fSHong Zhang /* get new row pointers */ 16746516239fSHong Zhang ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 16756516239fSHong Zhang bi[0] = 0; 16766516239fSHong Zhang /* bdiag is location of diagonal in factor */ 16776516239fSHong Zhang ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 16786516239fSHong Zhang bdiag[0] = 0; 16796516239fSHong Zhang 16806516239fSHong Zhang ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 16816516239fSHong Zhang bjlvl_ptr = (PetscInt**)(bj_ptr + n); 16826516239fSHong Zhang 16836516239fSHong Zhang /* create a linked list for storing column indices of the active row */ 16846516239fSHong Zhang nlnk = n + 1; 16856516239fSHong Zhang ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 16866516239fSHong Zhang 16876516239fSHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 16886516239fSHong Zhang ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 16896516239fSHong Zhang current_space = free_space; 16906516239fSHong Zhang ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 16916516239fSHong Zhang current_space_lvl = free_space_lvl; 16926516239fSHong Zhang 16936516239fSHong Zhang for (i=0; i<n; i++) { 16946516239fSHong Zhang nzi = 0; 16956516239fSHong Zhang /* copy current row into linked list */ 16966516239fSHong Zhang nnz = ai[r[i]+1] - ai[r[i]]; 16976516239fSHong Zhang if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 16986516239fSHong Zhang cols = aj + ai[r[i]]; 16996516239fSHong Zhang lnk[i] = -1; /* marker to indicate if diagonal exists */ 17006516239fSHong Zhang ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 17016516239fSHong Zhang nzi += nlnk; 17026516239fSHong Zhang 17036516239fSHong Zhang /* make sure diagonal entry is included */ 17046516239fSHong Zhang if (diagonal_fill && lnk[i] == -1) { 17056516239fSHong Zhang fm = n; 17066516239fSHong Zhang while (lnk[fm] < i) fm = lnk[fm]; 17076516239fSHong Zhang lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 17086516239fSHong Zhang lnk[fm] = i; 17096516239fSHong Zhang lnk_lvl[i] = 0; 17106516239fSHong Zhang nzi++; dcount++; 17116516239fSHong Zhang } 17126516239fSHong Zhang 17136516239fSHong Zhang /* add pivot rows into the active row */ 17146516239fSHong Zhang nzbd = 0; 17156516239fSHong Zhang prow = lnk[n]; 17166516239fSHong Zhang while (prow < i) { 17176516239fSHong Zhang nnz = bdiag[prow]; 17186516239fSHong Zhang cols = bj_ptr[prow] + nnz + 1; 17196516239fSHong Zhang cols_lvl = bjlvl_ptr[prow] + nnz + 1; 17206516239fSHong Zhang nnz = bi[prow+1] - bi[prow] - nnz - 1; 17216516239fSHong Zhang ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 17226516239fSHong Zhang nzi += nlnk; 17236516239fSHong Zhang prow = lnk[prow]; 17246516239fSHong Zhang nzbd++; 17256516239fSHong Zhang } 17266516239fSHong Zhang bdiag[i] = nzbd; 17276516239fSHong Zhang bi[i+1] = bi[i] + nzi; 17286516239fSHong Zhang 17296516239fSHong Zhang /* if free space is not available, make more free space */ 17306516239fSHong Zhang if (current_space->local_remaining<nzi) { 17316516239fSHong Zhang nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 17326516239fSHong Zhang ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 17336516239fSHong Zhang ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 17346516239fSHong Zhang reallocs++; 17356516239fSHong Zhang } 17366516239fSHong Zhang 17376516239fSHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 17386516239fSHong Zhang ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 17396516239fSHong Zhang bj_ptr[i] = current_space->array; 17406516239fSHong Zhang bjlvl_ptr[i] = current_space_lvl->array; 17416516239fSHong Zhang 17426516239fSHong Zhang /* make sure the active row i has diagonal entry */ 17436516239fSHong Zhang if (*(bj_ptr[i]+bdiag[i]) != i) { 17446516239fSHong Zhang SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 17456516239fSHong Zhang try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 17466516239fSHong Zhang } 17476516239fSHong Zhang 17486516239fSHong Zhang current_space->array += nzi; 17496516239fSHong Zhang current_space->local_used += nzi; 17506516239fSHong Zhang current_space->local_remaining -= nzi; 17516516239fSHong Zhang current_space_lvl->array += nzi; 17526516239fSHong Zhang current_space_lvl->local_used += nzi; 17536516239fSHong Zhang current_space_lvl->local_remaining -= nzi; 17546516239fSHong Zhang } 17556516239fSHong Zhang 17566516239fSHong Zhang ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 17576516239fSHong Zhang ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 17586516239fSHong Zhang 17596516239fSHong Zhang /* destroy list of free space and other temporary arrays */ 17606516239fSHong Zhang ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 17616516239fSHong Zhang 17626516239fSHong Zhang /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1763783ef271SHong Zhang ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 17646516239fSHong Zhang 17656516239fSHong Zhang ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 17666516239fSHong Zhang ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 17676516239fSHong Zhang ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 17686516239fSHong Zhang 17696516239fSHong Zhang #if defined(PETSC_USE_INFO) 17706516239fSHong Zhang { 17716516239fSHong Zhang PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 17726516239fSHong Zhang ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 17736516239fSHong Zhang ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 17746516239fSHong Zhang ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 17756516239fSHong Zhang ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 17766516239fSHong Zhang if (diagonal_fill) { 17776516239fSHong Zhang ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 17786516239fSHong Zhang } 17796516239fSHong Zhang } 17806516239fSHong Zhang #endif 17816516239fSHong Zhang 17826516239fSHong Zhang /* put together the new matrix */ 17836516239fSHong Zhang ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 17846516239fSHong Zhang ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 17856516239fSHong Zhang b = (Mat_SeqAIJ*)(fact)->data; 17866516239fSHong Zhang b->free_a = PETSC_TRUE; 17876516239fSHong Zhang b->free_ij = PETSC_TRUE; 17886516239fSHong Zhang b->singlemalloc = PETSC_FALSE; 17896516239fSHong Zhang ierr = PetscMalloc( (bi[2*n+1] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 17906516239fSHong Zhang b->j = bj; 17916516239fSHong Zhang b->i = bi; 17926516239fSHong Zhang b->diag = bdiag; 17936516239fSHong Zhang b->ilen = 0; 17946516239fSHong Zhang b->imax = 0; 17956516239fSHong Zhang b->row = isrow; 17966516239fSHong Zhang b->col = iscol; 17976516239fSHong Zhang ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 17986516239fSHong Zhang ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 17996516239fSHong Zhang b->icol = isicol; 18006516239fSHong Zhang ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 18016516239fSHong Zhang /* In b structure: Free imax, ilen, old a, old j. 18026516239fSHong Zhang Allocate bdiag, solve_work, new a, new j */ 18036516239fSHong Zhang ierr = PetscLogObjectMemory(fact,bi[2*n+1] * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 18046516239fSHong Zhang b->maxnz = b->nz = bi[2*n+1] ; 18056516239fSHong Zhang (fact)->info.factor_mallocs = reallocs; 18066516239fSHong Zhang (fact)->info.fill_ratio_given = f; 18076516239fSHong Zhang (fact)->info.fill_ratio_needed = ((PetscReal)bi[2*n+1])/((PetscReal)ai[n]); 18086516239fSHong Zhang (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_newdatastruct; 18096516239fSHong Zhang /* ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); */ 18106516239fSHong Zhang PetscFunctionReturn(0); 18116516239fSHong Zhang } 18126516239fSHong Zhang 1813*813a1f2bSShri Abhyankar 1814*813a1f2bSShri Abhyankar /* 1815*813a1f2bSShri Abhyankar ilu(0) with natural ordering under revised new data structure. 1816*813a1f2bSShri Abhyankar Factored arrays bj and ba are stored as 1817*813a1f2bSShri Abhyankar L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1818*813a1f2bSShri Abhyankar 1819*813a1f2bSShri Abhyankar bi=fact->i is an array of size n+1, in which 1820*813a1f2bSShri Abhyankar bi+ 1821*813a1f2bSShri Abhyankar bi[i] -> 1st entry of L(i,:),i=0,...,i-1 1822*813a1f2bSShri Abhyankar bi[n] -> points to L(n-1,:)+1 1823*813a1f2bSShri Abhyankar bi[n+1] -> 1st entry of U(n-1,:) 1824*813a1f2bSShri Abhyankar bdiag=fact->diag is an array of size n+1,in which 1825*813a1f2bSShri Abhyankar bdiag[0] -> points to diagonal of U(n-1,:) 1826*813a1f2bSShri Abhyankar bdiag[n-1] -> points to diagonal of U(0,:) 1827*813a1f2bSShri Abhyankar 1828*813a1f2bSShri Abhyankar U(i,:) contains bdiag[i] as its last entry, i.e., 1829*813a1f2bSShri Abhyankar U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1830*813a1f2bSShri Abhyankar */ 1831*813a1f2bSShri Abhyankar #undef __FUNCT__ 1832*813a1f2bSShri Abhyankar #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct_v2" 1833*813a1f2bSShri Abhyankar PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct_v2(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1834*813a1f2bSShri Abhyankar { 1835*813a1f2bSShri Abhyankar 1836*813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1837*813a1f2bSShri Abhyankar PetscErrorCode ierr; 1838*813a1f2bSShri Abhyankar PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1839*813a1f2bSShri Abhyankar PetscInt i,j,nz,*bi,*bj,*bdiag,bi_temp; 1840*813a1f2bSShri Abhyankar 1841*813a1f2bSShri Abhyankar PetscFunctionBegin; 1842*813a1f2bSShri Abhyankar ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1843*813a1f2bSShri Abhyankar b = (Mat_SeqAIJ*)(fact)->data; 1844*813a1f2bSShri Abhyankar 1845*813a1f2bSShri Abhyankar /* allocate matrix arrays for new data structure */ 1846*813a1f2bSShri Abhyankar ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr); 1847*813a1f2bSShri Abhyankar ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1848*813a1f2bSShri Abhyankar b->singlemalloc = PETSC_TRUE; 1849*813a1f2bSShri Abhyankar if (!b->diag){ 1850*813a1f2bSShri Abhyankar ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr); 1851*813a1f2bSShri Abhyankar ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1852*813a1f2bSShri Abhyankar } 1853*813a1f2bSShri Abhyankar bdiag = b->diag; 1854*813a1f2bSShri Abhyankar 1855*813a1f2bSShri Abhyankar if (n > 0) { 1856*813a1f2bSShri Abhyankar ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1857*813a1f2bSShri Abhyankar } 1858*813a1f2bSShri Abhyankar 1859*813a1f2bSShri Abhyankar /* set bi and bj with new data structure */ 1860*813a1f2bSShri Abhyankar bi = b->i; 1861*813a1f2bSShri Abhyankar bj = b->j; 1862*813a1f2bSShri Abhyankar 1863*813a1f2bSShri Abhyankar /* L part */ 1864*813a1f2bSShri Abhyankar bi[0] = 0; 1865*813a1f2bSShri Abhyankar for (i=0; i<n; i++){ 1866*813a1f2bSShri Abhyankar nz = adiag[i] - ai[i]; 1867*813a1f2bSShri Abhyankar bi[i+1] = bi[i] + nz; 1868*813a1f2bSShri Abhyankar aj = a->j + ai[i]; 1869*813a1f2bSShri Abhyankar for (j=0; j<nz; j++){ 1870*813a1f2bSShri Abhyankar *bj = aj[j]; bj++; 1871*813a1f2bSShri Abhyankar } 1872*813a1f2bSShri Abhyankar } 1873*813a1f2bSShri Abhyankar 1874*813a1f2bSShri Abhyankar /* U part */ 1875*813a1f2bSShri Abhyankar /* bi[n+1] = bi[n]; */ 1876*813a1f2bSShri Abhyankar bi_temp = bi[n]; 1877*813a1f2bSShri Abhyankar for (i=n-1; i>=0; i--){ 1878*813a1f2bSShri Abhyankar nz = ai[i+1] - adiag[i] - 1; 1879*813a1f2bSShri Abhyankar /* bi[2*n-i+1] = bi[2*n-i] + nz + 1; */ 1880*813a1f2bSShri Abhyankar bi_temp = bi_temp + nz + 1; 1881*813a1f2bSShri Abhyankar aj = a->j + adiag[i] + 1; 1882*813a1f2bSShri Abhyankar for (j=0; j<nz; j++){ 1883*813a1f2bSShri Abhyankar *bj = aj[j]; bj++; 1884*813a1f2bSShri Abhyankar } 1885*813a1f2bSShri Abhyankar /* diag[i] */ 1886*813a1f2bSShri Abhyankar *bj = i; bj++; 1887*813a1f2bSShri Abhyankar /* bdiag[i] = bi[2*n-i+1]-1; */ 1888*813a1f2bSShri Abhyankar bdiag[i] = bi_temp - 1; 1889*813a1f2bSShri Abhyankar } 1890*813a1f2bSShri Abhyankar PetscFunctionReturn(0); 1891*813a1f2bSShri Abhyankar } 1892*813a1f2bSShri Abhyankar 1893*813a1f2bSShri Abhyankar #undef __FUNCT__ 1894*813a1f2bSShri Abhyankar #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_newdatastruct_v2" 1895*813a1f2bSShri Abhyankar PetscErrorCode MatILUFactorSymbolic_SeqAIJ_newdatastruct_v2(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1896*813a1f2bSShri Abhyankar { 1897*813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1898*813a1f2bSShri Abhyankar IS isicol; 1899*813a1f2bSShri Abhyankar PetscErrorCode ierr; 1900*813a1f2bSShri Abhyankar const PetscInt *r,*ic; 1901*813a1f2bSShri Abhyankar PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1902*813a1f2bSShri Abhyankar PetscInt *bi,*cols,nnz,*cols_lvl; 1903*813a1f2bSShri Abhyankar PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1904*813a1f2bSShri Abhyankar PetscInt i,levels,diagonal_fill; 1905*813a1f2bSShri Abhyankar PetscTruth col_identity,row_identity; 1906*813a1f2bSShri Abhyankar PetscReal f; 1907*813a1f2bSShri Abhyankar PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1908*813a1f2bSShri Abhyankar PetscBT lnkbt; 1909*813a1f2bSShri Abhyankar PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1910*813a1f2bSShri Abhyankar PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1911*813a1f2bSShri Abhyankar PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1912*813a1f2bSShri Abhyankar PetscTruth missing; 1913*813a1f2bSShri Abhyankar 1914*813a1f2bSShri Abhyankar PetscFunctionBegin; 1915*813a1f2bSShri Abhyankar if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1916*813a1f2bSShri Abhyankar f = info->fill; 1917*813a1f2bSShri Abhyankar levels = (PetscInt)info->levels; 1918*813a1f2bSShri Abhyankar diagonal_fill = (PetscInt)info->diagonal_fill; 1919*813a1f2bSShri Abhyankar ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1920*813a1f2bSShri Abhyankar 1921*813a1f2bSShri Abhyankar ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1922*813a1f2bSShri Abhyankar ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1923*813a1f2bSShri Abhyankar 1924*813a1f2bSShri Abhyankar if (!levels && row_identity && col_identity) { 1925*813a1f2bSShri Abhyankar /* special case: ilu(0) with natural ordering */ 1926*813a1f2bSShri Abhyankar ierr = MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct_v2(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1927*813a1f2bSShri Abhyankar (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_newdatastruct_v2; 1928*813a1f2bSShri Abhyankar 1929*813a1f2bSShri Abhyankar fact->factor = MAT_FACTOR_ILU; 1930*813a1f2bSShri Abhyankar (fact)->info.factor_mallocs = 0; 1931*813a1f2bSShri Abhyankar (fact)->info.fill_ratio_given = info->fill; 1932*813a1f2bSShri Abhyankar (fact)->info.fill_ratio_needed = 1.0; 1933*813a1f2bSShri Abhyankar b = (Mat_SeqAIJ*)(fact)->data; 1934*813a1f2bSShri Abhyankar ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1935*813a1f2bSShri Abhyankar if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1936*813a1f2bSShri Abhyankar b->row = isrow; 1937*813a1f2bSShri Abhyankar b->col = iscol; 1938*813a1f2bSShri Abhyankar b->icol = isicol; 1939*813a1f2bSShri Abhyankar ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1940*813a1f2bSShri Abhyankar ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1941*813a1f2bSShri Abhyankar ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1942*813a1f2bSShri Abhyankar /* ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); */ 1943*813a1f2bSShri Abhyankar PetscFunctionReturn(0); 1944*813a1f2bSShri Abhyankar } 1945*813a1f2bSShri Abhyankar 1946*813a1f2bSShri Abhyankar ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1947*813a1f2bSShri Abhyankar ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1948*813a1f2bSShri Abhyankar 1949*813a1f2bSShri Abhyankar /* get new row pointers */ 1950*813a1f2bSShri Abhyankar ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1951*813a1f2bSShri Abhyankar bi[0] = 0; 1952*813a1f2bSShri Abhyankar /* bdiag is location of diagonal in factor */ 1953*813a1f2bSShri Abhyankar ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1954*813a1f2bSShri Abhyankar bdiag[0] = 0; 1955*813a1f2bSShri Abhyankar 1956*813a1f2bSShri Abhyankar ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1957*813a1f2bSShri Abhyankar bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1958*813a1f2bSShri Abhyankar 1959*813a1f2bSShri Abhyankar /* create a linked list for storing column indices of the active row */ 1960*813a1f2bSShri Abhyankar nlnk = n + 1; 1961*813a1f2bSShri Abhyankar ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1962*813a1f2bSShri Abhyankar 1963*813a1f2bSShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 1964*813a1f2bSShri Abhyankar ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1965*813a1f2bSShri Abhyankar current_space = free_space; 1966*813a1f2bSShri Abhyankar ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1967*813a1f2bSShri Abhyankar current_space_lvl = free_space_lvl; 1968*813a1f2bSShri Abhyankar 1969*813a1f2bSShri Abhyankar for (i=0; i<n; i++) { 1970*813a1f2bSShri Abhyankar nzi = 0; 1971*813a1f2bSShri Abhyankar /* copy current row into linked list */ 1972*813a1f2bSShri Abhyankar nnz = ai[r[i]+1] - ai[r[i]]; 1973*813a1f2bSShri Abhyankar if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1974*813a1f2bSShri Abhyankar cols = aj + ai[r[i]]; 1975*813a1f2bSShri Abhyankar lnk[i] = -1; /* marker to indicate if diagonal exists */ 1976*813a1f2bSShri Abhyankar ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1977*813a1f2bSShri Abhyankar nzi += nlnk; 1978*813a1f2bSShri Abhyankar 1979*813a1f2bSShri Abhyankar /* make sure diagonal entry is included */ 1980*813a1f2bSShri Abhyankar if (diagonal_fill && lnk[i] == -1) { 1981*813a1f2bSShri Abhyankar fm = n; 1982*813a1f2bSShri Abhyankar while (lnk[fm] < i) fm = lnk[fm]; 1983*813a1f2bSShri Abhyankar lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1984*813a1f2bSShri Abhyankar lnk[fm] = i; 1985*813a1f2bSShri Abhyankar lnk_lvl[i] = 0; 1986*813a1f2bSShri Abhyankar nzi++; dcount++; 1987*813a1f2bSShri Abhyankar } 1988*813a1f2bSShri Abhyankar 1989*813a1f2bSShri Abhyankar /* add pivot rows into the active row */ 1990*813a1f2bSShri Abhyankar nzbd = 0; 1991*813a1f2bSShri Abhyankar prow = lnk[n]; 1992*813a1f2bSShri Abhyankar while (prow < i) { 1993*813a1f2bSShri Abhyankar nnz = bdiag[prow]; 1994*813a1f2bSShri Abhyankar cols = bj_ptr[prow] + nnz + 1; 1995*813a1f2bSShri Abhyankar cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1996*813a1f2bSShri Abhyankar nnz = bi[prow+1] - bi[prow] - nnz - 1; 1997*813a1f2bSShri Abhyankar ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1998*813a1f2bSShri Abhyankar nzi += nlnk; 1999*813a1f2bSShri Abhyankar prow = lnk[prow]; 2000*813a1f2bSShri Abhyankar nzbd++; 2001*813a1f2bSShri Abhyankar } 2002*813a1f2bSShri Abhyankar bdiag[i] = nzbd; 2003*813a1f2bSShri Abhyankar bi[i+1] = bi[i] + nzi; 2004*813a1f2bSShri Abhyankar 2005*813a1f2bSShri Abhyankar /* if free space is not available, make more free space */ 2006*813a1f2bSShri Abhyankar if (current_space->local_remaining<nzi) { 2007*813a1f2bSShri Abhyankar nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 2008*813a1f2bSShri Abhyankar ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 2009*813a1f2bSShri Abhyankar ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 2010*813a1f2bSShri Abhyankar reallocs++; 2011*813a1f2bSShri Abhyankar } 2012*813a1f2bSShri Abhyankar 2013*813a1f2bSShri Abhyankar /* copy data into free_space and free_space_lvl, then initialize lnk */ 2014*813a1f2bSShri Abhyankar ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2015*813a1f2bSShri Abhyankar bj_ptr[i] = current_space->array; 2016*813a1f2bSShri Abhyankar bjlvl_ptr[i] = current_space_lvl->array; 2017*813a1f2bSShri Abhyankar 2018*813a1f2bSShri Abhyankar /* make sure the active row i has diagonal entry */ 2019*813a1f2bSShri Abhyankar if (*(bj_ptr[i]+bdiag[i]) != i) { 2020*813a1f2bSShri Abhyankar SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 2021*813a1f2bSShri Abhyankar try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2022*813a1f2bSShri Abhyankar } 2023*813a1f2bSShri Abhyankar 2024*813a1f2bSShri Abhyankar current_space->array += nzi; 2025*813a1f2bSShri Abhyankar current_space->local_used += nzi; 2026*813a1f2bSShri Abhyankar current_space->local_remaining -= nzi; 2027*813a1f2bSShri Abhyankar current_space_lvl->array += nzi; 2028*813a1f2bSShri Abhyankar current_space_lvl->local_used += nzi; 2029*813a1f2bSShri Abhyankar current_space_lvl->local_remaining -= nzi; 2030*813a1f2bSShri Abhyankar } 2031*813a1f2bSShri Abhyankar 2032*813a1f2bSShri Abhyankar ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2033*813a1f2bSShri Abhyankar ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2034*813a1f2bSShri Abhyankar 2035*813a1f2bSShri Abhyankar /* destroy list of free space and other temporary arrays */ 2036*813a1f2bSShri Abhyankar ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 2037*813a1f2bSShri Abhyankar 2038*813a1f2bSShri Abhyankar /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 2039*813a1f2bSShri Abhyankar ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 2040*813a1f2bSShri Abhyankar 2041*813a1f2bSShri Abhyankar ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2042*813a1f2bSShri Abhyankar ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2043*813a1f2bSShri Abhyankar ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 2044*813a1f2bSShri Abhyankar 2045*813a1f2bSShri Abhyankar #if defined(PETSC_USE_INFO) 2046*813a1f2bSShri Abhyankar { 2047*813a1f2bSShri Abhyankar PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2048*813a1f2bSShri Abhyankar ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2049*813a1f2bSShri Abhyankar ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2050*813a1f2bSShri Abhyankar ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 2051*813a1f2bSShri Abhyankar ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2052*813a1f2bSShri Abhyankar if (diagonal_fill) { 2053*813a1f2bSShri Abhyankar ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2054*813a1f2bSShri Abhyankar } 2055*813a1f2bSShri Abhyankar } 2056*813a1f2bSShri Abhyankar #endif 2057*813a1f2bSShri Abhyankar 2058*813a1f2bSShri Abhyankar /* put together the new matrix */ 2059*813a1f2bSShri Abhyankar ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2060*813a1f2bSShri Abhyankar ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 2061*813a1f2bSShri Abhyankar b = (Mat_SeqAIJ*)(fact)->data; 2062*813a1f2bSShri Abhyankar b->free_a = PETSC_TRUE; 2063*813a1f2bSShri Abhyankar b->free_ij = PETSC_TRUE; 2064*813a1f2bSShri Abhyankar b->singlemalloc = PETSC_FALSE; 2065*813a1f2bSShri Abhyankar ierr = PetscMalloc( (bi[2*n+1] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 2066*813a1f2bSShri Abhyankar b->j = bj; 2067*813a1f2bSShri Abhyankar b->i = bi; 2068*813a1f2bSShri Abhyankar b->diag = bdiag; 2069*813a1f2bSShri Abhyankar b->ilen = 0; 2070*813a1f2bSShri Abhyankar b->imax = 0; 2071*813a1f2bSShri Abhyankar b->row = isrow; 2072*813a1f2bSShri Abhyankar b->col = iscol; 2073*813a1f2bSShri Abhyankar ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2074*813a1f2bSShri Abhyankar ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2075*813a1f2bSShri Abhyankar b->icol = isicol; 2076*813a1f2bSShri Abhyankar ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2077*813a1f2bSShri Abhyankar /* In b structure: Free imax, ilen, old a, old j. 2078*813a1f2bSShri Abhyankar Allocate bdiag, solve_work, new a, new j */ 2079*813a1f2bSShri Abhyankar ierr = PetscLogObjectMemory(fact,bi[2*n+1] * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2080*813a1f2bSShri Abhyankar b->maxnz = b->nz = bi[2*n+1] ; 2081*813a1f2bSShri Abhyankar (fact)->info.factor_mallocs = reallocs; 2082*813a1f2bSShri Abhyankar (fact)->info.fill_ratio_given = f; 2083*813a1f2bSShri Abhyankar (fact)->info.fill_ratio_needed = ((PetscReal)bi[2*n+1])/((PetscReal)ai[n]); 2084*813a1f2bSShri Abhyankar (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_newdatastruct; 2085*813a1f2bSShri Abhyankar /* ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); */ 2086*813a1f2bSShri Abhyankar PetscFunctionReturn(0); 2087*813a1f2bSShri Abhyankar } 2088*813a1f2bSShri Abhyankar 20896516239fSHong Zhang #undef __FUNCT__ 20906516239fSHong Zhang #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 20916516239fSHong Zhang PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 20926516239fSHong Zhang { 20936516239fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 20946516239fSHong Zhang IS isicol; 20956516239fSHong Zhang PetscErrorCode ierr; 20966516239fSHong Zhang const PetscInt *r,*ic; 20976516239fSHong Zhang PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 20986516239fSHong Zhang PetscInt *bi,*cols,nnz,*cols_lvl; 20996516239fSHong Zhang PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 21006516239fSHong Zhang PetscInt i,levels,diagonal_fill; 21016516239fSHong Zhang PetscTruth col_identity,row_identity; 21026516239fSHong Zhang PetscReal f; 21036516239fSHong Zhang PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 21046516239fSHong Zhang PetscBT lnkbt; 21056516239fSHong Zhang PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 21066516239fSHong Zhang PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 21076516239fSHong Zhang PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 21086516239fSHong Zhang PetscTruth missing; 2109*813a1f2bSShri Abhyankar PetscTruth newdatastruct=PETSC_FALSE,newdatastruct_v2=PETSC_FALSE; 21106516239fSHong Zhang 21116516239fSHong Zhang PetscFunctionBegin; 21126516239fSHong Zhang ierr = PetscOptionsGetTruth(PETSC_NULL,"-ilu_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr); 21136516239fSHong Zhang if (newdatastruct){ 21146516239fSHong Zhang ierr = MatILUFactorSymbolic_SeqAIJ_newdatastruct(fact,A,isrow,iscol,info);CHKERRQ(ierr); 21156516239fSHong Zhang PetscFunctionReturn(0); 21166516239fSHong Zhang } 2117*813a1f2bSShri Abhyankar ierr = PetscOptionsGetTruth(PETSC_NULL,"-ilu_new_v2",&newdatastruct_v2,PETSC_NULL);CHKERRQ(ierr); 2118*813a1f2bSShri Abhyankar if(newdatastruct_v2){ 2119*813a1f2bSShri Abhyankar ierr = MatILUFactorSymbolic_SeqAIJ_newdatastruct_v2(fact,A,isrow,iscol,info);CHKERRQ(ierr); 2120*813a1f2bSShri Abhyankar PetscFunctionReturn(0); 2121*813a1f2bSShri Abhyankar } 21226516239fSHong Zhang 21236516239fSHong Zhang if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 21246516239fSHong Zhang f = info->fill; 21256516239fSHong Zhang levels = (PetscInt)info->levels; 21266516239fSHong Zhang diagonal_fill = (PetscInt)info->diagonal_fill; 21276516239fSHong Zhang ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 21286516239fSHong Zhang 21296516239fSHong Zhang ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 21306516239fSHong Zhang ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 21316516239fSHong Zhang if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 2132f77e22a1SHong Zhang ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 2133c9c72f8fSHong Zhang (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 2134c9c72f8fSHong Zhang 2135719d5645SBarry Smith fact->factor = MAT_FACTOR_ILU; 2136719d5645SBarry Smith (fact)->info.factor_mallocs = 0; 2137719d5645SBarry Smith (fact)->info.fill_ratio_given = info->fill; 2138719d5645SBarry Smith (fact)->info.fill_ratio_needed = 1.0; 2139719d5645SBarry Smith b = (Mat_SeqAIJ*)(fact)->data; 21408ef3462fSBarry Smith ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 214109f38230SBarry Smith if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 214208480c60SBarry Smith b->row = isrow; 214308480c60SBarry Smith b->col = iscol; 214482bf6240SBarry Smith b->icol = isicol; 2145719d5645SBarry Smith ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2146c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2147c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2148719d5645SBarry Smith ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 21493a40ed3dSBarry Smith PetscFunctionReturn(0); 215008480c60SBarry Smith } 215108480c60SBarry Smith 2152898183ecSLois Curfman McInnes ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 2153898183ecSLois Curfman McInnes ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 21549e25ed09SBarry Smith 21559e25ed09SBarry Smith /* get new row pointers */ 21565a8e39fbSHong Zhang ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 21575a8e39fbSHong Zhang bi[0] = 0; 21585a8e39fbSHong Zhang /* bdiag is location of diagonal in factor */ 21595a8e39fbSHong Zhang ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 21605a8e39fbSHong Zhang bdiag[0] = 0; 21616cf997b0SBarry Smith 21625a8e39fbSHong Zhang ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 21635a8e39fbSHong Zhang bjlvl_ptr = (PetscInt**)(bj_ptr + n); 21645a8e39fbSHong Zhang 21655a8e39fbSHong Zhang /* create a linked list for storing column indices of the active row */ 21665a8e39fbSHong Zhang nlnk = n + 1; 21675a8e39fbSHong Zhang ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 21685a8e39fbSHong Zhang 21695a8e39fbSHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 2170a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 21715a8e39fbSHong Zhang current_space = free_space; 2172a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 21735a8e39fbSHong Zhang current_space_lvl = free_space_lvl; 21745a8e39fbSHong Zhang 21755a8e39fbSHong Zhang for (i=0; i<n; i++) { 21765a8e39fbSHong Zhang nzi = 0; 21776cf997b0SBarry Smith /* copy current row into linked list */ 21785a8e39fbSHong Zhang nnz = ai[r[i]+1] - ai[r[i]]; 21793b4a8b6dSBarry Smith if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 21805a8e39fbSHong Zhang cols = aj + ai[r[i]]; 21815a8e39fbSHong Zhang lnk[i] = -1; /* marker to indicate if diagonal exists */ 21825a8e39fbSHong Zhang ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 21835a8e39fbSHong Zhang nzi += nlnk; 21846cf997b0SBarry Smith 21856cf997b0SBarry Smith /* make sure diagonal entry is included */ 21865a8e39fbSHong Zhang if (diagonal_fill && lnk[i] == -1) { 21876cf997b0SBarry Smith fm = n; 21885a8e39fbSHong Zhang while (lnk[fm] < i) fm = lnk[fm]; 21895a8e39fbSHong Zhang lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 21905a8e39fbSHong Zhang lnk[fm] = i; 21915a8e39fbSHong Zhang lnk_lvl[i] = 0; 21925a8e39fbSHong Zhang nzi++; dcount++; 21936cf997b0SBarry Smith } 21946cf997b0SBarry Smith 21955a8e39fbSHong Zhang /* add pivot rows into the active row */ 21965a8e39fbSHong Zhang nzbd = 0; 21975a8e39fbSHong Zhang prow = lnk[n]; 21985a8e39fbSHong Zhang while (prow < i) { 21995a8e39fbSHong Zhang nnz = bdiag[prow]; 22005a8e39fbSHong Zhang cols = bj_ptr[prow] + nnz + 1; 22015a8e39fbSHong Zhang cols_lvl = bjlvl_ptr[prow] + nnz + 1; 22025a8e39fbSHong Zhang nnz = bi[prow+1] - bi[prow] - nnz - 1; 22035a8e39fbSHong Zhang ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 22045a8e39fbSHong Zhang nzi += nlnk; 22055a8e39fbSHong Zhang prow = lnk[prow]; 22065a8e39fbSHong Zhang nzbd++; 220756beaf04SBarry Smith } 22085a8e39fbSHong Zhang bdiag[i] = nzbd; 22095a8e39fbSHong Zhang bi[i+1] = bi[i] + nzi; 2210ecf371e4SBarry Smith 22115a8e39fbSHong Zhang /* if free space is not available, make more free space */ 22125a8e39fbSHong Zhang if (current_space->local_remaining<nzi) { 22135a8e39fbSHong Zhang nnz = nzi*(n - i); /* estimated and max additional space needed */ 2214a1a86e44SBarry Smith ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 2215a1a86e44SBarry Smith ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 22165a8e39fbSHong Zhang reallocs++; 22175a8e39fbSHong Zhang } 2218ecf371e4SBarry Smith 22195a8e39fbSHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 22205a8e39fbSHong Zhang ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 22215a8e39fbSHong Zhang bj_ptr[i] = current_space->array; 22225a8e39fbSHong Zhang bjlvl_ptr[i] = current_space_lvl->array; 22235a8e39fbSHong Zhang 22245a8e39fbSHong Zhang /* make sure the active row i has diagonal entry */ 22255a8e39fbSHong Zhang if (*(bj_ptr[i]+bdiag[i]) != i) { 222677431f27SBarry Smith SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 2227d7ee0231SBarry Smith try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 22286cf997b0SBarry Smith } 22295a8e39fbSHong Zhang 22305a8e39fbSHong Zhang current_space->array += nzi; 22315a8e39fbSHong Zhang current_space->local_used += nzi; 22325a8e39fbSHong Zhang current_space->local_remaining -= nzi; 22335a8e39fbSHong Zhang current_space_lvl->array += nzi; 22345a8e39fbSHong Zhang current_space_lvl->local_used += nzi; 22355a8e39fbSHong Zhang current_space_lvl->local_remaining -= nzi; 22369e25ed09SBarry Smith } 22375a8e39fbSHong Zhang 2238898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2239898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 22405a8e39fbSHong Zhang 22415a8e39fbSHong Zhang /* destroy list of free space and other temporary arrays */ 22425a8e39fbSHong Zhang ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 22436516239fSHong Zhang ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 22445a8e39fbSHong Zhang ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2245a1a86e44SBarry Smith ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 22465a8e39fbSHong Zhang ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 22479e25ed09SBarry Smith 22486cf91177SBarry Smith #if defined(PETSC_USE_INFO) 2249f64065bbSBarry Smith { 22505a8e39fbSHong Zhang PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2251ae15b995SBarry Smith ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2252ae15b995SBarry Smith ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2253ae15b995SBarry Smith ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 2254ae15b995SBarry Smith ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2255335d9088SBarry Smith if (diagonal_fill) { 2256ae15b995SBarry Smith ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2257335d9088SBarry Smith } 2258f64065bbSBarry Smith } 225963ba0a88SBarry Smith #endif 2260bbb6d6a8SBarry Smith 22619e25ed09SBarry Smith /* put together the new matrix */ 226207dbf95fSBarry Smith ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2263719d5645SBarry Smith ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 2264719d5645SBarry Smith b = (Mat_SeqAIJ*)(fact)->data; 2265e6b907acSBarry Smith b->free_a = PETSC_TRUE; 2266e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 22677c922b88SBarry Smith b->singlemalloc = PETSC_FALSE; 2268052f0c41SBarry Smith ierr = PetscMalloc( (bi[n] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 22695a8e39fbSHong Zhang b->j = bj; 22705a8e39fbSHong Zhang b->i = bi; 22715a8e39fbSHong Zhang for (i=0; i<n; i++) bdiag[i] += bi[i]; 22725a8e39fbSHong Zhang b->diag = bdiag; 2273416022c9SBarry Smith b->ilen = 0; 2274416022c9SBarry Smith b->imax = 0; 2275416022c9SBarry Smith b->row = isrow; 2276416022c9SBarry Smith b->col = iscol; 2277c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2278c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 227982bf6240SBarry Smith b->icol = isicol; 228087828ca2SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2281416022c9SBarry Smith /* In b structure: Free imax, ilen, old a, old j. 22825a8e39fbSHong Zhang Allocate bdiag, solve_work, new a, new j */ 2283719d5645SBarry Smith ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 22845a8e39fbSHong Zhang b->maxnz = b->nz = bi[n] ; 2285719d5645SBarry Smith (fact)->info.factor_mallocs = reallocs; 2286719d5645SBarry Smith (fact)->info.fill_ratio_given = f; 2287719d5645SBarry Smith (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2288719d5645SBarry Smith (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 2289719d5645SBarry Smith ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 22903a40ed3dSBarry Smith PetscFunctionReturn(0); 22919e25ed09SBarry Smith } 2292d5d45c9bSBarry Smith 2293a6175056SHong Zhang #undef __FUNCT__ 22945f44c854SHong Zhang #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_newdatastruct" 22955f44c854SHong Zhang PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_newdatastruct(Mat B,Mat A,const MatFactorInfo *info) 22965f44c854SHong Zhang { 22975f44c854SHong Zhang Mat C = B; 22985f44c854SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 22995f44c854SHong Zhang Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 23005f44c854SHong Zhang IS ip=b->row,iip = b->icol; 23015f44c854SHong Zhang PetscErrorCode ierr; 23025f44c854SHong Zhang const PetscInt *rip,*riip; 23035f44c854SHong Zhang PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 23045f44c854SHong Zhang PetscInt *ai=a->i,*aj=a->j; 23055f44c854SHong Zhang PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 23065f44c854SHong Zhang MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 23075f44c854SHong Zhang PetscReal zeropivot,shiftnz; 23085f44c854SHong Zhang PetscReal shiftpd; 23095f44c854SHong Zhang PetscTruth perm_identity; 23105f44c854SHong Zhang 23115f44c854SHong Zhang PetscFunctionBegin; 23125f44c854SHong Zhang 23135f44c854SHong Zhang shiftnz = info->shiftnz; 23145f44c854SHong Zhang shiftpd = info->shiftpd; 23155f44c854SHong Zhang zeropivot = info->zeropivot; 23165f44c854SHong Zhang 23175f44c854SHong Zhang ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 23185f44c854SHong Zhang ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 23195f44c854SHong Zhang 23205f44c854SHong Zhang /* allocate working arrays 23215f44c854SHong Zhang c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 23225f44c854SHong Zhang il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 23235f44c854SHong Zhang */ 23245f44c854SHong Zhang nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 23255f44c854SHong Zhang ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 23265f44c854SHong Zhang c2r = il + mbs; 23275f44c854SHong Zhang rtmp = (MatScalar*)(c2r + mbs); 23285f44c854SHong Zhang 23295f44c854SHong Zhang for (i=0; i<mbs; i++) { 23305f44c854SHong Zhang c2r[i] = mbs; 23315f44c854SHong Zhang } 23325f44c854SHong Zhang il[0] = 0; 23335f44c854SHong Zhang 23345f44c854SHong Zhang for (k = 0; k<mbs; k++){ 23355f44c854SHong Zhang /* zero rtmp */ 23365f44c854SHong Zhang nz = bi[k+1] - bi[k]; 23375f44c854SHong Zhang bjtmp = bj + bi[k]; 23385f44c854SHong Zhang for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 23395f44c854SHong Zhang 23405f44c854SHong Zhang /* load in initial unfactored row */ 23415f44c854SHong Zhang bval = ba + bi[k]; 23425f44c854SHong Zhang jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 23435f44c854SHong Zhang for (j = jmin; j < jmax; j++){ 23445f44c854SHong Zhang col = riip[aj[j]]; 23455f44c854SHong Zhang if (col >= k){ /* only take upper triangular entry */ 23465f44c854SHong Zhang rtmp[col] = aa[j]; 23475f44c854SHong Zhang *bval++ = 0.0; /* for in-place factorization */ 23485f44c854SHong Zhang } 23495f44c854SHong Zhang } 23505f44c854SHong Zhang /* shift the diagonal of the matrix */ 23515f44c854SHong Zhang 23525f44c854SHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 23535f44c854SHong Zhang dk = rtmp[k]; 23545f44c854SHong Zhang i = c2r[k]; /* first row to be added to k_th row */ 23555f44c854SHong Zhang 23565f44c854SHong Zhang while (i < k){ 23575f44c854SHong Zhang nexti = c2r[i]; /* next row to be added to k_th row */ 23585f44c854SHong Zhang 23595f44c854SHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 23605f44c854SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 23615f44c854SHong Zhang uikdi = - ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 23625f44c854SHong Zhang dk += uikdi*ba[ili]; /* update diag[k] */ 23635f44c854SHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 23645f44c854SHong Zhang 23655f44c854SHong Zhang /* add multiple of row i to k-th row */ 23665f44c854SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 23675f44c854SHong Zhang if (jmin < jmax){ 23685f44c854SHong Zhang for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 23695f44c854SHong Zhang /* update il and c2r for row i */ 23705f44c854SHong Zhang il[i] = jmin; 23715f44c854SHong Zhang j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 23725f44c854SHong Zhang } 23735f44c854SHong Zhang i = nexti; 23745f44c854SHong Zhang } 23755f44c854SHong Zhang 23765f44c854SHong Zhang /* copy data into U(k,:) */ 23775f44c854SHong Zhang ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 23785f44c854SHong Zhang jmin = bi[k]; jmax = bi[k+1]-1; 23795f44c854SHong Zhang if (jmin < jmax) { 23805f44c854SHong Zhang for (j=jmin; j<jmax; j++){ 23815f44c854SHong Zhang col = bj[j]; ba[j] = rtmp[col]; 23825f44c854SHong Zhang } 23835f44c854SHong Zhang /* add the k-th row into il and c2r */ 23845f44c854SHong Zhang il[k] = jmin; 23855f44c854SHong Zhang i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 23865f44c854SHong Zhang } 23875f44c854SHong Zhang } 23885f44c854SHong Zhang 23895f44c854SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 23905f44c854SHong Zhang ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 23915f44c854SHong Zhang ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 23925f44c854SHong Zhang 23935f44c854SHong Zhang ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 23945f44c854SHong Zhang if (perm_identity){ 23955f44c854SHong Zhang (B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_newdatastruct; 23965f44c854SHong Zhang (B)->ops->solvetranspose = 0; 23975f44c854SHong Zhang (B)->ops->forwardsolve = 0; 23985f44c854SHong Zhang (B)->ops->backwardsolve = 0; 23995f44c854SHong Zhang } else { 2400783ef271SHong Zhang (B)->ops->solve = MatSolve_SeqSBAIJ_1_newdatastruct; 24015f44c854SHong Zhang (B)->ops->solvetranspose = 0; 24025f44c854SHong Zhang (B)->ops->forwardsolve = 0; 24035f44c854SHong Zhang (B)->ops->backwardsolve = 0; 24045f44c854SHong Zhang } 24055f44c854SHong Zhang 24065f44c854SHong Zhang C->assembled = PETSC_TRUE; 24075f44c854SHong Zhang C->preallocated = PETSC_TRUE; 24085f44c854SHong Zhang ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 24095f44c854SHong Zhang PetscFunctionReturn(0); 24105f44c854SHong Zhang } 24115f44c854SHong Zhang 24125f44c854SHong Zhang #undef __FUNCT__ 2413a6175056SHong Zhang #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 24140481f469SBarry Smith PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2415a6175056SHong Zhang { 2416719d5645SBarry Smith Mat C = B; 24170968510aSHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 24182ed133dbSHong Zhang Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 24199bfd6278SHong Zhang IS ip=b->row,iip = b->icol; 24202ed133dbSHong Zhang PetscErrorCode ierr; 24215d0c19d7SBarry Smith const PetscInt *rip,*riip; 24225d0c19d7SBarry Smith PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol; 24232ed133dbSHong Zhang PetscInt *ai=a->i,*aj=a->j; 24242ed133dbSHong Zhang PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2425622e2028SHong Zhang MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2426540703acSHong Zhang PetscReal zeropivot,rs,shiftnz; 2427fbf22428SSatish Balay PetscReal shiftpd; 2428540703acSHong Zhang ChShift_Ctx sctx; 2429540703acSHong Zhang PetscInt newshift; 2430db4efbfdSBarry Smith PetscTruth perm_identity; 2431d5d45c9bSBarry Smith 2432a6175056SHong Zhang PetscFunctionBegin; 2433540703acSHong Zhang shiftnz = info->shiftnz; 2434540703acSHong Zhang shiftpd = info->shiftpd; 2435ee45ca4aSHong Zhang zeropivot = info->zeropivot; 2436ee45ca4aSHong Zhang 24372ed133dbSHong Zhang ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 24389bfd6278SHong Zhang ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 24392ed133dbSHong Zhang 24402ed133dbSHong Zhang /* initialization */ 24412ed133dbSHong Zhang nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 24422ed133dbSHong Zhang ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 24432ed133dbSHong Zhang jl = il + mbs; 24442ed133dbSHong Zhang rtmp = (MatScalar*)(jl + mbs); 24452ed133dbSHong Zhang 2446540703acSHong Zhang sctx.shift_amount = 0; 2447540703acSHong Zhang sctx.nshift = 0; 24482ed133dbSHong Zhang do { 2449540703acSHong Zhang sctx.chshift = PETSC_FALSE; 24502ed133dbSHong Zhang for (i=0; i<mbs; i++) { 24512ed133dbSHong Zhang rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 2452a6175056SHong Zhang } 24532ed133dbSHong Zhang 24542ed133dbSHong Zhang for (k = 0; k<mbs; k++){ 2455c05c3958SHong Zhang bval = ba + bi[k]; 24562ed133dbSHong Zhang /* initialize k-th row by the perm[k]-th row of A */ 24572ed133dbSHong Zhang jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 24582ed133dbSHong Zhang for (j = jmin; j < jmax; j++){ 24599bfd6278SHong Zhang col = riip[aj[j]]; 24602ed133dbSHong Zhang if (col >= k){ /* only take upper triangular entry */ 24612ed133dbSHong Zhang rtmp[col] = aa[j]; 2462c05c3958SHong Zhang *bval++ = 0.0; /* for in-place factorization */ 24632ed133dbSHong Zhang } 24642ed133dbSHong Zhang } 246539e3d630SHong Zhang /* shift the diagonal of the matrix */ 2466540703acSHong Zhang if (sctx.nshift) rtmp[k] += sctx.shift_amount; 24672ed133dbSHong Zhang 24682ed133dbSHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 24692ed133dbSHong Zhang dk = rtmp[k]; 24702ed133dbSHong Zhang i = jl[k]; /* first row to be added to k_th row */ 24712ed133dbSHong Zhang 24722ed133dbSHong Zhang while (i < k){ 24732ed133dbSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 24742ed133dbSHong Zhang 24752ed133dbSHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 24762ed133dbSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 24772ed133dbSHong Zhang uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 24782ed133dbSHong Zhang dk += uikdi*ba[ili]; 24792ed133dbSHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 24802ed133dbSHong Zhang 24812ed133dbSHong Zhang /* add multiple of row i to k-th row */ 24822ed133dbSHong Zhang jmin = ili + 1; jmax = bi[i+1]; 24832ed133dbSHong Zhang if (jmin < jmax){ 24842ed133dbSHong Zhang for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 24852ed133dbSHong Zhang /* update il and jl for row i */ 24862ed133dbSHong Zhang il[i] = jmin; 24872ed133dbSHong Zhang j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 24882ed133dbSHong Zhang } 24892ed133dbSHong Zhang i = nexti; 24902ed133dbSHong Zhang } 24912ed133dbSHong Zhang 24920697b6caSHong Zhang /* shift the diagonals when zero pivot is detected */ 24930697b6caSHong Zhang /* compute rs=sum of abs(off-diagonal) */ 24940697b6caSHong Zhang rs = 0.0; 24953c9e8598SHong Zhang jmin = bi[k]+1; 24963c9e8598SHong Zhang nz = bi[k+1] - jmin; 24973c9e8598SHong Zhang bcol = bj + jmin; 249804fbf559SBarry Smith for (j=0; j<nz; j++) { 249904fbf559SBarry Smith rs += PetscAbsScalar(rtmp[bcol[j]]); 25003c9e8598SHong Zhang } 2501540703acSHong Zhang 2502540703acSHong Zhang sctx.rs = rs; 2503540703acSHong Zhang sctx.pv = dk; 25045b5bc046SBarry Smith ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 2505117f1891Stmunson 2506117f1891Stmunson if (newshift == 1) { 2507117f1891Stmunson if (!sctx.shift_amount) { 2508117f1891Stmunson sctx.shift_amount = 1e-5; 2509117f1891Stmunson } 2510117f1891Stmunson break; 2511117f1891Stmunson } 25123c9e8598SHong Zhang 25133c9e8598SHong Zhang /* copy data into U(k,:) */ 251439e3d630SHong Zhang ba[bi[k]] = 1.0/dk; /* U(k,k) */ 251539e3d630SHong Zhang jmin = bi[k]+1; jmax = bi[k+1]; 251639e3d630SHong Zhang if (jmin < jmax) { 251739e3d630SHong Zhang for (j=jmin; j<jmax; j++){ 251839e3d630SHong Zhang col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 25193c9e8598SHong Zhang } 252039e3d630SHong Zhang /* add the k-th row into il and jl */ 25213c9e8598SHong Zhang il[k] = jmin; 25223c9e8598SHong Zhang i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 25233c9e8598SHong Zhang } 25243c9e8598SHong Zhang } 2525540703acSHong Zhang } while (sctx.chshift); 25263c9e8598SHong Zhang ierr = PetscFree(il);CHKERRQ(ierr); 25273c9e8598SHong Zhang 252839e3d630SHong Zhang ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 25299bfd6278SHong Zhang ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2530db4efbfdSBarry Smith 2531db4efbfdSBarry Smith ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2532db4efbfdSBarry Smith if (perm_identity){ 2533719d5645SBarry Smith (B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2534719d5645SBarry Smith (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2535719d5645SBarry Smith (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2536719d5645SBarry Smith (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2537db4efbfdSBarry Smith } else { 2538719d5645SBarry Smith (B)->ops->solve = MatSolve_SeqSBAIJ_1; 2539719d5645SBarry Smith (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2540719d5645SBarry Smith (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2541719d5645SBarry Smith (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2542db4efbfdSBarry Smith } 2543db4efbfdSBarry Smith 25443c9e8598SHong Zhang C->assembled = PETSC_TRUE; 25453c9e8598SHong Zhang C->preallocated = PETSC_TRUE; 2546d0f46423SBarry Smith ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2547540703acSHong Zhang if (sctx.nshift){ 2548540703acSHong Zhang if (shiftnz) { 25491e2582c4SBarry Smith ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2550540703acSHong Zhang } else if (shiftpd) { 25511e2582c4SBarry Smith ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 25520697b6caSHong Zhang } 25533c9e8598SHong Zhang } 25543c9e8598SHong Zhang PetscFunctionReturn(0); 25553c9e8598SHong Zhang } 2556a6175056SHong Zhang 2557a6175056SHong Zhang #undef __FUNCT__ 25585f44c854SHong Zhang #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_newdatastruct" 25595f44c854SHong Zhang PetscErrorCode MatICCFactorSymbolic_SeqAIJ_newdatastruct(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2560a6175056SHong Zhang { 25610968510aSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2562ed59401aSHong Zhang Mat_SeqSBAIJ *b; 2563dfbe8321SBarry Smith PetscErrorCode ierr; 256458ebbce7SBarry Smith PetscTruth perm_identity,missing; 25655f44c854SHong Zhang PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 25665d0c19d7SBarry Smith const PetscInt *rip,*riip; 2567ed59401aSHong Zhang PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 256858ebbce7SBarry Smith PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 25695a8e39fbSHong Zhang PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2570ed59401aSHong Zhang PetscReal fill=info->fill,levels=info->levels; 2571a1a86e44SBarry Smith PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2572a1a86e44SBarry Smith PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 25737a48dd6fSHong Zhang PetscBT lnkbt; 2574b635d36bSHong Zhang IS iperm; 2575a6175056SHong Zhang 2576a6175056SHong Zhang PetscFunctionBegin; 2577d0f46423SBarry Smith if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 257858ebbce7SBarry Smith ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 257958ebbce7SBarry Smith if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2580653a6975SHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2581b635d36bSHong Zhang ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 25827a48dd6fSHong Zhang 258339e3d630SHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 25845f44c854SHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 258539e3d630SHong Zhang ui[0] = 0; 258639e3d630SHong Zhang 2587b635d36bSHong Zhang /* ICC(0) without matrix ordering: simply copies fill pattern */ 2588622e2028SHong Zhang if (!levels && perm_identity) { 258958ebbce7SBarry Smith 2590ed59401aSHong Zhang for (i=0; i<am; i++) { 259139e3d630SHong Zhang ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 25925f44c854SHong Zhang udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 25935f44c854SHong Zhang } 25945f44c854SHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 25955f44c854SHong Zhang cols = uj; 25965f44c854SHong Zhang for (i=0; i<am; i++) { 25975f44c854SHong Zhang aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 25985f44c854SHong Zhang ncols = ui[i+1] - ui[i] - 1; 25995f44c854SHong Zhang for (j=0; j<ncols; j++) *cols++ = aj[j]; 26005f44c854SHong Zhang *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 26015f44c854SHong Zhang } 26025f44c854SHong Zhang } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2603783ef271SHong Zhang 26045f44c854SHong Zhang ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 26055f44c854SHong Zhang ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 26065f44c854SHong Zhang 26075f44c854SHong Zhang /* initialization */ 26085f44c854SHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 26095f44c854SHong Zhang 26105f44c854SHong Zhang /* jl: linked list for storing indices of the pivot rows 26115f44c854SHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 26125f44c854SHong Zhang ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 26135f44c854SHong Zhang il = jl + am; 26145f44c854SHong Zhang uj_ptr = (PetscInt**)(il + am); 26155f44c854SHong Zhang uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 26165f44c854SHong Zhang for (i=0; i<am; i++){ 26175f44c854SHong Zhang jl[i] = am; il[i] = 0; 26185f44c854SHong Zhang } 26195f44c854SHong Zhang 26205f44c854SHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 26215f44c854SHong Zhang nlnk = am + 1; 26225f44c854SHong Zhang ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 26235f44c854SHong Zhang 26245f44c854SHong Zhang /* initial FreeSpace size is fill*(ai[am]+1) */ 26255f44c854SHong Zhang ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 26265f44c854SHong Zhang current_space = free_space; 26275f44c854SHong Zhang ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 26285f44c854SHong Zhang current_space_lvl = free_space_lvl; 26295f44c854SHong Zhang 26305f44c854SHong Zhang for (k=0; k<am; k++){ /* for each active row k */ 26315f44c854SHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 26325f44c854SHong Zhang nzk = 0; 26335f44c854SHong Zhang ncols = ai[rip[k]+1] - ai[rip[k]]; 26345f44c854SHong Zhang if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 26355f44c854SHong Zhang ncols_upper = 0; 26365f44c854SHong Zhang for (j=0; j<ncols; j++){ 26375f44c854SHong Zhang i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 26385f44c854SHong Zhang if (riip[i] >= k){ /* only take upper triangular entry */ 26395f44c854SHong Zhang ajtmp[ncols_upper] = i; 26405f44c854SHong Zhang ncols_upper++; 26415f44c854SHong Zhang } 26425f44c854SHong Zhang } 26435f44c854SHong Zhang ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 26445f44c854SHong Zhang nzk += nlnk; 26455f44c854SHong Zhang 26465f44c854SHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 26475f44c854SHong Zhang prow = jl[k]; /* 1st pivot row */ 26485f44c854SHong Zhang 26495f44c854SHong Zhang while (prow < k){ 26505f44c854SHong Zhang nextprow = jl[prow]; 26515f44c854SHong Zhang 26525f44c854SHong Zhang /* merge prow into k-th row */ 26535f44c854SHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 26545f44c854SHong Zhang jmax = ui[prow+1]; 26555f44c854SHong Zhang ncols = jmax-jmin; 26565f44c854SHong Zhang i = jmin - ui[prow]; 26575f44c854SHong Zhang cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 26585f44c854SHong Zhang uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 26595f44c854SHong Zhang j = *(uj - 1); 26605f44c854SHong Zhang ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 26615f44c854SHong Zhang nzk += nlnk; 26625f44c854SHong Zhang 26635f44c854SHong Zhang /* update il and jl for prow */ 26645f44c854SHong Zhang if (jmin < jmax){ 26655f44c854SHong Zhang il[prow] = jmin; 26665f44c854SHong Zhang j = *cols; jl[prow] = jl[j]; jl[j] = prow; 26675f44c854SHong Zhang } 26685f44c854SHong Zhang prow = nextprow; 26695f44c854SHong Zhang } 26705f44c854SHong Zhang 26715f44c854SHong Zhang /* if free space is not available, make more free space */ 26725f44c854SHong Zhang if (current_space->local_remaining<nzk) { 26735f44c854SHong Zhang i = am - k + 1; /* num of unfactored rows */ 26745f44c854SHong Zhang i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 26755f44c854SHong Zhang ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 26765f44c854SHong Zhang ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 26775f44c854SHong Zhang reallocs++; 26785f44c854SHong Zhang } 26795f44c854SHong Zhang 26805f44c854SHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 26815f44c854SHong Zhang if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 26825f44c854SHong Zhang ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 26835f44c854SHong Zhang 26845f44c854SHong Zhang /* add the k-th row into il and jl */ 26855f44c854SHong Zhang if (nzk > 1){ 26865f44c854SHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 26875f44c854SHong Zhang jl[k] = jl[i]; jl[i] = k; 26885f44c854SHong Zhang il[k] = ui[k] + 1; 26895f44c854SHong Zhang } 26905f44c854SHong Zhang uj_ptr[k] = current_space->array; 26915f44c854SHong Zhang uj_lvl_ptr[k] = current_space_lvl->array; 26925f44c854SHong Zhang 26935f44c854SHong Zhang current_space->array += nzk; 26945f44c854SHong Zhang current_space->local_used += nzk; 26955f44c854SHong Zhang current_space->local_remaining -= nzk; 26965f44c854SHong Zhang 26975f44c854SHong Zhang current_space_lvl->array += nzk; 26985f44c854SHong Zhang current_space_lvl->local_used += nzk; 26995f44c854SHong Zhang current_space_lvl->local_remaining -= nzk; 27005f44c854SHong Zhang 27015f44c854SHong Zhang ui[k+1] = ui[k] + nzk; 27025f44c854SHong Zhang } 27035f44c854SHong Zhang 27045f44c854SHong Zhang #if defined(PETSC_USE_INFO) 27055f44c854SHong Zhang if (ai[am] != 0) { 27065f44c854SHong Zhang PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 27075f44c854SHong Zhang ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 27085f44c854SHong Zhang ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 27095f44c854SHong Zhang ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 27105f44c854SHong Zhang } else { 27115f44c854SHong Zhang ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 27125f44c854SHong Zhang } 27135f44c854SHong Zhang #endif 27145f44c854SHong Zhang 27155f44c854SHong Zhang ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 27165f44c854SHong Zhang ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 27175f44c854SHong Zhang ierr = PetscFree(jl);CHKERRQ(ierr); 27185f44c854SHong Zhang ierr = PetscFree(ajtmp);CHKERRQ(ierr); 27195f44c854SHong Zhang 27205f44c854SHong Zhang /* destroy list of free space and other temporary array(s) */ 27215f44c854SHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2722783ef271SHong Zhang ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); 27235f44c854SHong Zhang ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 27245f44c854SHong Zhang ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 27255f44c854SHong Zhang 27265f44c854SHong Zhang } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 27275f44c854SHong Zhang 27285f44c854SHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 27295f44c854SHong Zhang 27305f44c854SHong Zhang b = (Mat_SeqSBAIJ*)(fact)->data; 27315f44c854SHong Zhang b->singlemalloc = PETSC_FALSE; 27325f44c854SHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 27335f44c854SHong Zhang b->j = uj; 27345f44c854SHong Zhang b->i = ui; 27355f44c854SHong Zhang b->diag = udiag; 27367f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 27375f44c854SHong Zhang b->ilen = 0; 27385f44c854SHong Zhang b->imax = 0; 27395f44c854SHong Zhang b->row = perm; 27405f44c854SHong Zhang b->col = perm; 27415f44c854SHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 27425f44c854SHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 27435f44c854SHong Zhang b->icol = iperm; 27445f44c854SHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 27455f44c854SHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 27465f44c854SHong Zhang ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 27475f44c854SHong Zhang b->maxnz = b->nz = ui[am]; 27485f44c854SHong Zhang b->free_a = PETSC_TRUE; 27495f44c854SHong Zhang b->free_ij = PETSC_TRUE; 27505f44c854SHong Zhang 27515f44c854SHong Zhang (fact)->info.factor_mallocs = reallocs; 27525f44c854SHong Zhang (fact)->info.fill_ratio_given = fill; 27535f44c854SHong Zhang if (ai[am] != 0) { 27545f44c854SHong Zhang (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 27555f44c854SHong Zhang } else { 27565f44c854SHong Zhang (fact)->info.fill_ratio_needed = 0.0; 27575f44c854SHong Zhang } 27585f44c854SHong Zhang (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_newdatastruct; 27595f44c854SHong Zhang PetscFunctionReturn(0); 27605f44c854SHong Zhang } 27615f44c854SHong Zhang 27625f44c854SHong Zhang #undef __FUNCT__ 27635f44c854SHong Zhang #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 27645f44c854SHong Zhang PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 27655f44c854SHong Zhang { 27665f44c854SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 27675f44c854SHong Zhang Mat_SeqSBAIJ *b; 27685f44c854SHong Zhang PetscErrorCode ierr; 27695f44c854SHong Zhang PetscTruth perm_identity,missing; 27705f44c854SHong Zhang PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 27715f44c854SHong Zhang const PetscInt *rip,*riip; 27725f44c854SHong Zhang PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 27735f44c854SHong Zhang PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 27745f44c854SHong Zhang PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 27755f44c854SHong Zhang PetscReal fill=info->fill,levels=info->levels; 27765f44c854SHong Zhang PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 27775f44c854SHong Zhang PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 27785f44c854SHong Zhang PetscBT lnkbt; 27795f44c854SHong Zhang IS iperm; 27805f44c854SHong Zhang PetscTruth newdatastruct=PETSC_FALSE; 27815f44c854SHong Zhang 27825f44c854SHong Zhang PetscFunctionBegin; 27835f44c854SHong Zhang ierr = PetscOptionsGetTruth(PETSC_NULL,"-icc_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr); 27845f44c854SHong Zhang if(newdatastruct){ 27855f44c854SHong Zhang ierr = MatICCFactorSymbolic_SeqAIJ_newdatastruct(fact,A,perm,info);CHKERRQ(ierr); 27865f44c854SHong Zhang PetscFunctionReturn(0); 27875f44c854SHong Zhang } 27885f44c854SHong Zhang 27895f44c854SHong Zhang if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 27905f44c854SHong Zhang ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 27915f44c854SHong Zhang if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 27925f44c854SHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 27935f44c854SHong Zhang ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 27945f44c854SHong Zhang 27955f44c854SHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 27965f44c854SHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 27975f44c854SHong Zhang ui[0] = 0; 27985f44c854SHong Zhang 27995f44c854SHong Zhang /* ICC(0) without matrix ordering: simply copies fill pattern */ 28005f44c854SHong Zhang if (!levels && perm_identity) { 28015f44c854SHong Zhang 28025f44c854SHong Zhang for (i=0; i<am; i++) { 28035f44c854SHong Zhang ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 28045f44c854SHong Zhang udiag[i] = ui[i]; 2805ed59401aSHong Zhang } 280639e3d630SHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 280739e3d630SHong Zhang cols = uj; 2808ed59401aSHong Zhang for (i=0; i<am; i++) { 2809ed59401aSHong Zhang aj = a->j + a->diag[i]; 281039e3d630SHong Zhang ncols = ui[i+1] - ui[i]; 281139e3d630SHong Zhang for (j=0; j<ncols; j++) *cols++ = *aj++; 2812ed59401aSHong Zhang } 281339e3d630SHong Zhang } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2814b635d36bSHong Zhang ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2815b635d36bSHong Zhang ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2816b635d36bSHong Zhang 28177a48dd6fSHong Zhang /* initialization */ 28185a8e39fbSHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 28197a48dd6fSHong Zhang 28207a48dd6fSHong Zhang /* jl: linked list for storing indices of the pivot rows 28217a48dd6fSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 28221393bc97SHong Zhang ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 28237a48dd6fSHong Zhang il = jl + am; 28247a48dd6fSHong Zhang uj_ptr = (PetscInt**)(il + am); 28257a48dd6fSHong Zhang uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 28267a48dd6fSHong Zhang for (i=0; i<am; i++){ 28277a48dd6fSHong Zhang jl[i] = am; il[i] = 0; 28287a48dd6fSHong Zhang } 28297a48dd6fSHong Zhang 28307a48dd6fSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 28317a48dd6fSHong Zhang nlnk = am + 1; 28322ed133dbSHong Zhang ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 28337a48dd6fSHong Zhang 28347a48dd6fSHong Zhang /* initial FreeSpace size is fill*(ai[am]+1) */ 2835a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 28367a48dd6fSHong Zhang current_space = free_space; 2837a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 28387a48dd6fSHong Zhang current_space_lvl = free_space_lvl; 28397a48dd6fSHong Zhang 28407a48dd6fSHong Zhang for (k=0; k<am; k++){ /* for each active row k */ 28417a48dd6fSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 28427a48dd6fSHong Zhang nzk = 0; 2843622e2028SHong Zhang ncols = ai[rip[k]+1] - ai[rip[k]]; 28443b4a8b6dSBarry Smith if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2845622e2028SHong Zhang ncols_upper = 0; 2846622e2028SHong Zhang for (j=0; j<ncols; j++){ 2847b635d36bSHong Zhang i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2848b635d36bSHong Zhang if (riip[i] >= k){ /* only take upper triangular entry */ 28495a8e39fbSHong Zhang ajtmp[ncols_upper] = i; 2850622e2028SHong Zhang ncols_upper++; 2851622e2028SHong Zhang } 2852622e2028SHong Zhang } 2853b635d36bSHong Zhang ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 28547a48dd6fSHong Zhang nzk += nlnk; 28557a48dd6fSHong Zhang 28567a48dd6fSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 28577a48dd6fSHong Zhang prow = jl[k]; /* 1st pivot row */ 28587a48dd6fSHong Zhang 28597a48dd6fSHong Zhang while (prow < k){ 28607a48dd6fSHong Zhang nextprow = jl[prow]; 28617a48dd6fSHong Zhang 28627a48dd6fSHong Zhang /* merge prow into k-th row */ 28637a48dd6fSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 28647a48dd6fSHong Zhang jmax = ui[prow+1]; 28657a48dd6fSHong Zhang ncols = jmax-jmin; 2866ed59401aSHong Zhang i = jmin - ui[prow]; 2867ed59401aSHong Zhang cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 28685a8e39fbSHong Zhang uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 28695a8e39fbSHong Zhang j = *(uj - 1); 28705a8e39fbSHong Zhang ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 28717a48dd6fSHong Zhang nzk += nlnk; 28727a48dd6fSHong Zhang 28737a48dd6fSHong Zhang /* update il and jl for prow */ 28747a48dd6fSHong Zhang if (jmin < jmax){ 28757a48dd6fSHong Zhang il[prow] = jmin; 28767a48dd6fSHong Zhang j = *cols; jl[prow] = jl[j]; jl[j] = prow; 28777a48dd6fSHong Zhang } 28787a48dd6fSHong Zhang prow = nextprow; 28797a48dd6fSHong Zhang } 28807a48dd6fSHong Zhang 28817a48dd6fSHong Zhang /* if free space is not available, make more free space */ 28827a48dd6fSHong Zhang if (current_space->local_remaining<nzk) { 28837a48dd6fSHong Zhang i = am - k + 1; /* num of unfactored rows */ 28847a48dd6fSHong Zhang i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2885a1a86e44SBarry Smith ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2886a1a86e44SBarry Smith ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 28877a48dd6fSHong Zhang reallocs++; 28887a48dd6fSHong Zhang } 28897a48dd6fSHong Zhang 28907a48dd6fSHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 2891b635d36bSHong Zhang if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 28922ed133dbSHong Zhang ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 28937a48dd6fSHong Zhang 28947a48dd6fSHong Zhang /* add the k-th row into il and jl */ 289539e3d630SHong Zhang if (nzk > 1){ 28967a48dd6fSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 28977a48dd6fSHong Zhang jl[k] = jl[i]; jl[i] = k; 28987a48dd6fSHong Zhang il[k] = ui[k] + 1; 28997a48dd6fSHong Zhang } 29007a48dd6fSHong Zhang uj_ptr[k] = current_space->array; 29017a48dd6fSHong Zhang uj_lvl_ptr[k] = current_space_lvl->array; 29027a48dd6fSHong Zhang 29037a48dd6fSHong Zhang current_space->array += nzk; 29047a48dd6fSHong Zhang current_space->local_used += nzk; 29057a48dd6fSHong Zhang current_space->local_remaining -= nzk; 29067a48dd6fSHong Zhang 29077a48dd6fSHong Zhang current_space_lvl->array += nzk; 29087a48dd6fSHong Zhang current_space_lvl->local_used += nzk; 29097a48dd6fSHong Zhang current_space_lvl->local_remaining -= nzk; 29107a48dd6fSHong Zhang 29117a48dd6fSHong Zhang ui[k+1] = ui[k] + nzk; 29127a48dd6fSHong Zhang } 29137a48dd6fSHong Zhang 29146cf91177SBarry Smith #if defined(PETSC_USE_INFO) 29157a48dd6fSHong Zhang if (ai[am] != 0) { 291639e3d630SHong Zhang PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2917ae15b995SBarry Smith ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2918ae15b995SBarry Smith ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2919ae15b995SBarry Smith ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 29207a48dd6fSHong Zhang } else { 2921ae15b995SBarry Smith ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 29227a48dd6fSHong Zhang } 292363ba0a88SBarry Smith #endif 29247a48dd6fSHong Zhang 29257a48dd6fSHong Zhang ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2926b635d36bSHong Zhang ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 29277a48dd6fSHong Zhang ierr = PetscFree(jl);CHKERRQ(ierr); 29285a8e39fbSHong Zhang ierr = PetscFree(ajtmp);CHKERRQ(ierr); 29297a48dd6fSHong Zhang 29307a48dd6fSHong Zhang /* destroy list of free space and other temporary array(s) */ 29317a48dd6fSHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2932a1a86e44SBarry Smith ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 29332ed133dbSHong Zhang ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2934a1a86e44SBarry Smith ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 29357a48dd6fSHong Zhang 293639e3d630SHong Zhang } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 293739e3d630SHong Zhang 29387a48dd6fSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 29397a48dd6fSHong Zhang 2940719d5645SBarry Smith b = (Mat_SeqSBAIJ*)(fact)->data; 29417a48dd6fSHong Zhang b->singlemalloc = PETSC_FALSE; 29427a48dd6fSHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 29437a48dd6fSHong Zhang b->j = uj; 29447a48dd6fSHong Zhang b->i = ui; 29455f44c854SHong Zhang b->diag = udiag; 29467f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 29477a48dd6fSHong Zhang b->ilen = 0; 29487a48dd6fSHong Zhang b->imax = 0; 29497a48dd6fSHong Zhang b->row = perm; 2950b635d36bSHong Zhang b->col = perm; 2951b635d36bSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2952b635d36bSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2953b635d36bSHong Zhang b->icol = iperm; 29547a48dd6fSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 29557a48dd6fSHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2956719d5645SBarry Smith ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 29577a48dd6fSHong Zhang b->maxnz = b->nz = ui[am]; 2958e6b907acSBarry Smith b->free_a = PETSC_TRUE; 2959e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 29607a48dd6fSHong Zhang 2961719d5645SBarry Smith (fact)->info.factor_mallocs = reallocs; 2962719d5645SBarry Smith (fact)->info.fill_ratio_given = fill; 29637a48dd6fSHong Zhang if (ai[am] != 0) { 2964719d5645SBarry Smith (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 29657a48dd6fSHong Zhang } else { 2966719d5645SBarry Smith (fact)->info.fill_ratio_needed = 0.0; 29677a48dd6fSHong Zhang } 2968719d5645SBarry Smith (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2969a6175056SHong Zhang PetscFunctionReturn(0); 2970a6175056SHong Zhang } 2971d5d45c9bSBarry Smith 2972f76d2b81SHong Zhang #undef __FUNCT__ 2973f76d2b81SHong Zhang #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 29740481f469SBarry Smith PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2975f76d2b81SHong Zhang { 2976f76d2b81SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 297710c27e3dSHong Zhang Mat_SeqSBAIJ *b; 2978dfbe8321SBarry Smith PetscErrorCode ierr; 2979f76d2b81SHong Zhang PetscTruth perm_identity; 298010c27e3dSHong Zhang PetscReal fill = info->fill; 29815d0c19d7SBarry Smith const PetscInt *rip,*riip; 29825d0c19d7SBarry Smith PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 298310c27e3dSHong Zhang PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 29842ed133dbSHong Zhang PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2985a1a86e44SBarry Smith PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 298610c27e3dSHong Zhang PetscBT lnkbt; 29872ed133dbSHong Zhang IS iperm; 2988f76d2b81SHong Zhang 2989f76d2b81SHong Zhang PetscFunctionBegin; 2990d0f46423SBarry Smith if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 29912ed133dbSHong Zhang /* check whether perm is the identity mapping */ 2992f76d2b81SHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 29932ed133dbSHong Zhang ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 29942ed133dbSHong Zhang ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 29959bfd6278SHong Zhang ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 299610c27e3dSHong Zhang 299710c27e3dSHong Zhang /* initialization */ 29981393bc97SHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 299910c27e3dSHong Zhang ui[0] = 0; 300010c27e3dSHong Zhang 300110c27e3dSHong Zhang /* jl: linked list for storing indices of the pivot rows 30021393bc97SHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 30031393bc97SHong Zhang ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 30041393bc97SHong Zhang il = jl + am; 30051393bc97SHong Zhang cols = il + am; 30061393bc97SHong Zhang ui_ptr = (PetscInt**)(cols + am); 30071393bc97SHong Zhang for (i=0; i<am; i++){ 30081393bc97SHong Zhang jl[i] = am; il[i] = 0; 3009f76d2b81SHong Zhang } 301010c27e3dSHong Zhang 301110c27e3dSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 30121393bc97SHong Zhang nlnk = am + 1; 30131393bc97SHong Zhang ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 301410c27e3dSHong Zhang 30151393bc97SHong Zhang /* initial FreeSpace size is fill*(ai[am]+1) */ 3016a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 301710c27e3dSHong Zhang current_space = free_space; 301810c27e3dSHong Zhang 30191393bc97SHong Zhang for (k=0; k<am; k++){ /* for each active row k */ 302010c27e3dSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 302110c27e3dSHong Zhang nzk = 0; 30222ed133dbSHong Zhang ncols = ai[rip[k]+1] - ai[rip[k]]; 30233b4a8b6dSBarry Smith if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 30242ed133dbSHong Zhang ncols_upper = 0; 30252ed133dbSHong Zhang for (j=0; j<ncols; j++){ 30269bfd6278SHong Zhang i = riip[*(aj + ai[rip[k]] + j)]; 30272ed133dbSHong Zhang if (i >= k){ /* only take upper triangular entry */ 30282ed133dbSHong Zhang cols[ncols_upper] = i; 30292ed133dbSHong Zhang ncols_upper++; 30302ed133dbSHong Zhang } 30312ed133dbSHong Zhang } 30321393bc97SHong Zhang ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 303310c27e3dSHong Zhang nzk += nlnk; 303410c27e3dSHong Zhang 303510c27e3dSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 303610c27e3dSHong Zhang prow = jl[k]; /* 1st pivot row */ 303710c27e3dSHong Zhang 303810c27e3dSHong Zhang while (prow < k){ 303910c27e3dSHong Zhang nextprow = jl[prow]; 304010c27e3dSHong Zhang /* merge prow into k-th row */ 30411393bc97SHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 304210c27e3dSHong Zhang jmax = ui[prow+1]; 304310c27e3dSHong Zhang ncols = jmax-jmin; 30441393bc97SHong Zhang uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 30455a8e39fbSHong Zhang ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 304610c27e3dSHong Zhang nzk += nlnk; 304710c27e3dSHong Zhang 304810c27e3dSHong Zhang /* update il and jl for prow */ 304910c27e3dSHong Zhang if (jmin < jmax){ 305010c27e3dSHong Zhang il[prow] = jmin; 30512ed133dbSHong Zhang j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 305210c27e3dSHong Zhang } 305310c27e3dSHong Zhang prow = nextprow; 305410c27e3dSHong Zhang } 305510c27e3dSHong Zhang 305610c27e3dSHong Zhang /* if free space is not available, make more free space */ 305710c27e3dSHong Zhang if (current_space->local_remaining<nzk) { 30581393bc97SHong Zhang i = am - k + 1; /* num of unfactored rows */ 305910c27e3dSHong Zhang i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3060a1a86e44SBarry Smith ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 306110c27e3dSHong Zhang reallocs++; 306210c27e3dSHong Zhang } 306310c27e3dSHong Zhang 306410c27e3dSHong Zhang /* copy data into free space, then initialize lnk */ 30651393bc97SHong Zhang ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 306610c27e3dSHong Zhang 306710c27e3dSHong Zhang /* add the k-th row into il and jl */ 306810c27e3dSHong Zhang if (nzk-1 > 0){ 30691393bc97SHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 307010c27e3dSHong Zhang jl[k] = jl[i]; jl[i] = k; 307110c27e3dSHong Zhang il[k] = ui[k] + 1; 307210c27e3dSHong Zhang } 30732ed133dbSHong Zhang ui_ptr[k] = current_space->array; 307410c27e3dSHong Zhang current_space->array += nzk; 307510c27e3dSHong Zhang current_space->local_used += nzk; 307610c27e3dSHong Zhang current_space->local_remaining -= nzk; 307710c27e3dSHong Zhang 307810c27e3dSHong Zhang ui[k+1] = ui[k] + nzk; 307910c27e3dSHong Zhang } 308010c27e3dSHong Zhang 30816cf91177SBarry Smith #if defined(PETSC_USE_INFO) 30821393bc97SHong Zhang if (ai[am] != 0) { 30831393bc97SHong Zhang PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3084ae15b995SBarry Smith ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 3085ae15b995SBarry Smith ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 3086ae15b995SBarry Smith ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 308710c27e3dSHong Zhang } else { 3088ae15b995SBarry Smith ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 308910c27e3dSHong Zhang } 309063ba0a88SBarry Smith #endif 309110c27e3dSHong Zhang 309210c27e3dSHong Zhang ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 30939bfd6278SHong Zhang ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 309410c27e3dSHong Zhang ierr = PetscFree(jl);CHKERRQ(ierr); 309510c27e3dSHong Zhang 309610c27e3dSHong Zhang /* destroy list of free space and other temporary array(s) */ 30971393bc97SHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 3098a1a86e44SBarry Smith ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 309910c27e3dSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 310010c27e3dSHong Zhang 310110c27e3dSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 310210c27e3dSHong Zhang 3103719d5645SBarry Smith b = (Mat_SeqSBAIJ*)(fact)->data; 310410c27e3dSHong Zhang b->singlemalloc = PETSC_FALSE; 3105e6b907acSBarry Smith b->free_a = PETSC_TRUE; 3106e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 31071393bc97SHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 310810c27e3dSHong Zhang b->j = uj; 310910c27e3dSHong Zhang b->i = ui; 311010c27e3dSHong Zhang b->diag = 0; 311110c27e3dSHong Zhang b->ilen = 0; 311210c27e3dSHong Zhang b->imax = 0; 311310c27e3dSHong Zhang b->row = perm; 31149bfd6278SHong Zhang b->col = perm; 31159bfd6278SHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 31169bfd6278SHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 31179bfd6278SHong Zhang b->icol = iperm; 311810c27e3dSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 31191393bc97SHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3120719d5645SBarry Smith ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 31211393bc97SHong Zhang b->maxnz = b->nz = ui[am]; 312210c27e3dSHong Zhang 3123719d5645SBarry Smith (fact)->info.factor_mallocs = reallocs; 3124719d5645SBarry Smith (fact)->info.fill_ratio_given = fill; 31251393bc97SHong Zhang if (ai[am] != 0) { 3126719d5645SBarry Smith (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 312710c27e3dSHong Zhang } else { 3128719d5645SBarry Smith (fact)->info.fill_ratio_needed = 0.0; 312910c27e3dSHong Zhang } 3130719d5645SBarry Smith (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 3131f76d2b81SHong Zhang PetscFunctionReturn(0); 3132f76d2b81SHong Zhang } 3133599ef60dSHong Zhang 3134599ef60dSHong Zhang #undef __FUNCT__ 3135894a2336SHong Zhang #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_newdatastruct" 3136894a2336SHong Zhang PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_newdatastruct(Mat A,Vec bb,Vec xx) 3137599ef60dSHong Zhang { 31381d098868SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 31391d098868SHong Zhang PetscErrorCode ierr; 31401d098868SHong Zhang PetscInt n = A->rmap->n; 31418fc3a347SHong Zhang const PetscInt *ai = a->i,*aj = a->j,*vi; 31421d098868SHong Zhang PetscScalar *x,sum; 31431d098868SHong Zhang const PetscScalar *b; 31441d098868SHong Zhang const MatScalar *aa = a->a,*v; 31458fc3a347SHong Zhang PetscInt i,nz; 31461d098868SHong Zhang 31471d098868SHong Zhang PetscFunctionBegin; 31481d098868SHong Zhang if (!n) PetscFunctionReturn(0); 31491d098868SHong Zhang 31501d098868SHong Zhang ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 31511d098868SHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 31521d098868SHong Zhang 31531d098868SHong Zhang /* forward solve the lower triangular */ 31541d098868SHong Zhang x[0] = b[0]; 31551d098868SHong Zhang v = aa; 31561d098868SHong Zhang vi = aj; 31571d098868SHong Zhang for (i=1; i<n; i++) { 31581d098868SHong Zhang nz = ai[i+1] - ai[i]; 31591d098868SHong Zhang sum = b[i]; 3160003131ecSBarry Smith PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3161e0e43300SBarry Smith v += nz; 3162e0e43300SBarry Smith vi += nz; 31631d098868SHong Zhang x[i] = sum; 31641d098868SHong Zhang } 31651d098868SHong Zhang 31661d098868SHong Zhang /* backward solve the upper triangular */ 31678fc3a347SHong Zhang v = aa + ai[n+1]; 31688fc3a347SHong Zhang vi = aj + ai[n+1]; 31691d098868SHong Zhang for (i=n-1; i>=0; i--){ 31708fc3a347SHong Zhang nz = ai[2*n-i +1] - ai[2*n-i]-1; 31711d098868SHong Zhang sum = x[i]; 3172003131ecSBarry Smith PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3173e0e43300SBarry Smith v += nz; 31748fc3a347SHong Zhang vi += nz; vi++; 3175fe31b70dSHong Zhang x[i] = *v++ *sum; /* x[i]=aa[adiag[i]]*sum; v++; */ 31761d098868SHong Zhang } 31771d098868SHong Zhang 31781d098868SHong Zhang ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 31791d098868SHong Zhang ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 31801d098868SHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 31811d098868SHong Zhang PetscFunctionReturn(0); 31821d098868SHong Zhang } 31831d098868SHong Zhang 31841d098868SHong Zhang #undef __FUNCT__ 3185*813a1f2bSShri Abhyankar #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_newdatastruct_v2" 3186*813a1f2bSShri Abhyankar PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_newdatastruct_v2(Mat A,Vec bb,Vec xx) 3187*813a1f2bSShri Abhyankar { 3188*813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3189*813a1f2bSShri Abhyankar PetscErrorCode ierr; 3190*813a1f2bSShri Abhyankar PetscInt n = A->rmap->n; 3191*813a1f2bSShri Abhyankar const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3192*813a1f2bSShri Abhyankar PetscScalar *x,sum; 3193*813a1f2bSShri Abhyankar const PetscScalar *b; 3194*813a1f2bSShri Abhyankar const MatScalar *aa = a->a,*v; 3195*813a1f2bSShri Abhyankar PetscInt i,nz; 3196*813a1f2bSShri Abhyankar 3197*813a1f2bSShri Abhyankar PetscFunctionBegin; 3198*813a1f2bSShri Abhyankar if (!n) PetscFunctionReturn(0); 3199*813a1f2bSShri Abhyankar 3200*813a1f2bSShri Abhyankar ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 3201*813a1f2bSShri Abhyankar ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3202*813a1f2bSShri Abhyankar 3203*813a1f2bSShri Abhyankar /* forward solve the lower triangular */ 3204*813a1f2bSShri Abhyankar x[0] = b[0]; 3205*813a1f2bSShri Abhyankar v = aa; 3206*813a1f2bSShri Abhyankar vi = aj; 3207*813a1f2bSShri Abhyankar for (i=1; i<n; i++) { 3208*813a1f2bSShri Abhyankar nz = ai[i+1] - ai[i]; 3209*813a1f2bSShri Abhyankar sum = b[i]; 3210*813a1f2bSShri Abhyankar PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3211*813a1f2bSShri Abhyankar v += nz; 3212*813a1f2bSShri Abhyankar vi += nz; 3213*813a1f2bSShri Abhyankar x[i] = sum; 3214*813a1f2bSShri Abhyankar } 3215*813a1f2bSShri Abhyankar 3216*813a1f2bSShri Abhyankar /* backward solve the upper triangular */ 3217*813a1f2bSShri Abhyankar /* v = aa + ai[n+1]; 3218*813a1f2bSShri Abhyankar vi = aj + ai[n+1]; */ 3219*813a1f2bSShri Abhyankar v = aa + adiag[n-1]; 3220*813a1f2bSShri Abhyankar vi = aj + adiag[n-1]; 3221*813a1f2bSShri Abhyankar x[n-1] = *v++ *x[n-1]; 3222*813a1f2bSShri Abhyankar vi++; 3223*813a1f2bSShri Abhyankar for (i=n-2; i>=0; i--){ 3224*813a1f2bSShri Abhyankar nz = adiag[i] - adiag[i+1]-1; 3225*813a1f2bSShri Abhyankar sum = x[i]; 3226*813a1f2bSShri Abhyankar PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3227*813a1f2bSShri Abhyankar v += nz; 3228*813a1f2bSShri Abhyankar vi += nz; vi++; 3229*813a1f2bSShri Abhyankar x[i] = *v++ *sum; /* x[i]=aa[adiag[i]]*sum; v++; */ 3230*813a1f2bSShri Abhyankar } 3231*813a1f2bSShri Abhyankar 3232*813a1f2bSShri Abhyankar ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3233*813a1f2bSShri Abhyankar ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 3234*813a1f2bSShri Abhyankar ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3235*813a1f2bSShri Abhyankar PetscFunctionReturn(0); 3236*813a1f2bSShri Abhyankar } 3237*813a1f2bSShri Abhyankar 3238*813a1f2bSShri Abhyankar #undef __FUNCT__ 3239894a2336SHong Zhang #define __FUNCT__ "MatSolve_SeqAIJ_newdatastruct" 3240894a2336SHong Zhang PetscErrorCode MatSolve_SeqAIJ_newdatastruct(Mat A,Vec bb,Vec xx) 32411d098868SHong Zhang { 32421d098868SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 32431d098868SHong Zhang IS iscol = a->col,isrow = a->row; 32441d098868SHong Zhang PetscErrorCode ierr; 3245894a2336SHong Zhang PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,nz,k; 32461d098868SHong Zhang const PetscInt *rout,*cout,*r,*c; 3247894a2336SHong Zhang PetscScalar *x,*tmp,*tmps,sum; 32481d098868SHong Zhang const PetscScalar *b; 32491d098868SHong Zhang const MatScalar *aa = a->a,*v; 32501d098868SHong Zhang 32511d098868SHong Zhang PetscFunctionBegin; 32521d098868SHong Zhang if (!n) PetscFunctionReturn(0); 32531d098868SHong Zhang 32541d098868SHong Zhang ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 32551d098868SHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 32561d098868SHong Zhang tmp = a->solve_work; 32571d098868SHong Zhang 32581d098868SHong Zhang ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3259894a2336SHong Zhang ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 32601d098868SHong Zhang 32611d098868SHong Zhang /* forward solve the lower triangular */ 32621d098868SHong Zhang tmp[0] = b[*r++]; 32631d098868SHong Zhang tmps = tmp; 32641d098868SHong Zhang v = aa; 32651d098868SHong Zhang vi = aj; 32661d098868SHong Zhang for (i=1; i<n; i++) { 32671d098868SHong Zhang nz = ai[i+1] - ai[i]; 3268894a2336SHong Zhang sum = b[*r++]; 3269894a2336SHong Zhang PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 3270894a2336SHong Zhang tmp[i] = sum; 32711d098868SHong Zhang v += nz; vi += nz; 32721d098868SHong Zhang } 32731d098868SHong Zhang 32741d098868SHong Zhang /* backward solve the upper triangular */ 3275894a2336SHong Zhang k = n+1; 3276894a2336SHong Zhang v = aa + ai[k]; /* 1st entry of U(n-1,:) */ 3277894a2336SHong Zhang vi = aj + ai[k]; 32781d098868SHong Zhang for (i=n-1; i>=0; i--){ 3279894a2336SHong Zhang k = 2*n-i; 3280894a2336SHong Zhang nz = ai[k +1] - ai[k] - 1; 3281894a2336SHong Zhang sum = tmp[i]; 3282894a2336SHong Zhang PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 3283894a2336SHong Zhang x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 32841d098868SHong Zhang v += nz+1; vi += nz+1; 32851d098868SHong Zhang } 32861d098868SHong Zhang 32871d098868SHong Zhang ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 32881d098868SHong Zhang ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 32891d098868SHong Zhang ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 32901d098868SHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 32911d098868SHong Zhang ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 32921d098868SHong Zhang PetscFunctionReturn(0); 32931d098868SHong Zhang } 32941d098868SHong Zhang 32951d098868SHong Zhang #undef __FUNCT__ 32961d098868SHong Zhang #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 32971d098868SHong Zhang PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 32981d098868SHong Zhang { 32991d098868SHong Zhang Mat B = *fact; 3300599ef60dSHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3301599ef60dSHong Zhang IS isicol; 3302599ef60dSHong Zhang PetscErrorCode ierr; 33031d098868SHong Zhang const PetscInt *r,*ic; 33041d098868SHong Zhang PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3305dc3a2fd3SHong Zhang PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3306f61a948aSLisandro Dalcin PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 33071d098868SHong Zhang PetscInt nlnk,*lnk; 33081d098868SHong Zhang PetscBT lnkbt; 33091d098868SHong Zhang PetscTruth row_identity,icol_identity,both_identity; 33108aee2decSHong Zhang MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 33111d098868SHong Zhang const PetscInt *ics; 33121d098868SHong Zhang PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3313f61a948aSLisandro Dalcin PetscReal dt=info->dt,dtcol=info->dtcol,shift=info->shiftinblocks; 3314f61a948aSLisandro Dalcin PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3315599ef60dSHong Zhang PetscTruth missing; 3316599ef60dSHong Zhang 3317599ef60dSHong Zhang PetscFunctionBegin; 3318f61a948aSLisandro Dalcin 3319f61a948aSLisandro Dalcin if (dt == PETSC_DEFAULT) dt = 0.005; 3320f61a948aSLisandro Dalcin if (dtcol == PETSC_DEFAULT) dtcol = 0.01; /* XXX unused! */ 3321f61a948aSLisandro Dalcin if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3322f61a948aSLisandro Dalcin 33231d098868SHong Zhang /* ------- symbolic factorization, can be reused ---------*/ 33241d098868SHong Zhang ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 33251d098868SHong Zhang if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 33261d098868SHong Zhang adiag=a->diag; 3327599ef60dSHong Zhang 3328599ef60dSHong Zhang ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3329599ef60dSHong Zhang 3330599ef60dSHong Zhang /* bdiag is location of diagonal in factor */ 3331a6226a1aSHong Zhang ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 3332a6226a1aSHong Zhang bdiag_rev = bdiag + n+1; 3333599ef60dSHong Zhang 33341d098868SHong Zhang /* allocate row pointers bi */ 33358fc3a347SHong Zhang ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 33361d098868SHong Zhang 33371d098868SHong Zhang /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3338393d3a68SHong Zhang if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 33391d098868SHong Zhang nnz_max = ai[n]+2*n*dtcount+2; 33408fc3a347SHong Zhang 33418fc3a347SHong Zhang ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 33428fc3a347SHong Zhang ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3343599ef60dSHong Zhang 3344599ef60dSHong Zhang /* put together the new matrix */ 3345599ef60dSHong Zhang ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 3346599ef60dSHong Zhang ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 3347599ef60dSHong Zhang b = (Mat_SeqAIJ*)(B)->data; 3348599ef60dSHong Zhang b->free_a = PETSC_TRUE; 3349599ef60dSHong Zhang b->free_ij = PETSC_TRUE; 3350599ef60dSHong Zhang b->singlemalloc = PETSC_FALSE; 3351599ef60dSHong Zhang b->a = ba; 3352599ef60dSHong Zhang b->j = bj; 3353599ef60dSHong Zhang b->i = bi; 3354599ef60dSHong Zhang b->diag = bdiag; 3355599ef60dSHong Zhang b->ilen = 0; 3356599ef60dSHong Zhang b->imax = 0; 3357599ef60dSHong Zhang b->row = isrow; 3358599ef60dSHong Zhang b->col = iscol; 3359599ef60dSHong Zhang ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3360599ef60dSHong Zhang ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3361599ef60dSHong Zhang b->icol = isicol; 3362599ef60dSHong Zhang ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3363599ef60dSHong Zhang 33641d098868SHong Zhang ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 33651d098868SHong Zhang b->maxnz = nnz_max; 3366599ef60dSHong Zhang 3367599ef60dSHong Zhang (B)->factor = MAT_FACTOR_ILUDT; 3368599ef60dSHong Zhang (B)->info.factor_mallocs = 0; 33691d098868SHong Zhang (B)->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 33701d098868SHong Zhang CHKMEMQ; 33711d098868SHong Zhang /* ------- end of symbolic factorization ---------*/ 3372599ef60dSHong Zhang 3373599ef60dSHong Zhang ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3374599ef60dSHong Zhang ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3375599ef60dSHong Zhang ics = ic; 3376599ef60dSHong Zhang 3377599ef60dSHong Zhang /* linked list for storing column indices of the active row */ 3378599ef60dSHong Zhang nlnk = n + 1; 3379599ef60dSHong Zhang ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 33801d098868SHong Zhang 33811d098868SHong Zhang /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 33821d098868SHong Zhang ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 33831d098868SHong Zhang jtmp = im + n; 33841d098868SHong Zhang /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 33858369b28dSHong Zhang ierr = PetscMalloc((2*n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 33868aee2decSHong Zhang ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 33878369b28dSHong Zhang vtmp = rtmp + n; 3388599ef60dSHong Zhang 3389599ef60dSHong Zhang bi[0] = 0; 33908fc3a347SHong Zhang bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3391dc3a2fd3SHong Zhang bdiag_rev[n] = bdiag[0]; 33928fc3a347SHong Zhang bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3393599ef60dSHong Zhang for (i=0; i<n; i++) { 3394599ef60dSHong Zhang /* copy initial fill into linked list */ 3395599ef60dSHong Zhang nzi = 0; /* nonzeros for active row i */ 33968369b28dSHong Zhang nzi = ai[r[i]+1] - ai[r[i]]; 33978369b28dSHong Zhang if (!nzi) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 33988369b28dSHong Zhang nzi_al = adiag[r[i]] - ai[r[i]]; 33998369b28dSHong Zhang nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3400599ef60dSHong Zhang ajtmp = aj + ai[r[i]]; 34018369b28dSHong Zhang ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3402599ef60dSHong Zhang 3403599ef60dSHong Zhang /* load in initial (unfactored row) */ 34041d098868SHong Zhang aatmp = a->a + ai[r[i]]; 34058369b28dSHong Zhang for (j=0; j<nzi; j++) { 34061d098868SHong Zhang rtmp[ics[*ajtmp++]] = *aatmp++; 3407599ef60dSHong Zhang } 3408599ef60dSHong Zhang 3409599ef60dSHong Zhang /* add pivot rows into linked list */ 3410599ef60dSHong Zhang row = lnk[n]; 3411599ef60dSHong Zhang while (row < i ) { 34121d098868SHong Zhang nzi_bl = bi[row+1] - bi[row] + 1; 34131d098868SHong Zhang bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 34141d098868SHong Zhang ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3415599ef60dSHong Zhang nzi += nlnk; 3416599ef60dSHong Zhang row = lnk[row]; 3417599ef60dSHong Zhang } 3418599ef60dSHong Zhang 34198369b28dSHong Zhang /* copy data from lnk into jtmp, then initialize lnk */ 34208369b28dSHong Zhang ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3421599ef60dSHong Zhang 3422599ef60dSHong Zhang /* numerical factorization */ 34238369b28dSHong Zhang bjtmp = jtmp; 3424599ef60dSHong Zhang row = *bjtmp++; /* 1st pivot row */ 3425599ef60dSHong Zhang while ( row < i ) { 3426599ef60dSHong Zhang pc = rtmp + row; 34273c2a7987SHong Zhang pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 34283c2a7987SHong Zhang multiplier = (*pc) * (*pv); 3429599ef60dSHong Zhang *pc = multiplier; 34303c2a7987SHong Zhang if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */ 34311d098868SHong Zhang pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 34321d098868SHong Zhang pv = ba + bdiag[row+1] + 1; 34331d098868SHong Zhang /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 34341d098868SHong Zhang nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 34351d098868SHong Zhang for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3436dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 3437599ef60dSHong Zhang } 3438599ef60dSHong Zhang row = *bjtmp++; 3439599ef60dSHong Zhang } 3440599ef60dSHong Zhang 34418369b28dSHong Zhang /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 34428aee2decSHong Zhang diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 34438369b28dSHong Zhang nzi_bl = 0; j = 0; 34448369b28dSHong Zhang while (jtmp[j] < i){ /* Note: jtmp is sorted */ 34458aee2decSHong Zhang vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 34468369b28dSHong Zhang nzi_bl++; j++; 34478369b28dSHong Zhang } 34488369b28dSHong Zhang nzi_bu = nzi - nzi_bl -1; 34498369b28dSHong Zhang while (j < nzi){ 34508aee2decSHong Zhang vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 34518369b28dSHong Zhang j++; 34528369b28dSHong Zhang } 34538369b28dSHong Zhang 3454599ef60dSHong Zhang bjtmp = bj + bi[i]; 3455599ef60dSHong Zhang batmp = ba + bi[i]; 34568369b28dSHong Zhang /* apply level dropping rule to L part */ 34578369b28dSHong Zhang ncut = nzi_al + dtcount; 34588369b28dSHong Zhang if (ncut < nzi_bl){ 34598369b28dSHong Zhang ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 34608369b28dSHong Zhang ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3461599ef60dSHong Zhang } else { 34628369b28dSHong Zhang ncut = nzi_bl; 34638369b28dSHong Zhang } 34648369b28dSHong Zhang for (j=0; j<ncut; j++){ 34658369b28dSHong Zhang bjtmp[j] = jtmp[j]; 34668369b28dSHong Zhang batmp[j] = vtmp[j]; 34676da515a1SHong Zhang /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 34688369b28dSHong Zhang } 34691d098868SHong Zhang bi[i+1] = bi[i] + ncut; 34708369b28dSHong Zhang nzi = ncut + 1; 34718369b28dSHong Zhang 34728369b28dSHong Zhang /* apply level dropping rule to U part */ 34738369b28dSHong Zhang ncut = nzi_au + dtcount; 34748369b28dSHong Zhang if (ncut < nzi_bu){ 34758369b28dSHong Zhang ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 34768369b28dSHong Zhang ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 34778369b28dSHong Zhang } else { 34788369b28dSHong Zhang ncut = nzi_bu; 34798369b28dSHong Zhang } 34808369b28dSHong Zhang nzi += ncut; 34811d098868SHong Zhang 34821d098868SHong Zhang /* mark bdiagonal */ 34831d098868SHong Zhang bdiag[i+1] = bdiag[i] - (ncut + 1); 3484dc3a2fd3SHong Zhang bdiag_rev[n-i-1] = bdiag[i+1]; 34858fc3a347SHong Zhang bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 34861d098868SHong Zhang bjtmp = bj + bdiag[i]; 34871d098868SHong Zhang batmp = ba + bdiag[i]; 34881d098868SHong Zhang *bjtmp = i; 34898aee2decSHong Zhang *batmp = diag_tmp; /* rtmp[i]; */ 3490c9c72f8fSHong Zhang if (*batmp == 0.0) { 3491c9c72f8fSHong Zhang *batmp = dt+shift; 3492bd1bc851SBarry Smith /* printf(" row %d add shift %g\n",i,shift); */ 3493c9c72f8fSHong Zhang } 3494b78a477dSHong Zhang *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 34956da515a1SHong Zhang /* printf(" (%d,%g),",*bjtmp,*batmp); */ 34961d098868SHong Zhang 34971d098868SHong Zhang bjtmp = bj + bdiag[i+1]+1; 34981d098868SHong Zhang batmp = ba + bdiag[i+1]+1; 34998369b28dSHong Zhang for (k=0; k<ncut; k++){ 35001d098868SHong Zhang bjtmp[k] = jtmp[nzi_bl+1+k]; 35011d098868SHong Zhang batmp[k] = vtmp[nzi_bl+1+k]; 35026da515a1SHong Zhang /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3503599ef60dSHong Zhang } 35046da515a1SHong Zhang /* printf("\n"); */ 3505599ef60dSHong Zhang 3506599ef60dSHong Zhang im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3507599ef60dSHong Zhang /* 35081d098868SHong Zhang printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 35098369b28dSHong Zhang printf(" ----------------------------\n"); 3510599ef60dSHong Zhang */ 35118369b28dSHong Zhang } /* for (i=0; i<n; i++) */ 35121d098868SHong Zhang /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 35131d098868SHong Zhang if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3514599ef60dSHong Zhang 3515599ef60dSHong Zhang ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3516599ef60dSHong Zhang ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3517599ef60dSHong Zhang 3518599ef60dSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3519599ef60dSHong Zhang ierr = PetscFree(im);CHKERRQ(ierr); 3520599ef60dSHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 3521599ef60dSHong Zhang 3522599ef60dSHong Zhang ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 35231d098868SHong Zhang b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3524599ef60dSHong Zhang 3525599ef60dSHong Zhang ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3526599ef60dSHong Zhang ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3527599ef60dSHong Zhang both_identity = (PetscTruth) (row_identity && icol_identity); 3528599ef60dSHong Zhang if (row_identity && icol_identity) { 3529894a2336SHong Zhang B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_newdatastruct; 3530599ef60dSHong Zhang } else { 3531894a2336SHong Zhang B->ops->solve = MatSolve_SeqAIJ_newdatastruct; 3532599ef60dSHong Zhang } 3533599ef60dSHong Zhang 3534b78a477dSHong Zhang B->ops->lufactorsymbolic = MatILUDTFactorSymbolic_SeqAIJ; 3535b78a477dSHong Zhang B->ops->lufactornumeric = MatILUDTFactorNumeric_SeqAIJ; 35361d098868SHong Zhang B->ops->solveadd = 0; 35371d098868SHong Zhang B->ops->solvetranspose = 0; 35381d098868SHong Zhang B->ops->solvetransposeadd = 0; 35391d098868SHong Zhang B->ops->matsolve = 0; 3540599ef60dSHong Zhang B->assembled = PETSC_TRUE; 3541599ef60dSHong Zhang B->preallocated = PETSC_TRUE; 3542599ef60dSHong Zhang PetscFunctionReturn(0); 3543599ef60dSHong Zhang } 3544599ef60dSHong Zhang 35453c2a7987SHong Zhang /* a wraper of MatILUDTFactor_SeqAIJ() */ 35463c2a7987SHong Zhang #undef __FUNCT__ 35473c2a7987SHong Zhang #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 35483c2a7987SHong Zhang PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 35493c2a7987SHong Zhang { 35503c2a7987SHong Zhang PetscErrorCode ierr; 35513c2a7987SHong Zhang 35523c2a7987SHong Zhang PetscFunctionBegin; 35533c2a7987SHong Zhang ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 35543c2a7987SHong Zhang 35553c2a7987SHong Zhang fact->ops->lufactornumeric = MatILUDTFactorNumeric_SeqAIJ; 35563c2a7987SHong Zhang PetscFunctionReturn(0); 35573c2a7987SHong Zhang } 35583c2a7987SHong Zhang 35593c2a7987SHong Zhang /* 35603c2a7987SHong Zhang same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 35613c2a7987SHong Zhang - intend to replace existing MatLUFactorNumeric_SeqAIJ() 35623c2a7987SHong Zhang */ 35633c2a7987SHong Zhang #undef __FUNCT__ 35643c2a7987SHong Zhang #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 35653c2a7987SHong Zhang PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 35663c2a7987SHong Zhang { 35673c2a7987SHong Zhang Mat C=fact; 35683c2a7987SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 35693c2a7987SHong Zhang IS isrow = b->row,isicol = b->icol; 35703c2a7987SHong Zhang PetscErrorCode ierr; 35713c2a7987SHong Zhang const PetscInt *r,*ic,*ics; 3572b78a477dSHong Zhang PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3573b78a477dSHong Zhang PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 35743c2a7987SHong Zhang MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3575b78a477dSHong Zhang PetscReal dt=info->dt,shift=info->shiftinblocks; 3576b78a477dSHong Zhang PetscTruth row_identity, col_identity; 35773c2a7987SHong Zhang 35783c2a7987SHong Zhang PetscFunctionBegin; 35793c2a7987SHong Zhang ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 35803c2a7987SHong Zhang ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3581b78a477dSHong Zhang ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 35823c2a7987SHong Zhang ics = ic; 35833c2a7987SHong Zhang 35843c2a7987SHong Zhang for (i=0; i<n; i++){ 3585b78a477dSHong Zhang /* initialize rtmp array */ 3586b78a477dSHong Zhang nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 35873c2a7987SHong Zhang bjtmp = bj + bi[i]; 3588b78a477dSHong Zhang for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3589b78a477dSHong Zhang rtmp[i] = 0.0; 3590b78a477dSHong Zhang nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3591b78a477dSHong Zhang bjtmp = bj + bdiag[i+1] + 1; 3592b78a477dSHong Zhang for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 35933c2a7987SHong Zhang 3594b78a477dSHong Zhang /* load in initial unfactored row of A */ 35956da515a1SHong Zhang /* printf("row %d\n",i); */ 35963c2a7987SHong Zhang nz = ai[r[i]+1] - ai[r[i]]; 35973c2a7987SHong Zhang ajtmp = aj + ai[r[i]]; 35983c2a7987SHong Zhang v = aa + ai[r[i]]; 35993c2a7987SHong Zhang for (j=0; j<nz; j++) { 3600b78a477dSHong Zhang rtmp[ics[*ajtmp++]] = v[j]; 36016da515a1SHong Zhang /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 36023c2a7987SHong Zhang } 36036da515a1SHong Zhang /* printf("\n"); */ 36043c2a7987SHong Zhang 3605b78a477dSHong Zhang /* numerical factorization */ 3606b78a477dSHong Zhang bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3607b78a477dSHong Zhang nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3608b78a477dSHong Zhang k = 0; 3609b78a477dSHong Zhang while (k < nzl){ 36103c2a7987SHong Zhang row = *bjtmp++; 36116da515a1SHong Zhang /* printf(" prow %d\n",row); */ 36123c2a7987SHong Zhang pc = rtmp + row; 3613b78a477dSHong Zhang pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3614b78a477dSHong Zhang multiplier = (*pc) * (*pv); 36153c2a7987SHong Zhang *pc = multiplier; 3616b78a477dSHong Zhang if (PetscAbsScalar(multiplier) > dt){ 3617b78a477dSHong Zhang pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3618b78a477dSHong Zhang pv = b->a + bdiag[row+1] + 1; 3619b78a477dSHong Zhang nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3620b78a477dSHong Zhang for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 36216da515a1SHong Zhang /* ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); */ 36223c2a7987SHong Zhang } 3623b78a477dSHong Zhang k++; 36243c2a7987SHong Zhang } 36253c2a7987SHong Zhang 3626b78a477dSHong Zhang /* finished row so stick it into b->a */ 3627b78a477dSHong Zhang /* L-part */ 3628b78a477dSHong Zhang pv = b->a + bi[i] ; 3629b78a477dSHong Zhang pj = bj + bi[i] ; 3630b78a477dSHong Zhang nzl = bi[i+1] - bi[i]; 3631b78a477dSHong Zhang for (j=0; j<nzl; j++) { 3632b78a477dSHong Zhang pv[j] = rtmp[pj[j]]; 36336da515a1SHong Zhang /* printf(" (%d,%g),",pj[j],pv[j]); */ 36343c2a7987SHong Zhang } 3635b78a477dSHong Zhang 3636b78a477dSHong Zhang /* diagonal: invert diagonal entries for simplier triangular solves */ 3637b78a477dSHong Zhang if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3638b78a477dSHong Zhang b->a[bdiag[i]] = 1.0/rtmp[i]; 36396da515a1SHong Zhang /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3640b78a477dSHong Zhang 3641b78a477dSHong Zhang /* U-part */ 3642b78a477dSHong Zhang pv = b->a + bdiag[i+1] + 1; 3643b78a477dSHong Zhang pj = bj + bdiag[i+1] + 1; 3644b78a477dSHong Zhang nzu = bdiag[i] - bdiag[i+1] - 1; 3645b78a477dSHong Zhang for (j=0; j<nzu; j++) { 3646b78a477dSHong Zhang pv[j] = rtmp[pj[j]]; 36476da515a1SHong Zhang /* printf(" (%d,%g),",pj[j],pv[j]); */ 3648b78a477dSHong Zhang } 36496da515a1SHong Zhang /* printf("\n"); */ 3650b78a477dSHong Zhang } 3651b78a477dSHong Zhang 36523c2a7987SHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 36533c2a7987SHong Zhang ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 36543c2a7987SHong Zhang ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3655b78a477dSHong Zhang 36563c2a7987SHong Zhang ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 36573c2a7987SHong Zhang ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 36583c2a7987SHong Zhang if (row_identity && col_identity) { 3659894a2336SHong Zhang C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_newdatastruct; 36603c2a7987SHong Zhang } else { 3661894a2336SHong Zhang C->ops->solve = MatSolve_SeqAIJ_newdatastruct; 36623c2a7987SHong Zhang } 3663b78a477dSHong Zhang C->ops->solveadd = 0; 3664b78a477dSHong Zhang C->ops->solvetranspose = 0; 3665b78a477dSHong Zhang C->ops->solvetransposeadd = 0; 3666b78a477dSHong Zhang C->ops->matsolve = 0; 36673c2a7987SHong Zhang C->assembled = PETSC_TRUE; 36683c2a7987SHong Zhang C->preallocated = PETSC_TRUE; 36693c2a7987SHong Zhang ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 36703c2a7987SHong Zhang PetscFunctionReturn(0); 36713c2a7987SHong Zhang } 3672