1b377110cSBarry Smith 2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h> 4c6db04a5SJed Brown #include <petscbt.h> 5c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 63b2fbd54SBarry Smith 7f6a26d55SMatthew Knepley EXTERN_C_BEGIN 84a2ae208SSatish Balay #undef __FUNCT__ 9a7a54a73SBarry Smith #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ" 108fa24674SBarry Smith /* 118fa24674SBarry Smith Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 12312e372aSBarry Smith 13312e372aSBarry Smith This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 148fa24674SBarry Smith */ 1519fd82e9SBarry Smith PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 163b2fbd54SBarry Smith { 178fa24674SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 188fa24674SBarry Smith PetscErrorCode ierr; 19889a8dedSBarry Smith PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 208fa24674SBarry Smith const PetscInt *ai = a->i, *aj = a->j; 218fa24674SBarry Smith const PetscScalar *aa = a->a; 22ace3abfcSBarry Smith PetscBool *done; 23889a8dedSBarry Smith PetscReal best,past = 0,future; 243a40ed3dSBarry Smith 258fa24674SBarry Smith PetscFunctionBegin; 268fa24674SBarry Smith /* pick initial row */ 278fa24674SBarry Smith best = -1; 288fa24674SBarry Smith for (i=0; i<n; i++) { 2975567043SBarry Smith future = 0.0; 308fa24674SBarry Smith for (j=ai[i]; j<ai[i+1]; j++) { 31*db4deed7SKarl Rupp if (aj[j] != i) future += PetscAbsScalar(aa[j]); 32*db4deed7SKarl Rupp else past = PetscAbsScalar(aa[j]); 338fa24674SBarry Smith } 348fa24674SBarry Smith if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 358fa24674SBarry Smith if (past/future > best) { 368fa24674SBarry Smith best = past/future; 378fa24674SBarry Smith current = i; 388fa24674SBarry Smith } 398fa24674SBarry Smith } 408fa24674SBarry Smith 41ace3abfcSBarry Smith ierr = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr); 42ace3abfcSBarry Smith ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr); 430e83c824SBarry Smith ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr); 448fa24674SBarry Smith order[0] = current; 458fa24674SBarry Smith for (i=0; i<n-1; i++) { 468fa24674SBarry Smith done[current] = PETSC_TRUE; 478fa24674SBarry Smith best = -1; 488fa24674SBarry Smith /* loop over all neighbors of current pivot */ 498fa24674SBarry Smith for (j=ai[current]; j<ai[current+1]; j++) { 508fa24674SBarry Smith jj = aj[j]; 518fa24674SBarry Smith if (done[jj]) continue; 528fa24674SBarry Smith /* loop over columns of potential next row computing weights for below and above diagonal */ 538fa24674SBarry Smith past = future = 0.0; 548fa24674SBarry Smith for (k=ai[jj]; k<ai[jj+1]; k++) { 558fa24674SBarry Smith kk = aj[k]; 568fa24674SBarry Smith if (done[kk]) past += PetscAbsScalar(aa[k]); 578fa24674SBarry Smith else if (kk != jj) future += PetscAbsScalar(aa[k]); 588fa24674SBarry Smith } 598fa24674SBarry Smith if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 608fa24674SBarry Smith if (past/future > best) { 618fa24674SBarry Smith best = past/future; 628fa24674SBarry Smith newcurrent = jj; 638fa24674SBarry Smith } 648fa24674SBarry Smith } 658fa24674SBarry Smith if (best == -1) { /* no neighbors to select from so select best of all that remain */ 668fa24674SBarry Smith best = -1; 678fa24674SBarry Smith for (k=0; k<n; k++) { 688fa24674SBarry Smith if (done[k]) continue; 6975567043SBarry Smith future = 0.0; 7075567043SBarry Smith past = 0.0; 718fa24674SBarry Smith for (j=ai[k]; j<ai[k+1]; j++) { 728fa24674SBarry Smith kk = aj[j]; 738fa24674SBarry Smith if (done[kk]) past += PetscAbsScalar(aa[j]); 748fa24674SBarry Smith else if (kk != k) future += PetscAbsScalar(aa[j]); 758fa24674SBarry Smith } 768fa24674SBarry Smith if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 778fa24674SBarry Smith if (past/future > best) { 788fa24674SBarry Smith best = past/future; 798fa24674SBarry Smith newcurrent = k; 808fa24674SBarry Smith } 818fa24674SBarry Smith } 828fa24674SBarry Smith } 83e32f2f54SBarry Smith if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 848fa24674SBarry Smith current = newcurrent; 858fa24674SBarry Smith order[i+1] = current; 868fa24674SBarry Smith } 8770b3c8c7SBarry Smith ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 888fa24674SBarry Smith *icol = *irow; 898fa24674SBarry Smith ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 908fa24674SBarry Smith ierr = PetscFree(done);CHKERRQ(ierr); 918fa24674SBarry Smith ierr = PetscFree(order);CHKERRQ(ierr); 92d64ed03dSBarry Smith PetscFunctionReturn(0); 933b2fbd54SBarry Smith } 94f6a26d55SMatthew Knepley EXTERN_C_END 953b2fbd54SBarry Smith 96e631078cSBarry Smith EXTERN_C_BEGIN 974a2ae208SSatish Balay #undef __FUNCT__ 98db4efbfdSBarry Smith #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 99ace3abfcSBarry Smith PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg) 100db4efbfdSBarry Smith { 101db4efbfdSBarry Smith PetscFunctionBegin; 102db4efbfdSBarry Smith *flg = PETSC_TRUE; 103db4efbfdSBarry Smith PetscFunctionReturn(0); 104db4efbfdSBarry Smith } 105db4efbfdSBarry Smith EXTERN_C_END 106db4efbfdSBarry Smith 107db4efbfdSBarry Smith EXTERN_C_BEGIN 108db4efbfdSBarry Smith #undef __FUNCT__ 109b24902e0SBarry Smith #define __FUNCT__ "MatGetFactor_seqaij_petsc" 110b24902e0SBarry Smith PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 111b24902e0SBarry Smith { 112d0f46423SBarry Smith PetscInt n = A->rmap->n; 113b24902e0SBarry Smith PetscErrorCode ierr; 114b24902e0SBarry Smith 115b24902e0SBarry Smith PetscFunctionBegin; 116891bceaeSHong Zhang #if defined(PETSC_USE_COMPLEX) 117891bceaeSHong Zhang if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC))SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported"); 118891bceaeSHong Zhang #endif 119b24902e0SBarry Smith ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 120b24902e0SBarry Smith ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 121599ef60dSHong Zhang if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 122b24902e0SBarry Smith ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 12335233d99SShri Abhyankar (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 12435233d99SShri Abhyankar (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 125e0527adcSJed Brown ierr = MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 126b24902e0SBarry Smith } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 127b24902e0SBarry Smith ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 128b24902e0SBarry Smith ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 12935233d99SShri Abhyankar (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 13035233d99SShri Abhyankar (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 131e32f2f54SBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 132d5f3da31SBarry Smith (*B)->factortype = ftype; 133b24902e0SBarry Smith PetscFunctionReturn(0); 134b24902e0SBarry Smith } 135e631078cSBarry Smith EXTERN_C_END 136b24902e0SBarry Smith 137b24902e0SBarry Smith #undef __FUNCT__ 138ad04f41aSHong Zhang #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace" 139ad04f41aSHong Zhang PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 140289bc588SBarry Smith { 141416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 142289bc588SBarry Smith IS isicol; 1436849ba73SBarry Smith PetscErrorCode ierr; 1445d0c19d7SBarry Smith const PetscInt *r,*ic; 1455d0c19d7SBarry Smith PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 1461393bc97SHong Zhang PetscInt *bi,*bj,*ajtmp; 1471393bc97SHong Zhang PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 1489e046878SBarry Smith PetscReal f; 1494875ba38SHong Zhang PetscInt nlnk,*lnk,k,**bi_ptr; 150a1a86e44SBarry Smith PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1511393bc97SHong Zhang PetscBT lnkbt; 152289bc588SBarry Smith 1533a40ed3dSBarry Smith PetscFunctionBegin; 154e32f2f54SBarry Smith if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 1554c49b128SBarry Smith ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 156ac121b3dSBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 157ac121b3dSBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 158289bc588SBarry Smith 159289bc588SBarry Smith /* get new row pointers */ 1601393bc97SHong Zhang ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1611393bc97SHong Zhang bi[0] = 0; 1621393bc97SHong Zhang 1631393bc97SHong Zhang /* bdiag is location of diagonal in factor */ 1641393bc97SHong Zhang ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1651393bc97SHong Zhang bdiag[0] = 0; 1661393bc97SHong Zhang 1671393bc97SHong Zhang /* linked list for storing column indices of the active row */ 1681393bc97SHong Zhang nlnk = n + 1; 1691393bc97SHong Zhang ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1701393bc97SHong Zhang 17135baf27eSSatish Balay ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 1721393bc97SHong Zhang 1731393bc97SHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 174d3d32019SBarry Smith f = info->fill; 175a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1761393bc97SHong Zhang current_space = free_space; 177289bc588SBarry Smith 178289bc588SBarry Smith for (i=0; i<n; i++) { 1791393bc97SHong Zhang /* copy previous fill into linked list */ 180289bc588SBarry Smith nzi = 0; 1811393bc97SHong Zhang nnz = ai[r[i]+1] - ai[r[i]]; 182e32f2f54SBarry Smith if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1831393bc97SHong Zhang ajtmp = aj + ai[r[i]]; 184afcc9904SHong Zhang ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1851393bc97SHong Zhang nzi += nlnk; 1861393bc97SHong Zhang 1871393bc97SHong Zhang /* add pivot rows into linked list */ 1881393bc97SHong Zhang row = lnk[n]; 1891393bc97SHong Zhang while (row < i) { 190d1170560SHong Zhang nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 191d1170560SHong Zhang ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 192d1170560SHong Zhang ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 1931393bc97SHong Zhang nzi += nlnk; 1941393bc97SHong Zhang row = lnk[row]; 195289bc588SBarry Smith } 1961393bc97SHong Zhang bi[i+1] = bi[i] + nzi; 1971393bc97SHong Zhang im[i] = nzi; 1981393bc97SHong Zhang 1991393bc97SHong Zhang /* mark bdiag */ 2001393bc97SHong Zhang nzbd = 0; 2011393bc97SHong Zhang nnz = nzi; 2021393bc97SHong Zhang k = lnk[n]; 2031393bc97SHong Zhang while (nnz-- && k < i) { 2041393bc97SHong Zhang nzbd++; 2051393bc97SHong Zhang k = lnk[k]; 2061393bc97SHong Zhang } 2071393bc97SHong Zhang bdiag[i] = bi[i] + nzbd; 2081393bc97SHong Zhang 2091393bc97SHong Zhang /* if free space is not available, make more free space */ 2101393bc97SHong Zhang if (current_space->local_remaining<nzi) { 2111393bc97SHong Zhang nnz = (n - i)*nzi; /* estimated and max additional space needed */ 212a1a86e44SBarry Smith ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 2131393bc97SHong Zhang reallocs++; 2141393bc97SHong Zhang } 2151393bc97SHong Zhang 2161393bc97SHong Zhang /* copy data into free space, then initialize lnk */ 2171393bc97SHong Zhang ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2181393bc97SHong Zhang bi_ptr[i] = current_space->array; 2191393bc97SHong Zhang current_space->array += nzi; 2201393bc97SHong Zhang current_space->local_used += nzi; 2211393bc97SHong Zhang current_space->local_remaining -= nzi; 222289bc588SBarry Smith } 2236cf91177SBarry Smith #if defined(PETSC_USE_INFO) 224464e8d44SSatish Balay if (ai[n] != 0) { 2251393bc97SHong Zhang PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 226ae15b995SBarry Smith ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 227ae15b995SBarry Smith ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 228ae15b995SBarry Smith ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 229ae15b995SBarry Smith ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 23005bf559cSSatish Balay } else { 231ae15b995SBarry Smith ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 23205bf559cSSatish Balay } 23363ba0a88SBarry Smith #endif 2342fb3ed76SBarry Smith 235898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 236898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2371987afe7SBarry Smith 2381393bc97SHong Zhang /* destroy list of free space and other temporary array(s) */ 2391393bc97SHong Zhang ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 240a1a86e44SBarry Smith ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 2411393bc97SHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 24235baf27eSSatish Balay ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 243289bc588SBarry Smith 244289bc588SBarry Smith /* put together the new matrix */ 245719d5645SBarry Smith ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 246719d5645SBarry Smith ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 247719d5645SBarry Smith b = (Mat_SeqAIJ*)(B)->data; 248e6b907acSBarry Smith b->free_a = PETSC_TRUE; 249e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 2507c922b88SBarry Smith b->singlemalloc = PETSC_FALSE; 2511393bc97SHong Zhang ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 2521393bc97SHong Zhang b->j = bj; 2531393bc97SHong Zhang b->i = bi; 2541393bc97SHong Zhang b->diag = bdiag; 255416022c9SBarry Smith b->ilen = 0; 256416022c9SBarry Smith b->imax = 0; 257416022c9SBarry Smith b->row = isrow; 258416022c9SBarry Smith b->col = iscol; 259c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 260c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 26182bf6240SBarry Smith b->icol = isicol; 26287828ca2SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2631393bc97SHong Zhang 2641393bc97SHong Zhang /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 265719d5645SBarry Smith ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2661393bc97SHong Zhang b->maxnz = b->nz = bi[n] ; 2677fd98bd6SLois Curfman McInnes 268d5f3da31SBarry Smith (B)->factortype = MAT_FACTOR_LU; 269719d5645SBarry Smith (B)->info.factor_mallocs = reallocs; 270719d5645SBarry Smith (B)->info.fill_ratio_given = f; 271703deb49SSatish Balay 2729f7d0b68SHong Zhang if (ai[n]) { 273719d5645SBarry Smith (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 274eea2913aSSatish Balay } else { 275719d5645SBarry Smith (B)->info.fill_ratio_needed = 0.0; 276eea2913aSSatish Balay } 277ad04f41aSHong Zhang (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 278f2344125SBarry Smith if (a->inode.size) { 279f2344125SBarry Smith (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 280f2344125SBarry Smith } 2813a40ed3dSBarry Smith PetscFunctionReturn(0); 282289bc588SBarry Smith } 2831393bc97SHong Zhang 284ce3d78c0SShri Abhyankar #undef __FUNCT__ 28535233d99SShri Abhyankar #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 28635233d99SShri Abhyankar PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 287ce3d78c0SShri Abhyankar { 288ce3d78c0SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 289ce3d78c0SShri Abhyankar IS isicol; 290ce3d78c0SShri Abhyankar PetscErrorCode ierr; 291ce3d78c0SShri Abhyankar const PetscInt *r,*ic; 292ce3d78c0SShri Abhyankar PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 293ce3d78c0SShri Abhyankar PetscInt *bi,*bj,*ajtmp; 294ce3d78c0SShri Abhyankar PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 295ce3d78c0SShri Abhyankar PetscReal f; 296ce3d78c0SShri Abhyankar PetscInt nlnk,*lnk,k,**bi_ptr; 297ce3d78c0SShri Abhyankar PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 298ce3d78c0SShri Abhyankar PetscBT lnkbt; 299ce3d78c0SShri Abhyankar 300ce3d78c0SShri Abhyankar PetscFunctionBegin; 3013898ab1dSSatish Balay /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */ 302a4be958cSHong Zhang /* 3031a26ff17SHong Zhang PetscBool olddatastruct=PETSC_FALSE; 304acfcf0e5SJed Brown ierr = PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 305628f99d7SShri Abhyankar if (olddatastruct) { 306628f99d7SShri Abhyankar ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr); 307628f99d7SShri Abhyankar PetscFunctionReturn(0); 308628f99d7SShri Abhyankar } 309a4be958cSHong Zhang */ 310e32f2f54SBarry Smith if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 311ce3d78c0SShri Abhyankar ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 312ce3d78c0SShri Abhyankar ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 313ce3d78c0SShri Abhyankar ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 314ce3d78c0SShri Abhyankar 3151bfa06eaSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 3161bfa06eaSJed Brown ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3171bfa06eaSJed Brown ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 318a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 3199b48462bSShri Abhyankar 3209b48462bSShri Abhyankar /* linked list for storing column indices of the active row */ 3219b48462bSShri Abhyankar nlnk = n + 1; 3229b48462bSShri Abhyankar ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3239b48462bSShri Abhyankar 3249b48462bSShri Abhyankar ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 3259b48462bSShri Abhyankar 3269b48462bSShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 3279b48462bSShri Abhyankar f = info->fill; 3289b48462bSShri Abhyankar ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 3299b48462bSShri Abhyankar current_space = free_space; 3309b48462bSShri Abhyankar 3319b48462bSShri Abhyankar for (i=0; i<n; i++) { 3329b48462bSShri Abhyankar /* copy previous fill into linked list */ 3339b48462bSShri Abhyankar nzi = 0; 3349b48462bSShri Abhyankar nnz = ai[r[i]+1] - ai[r[i]]; 335e32f2f54SBarry Smith if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3369b48462bSShri Abhyankar ajtmp = aj + ai[r[i]]; 3379b48462bSShri Abhyankar ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3389b48462bSShri Abhyankar nzi += nlnk; 3399b48462bSShri Abhyankar 3409b48462bSShri Abhyankar /* add pivot rows into linked list */ 3419b48462bSShri Abhyankar row = lnk[n]; 3429b48462bSShri Abhyankar while (row < i) { 3439b48462bSShri Abhyankar nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 3449b48462bSShri Abhyankar ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 3459b48462bSShri Abhyankar ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 3469b48462bSShri Abhyankar nzi += nlnk; 3479b48462bSShri Abhyankar row = lnk[row]; 3489b48462bSShri Abhyankar } 3499b48462bSShri Abhyankar bi[i+1] = bi[i] + nzi; 3509b48462bSShri Abhyankar im[i] = nzi; 3519b48462bSShri Abhyankar 3529b48462bSShri Abhyankar /* mark bdiag */ 3539b48462bSShri Abhyankar nzbd = 0; 3549b48462bSShri Abhyankar nnz = nzi; 3559b48462bSShri Abhyankar k = lnk[n]; 3569b48462bSShri Abhyankar while (nnz-- && k < i) { 3579b48462bSShri Abhyankar nzbd++; 3589b48462bSShri Abhyankar k = lnk[k]; 3599b48462bSShri Abhyankar } 3609b48462bSShri Abhyankar bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 3619b48462bSShri Abhyankar 3629b48462bSShri Abhyankar /* if free space is not available, make more free space */ 3639b48462bSShri Abhyankar if (current_space->local_remaining<nzi) { 3649b48462bSShri Abhyankar nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 3659b48462bSShri Abhyankar ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 3669b48462bSShri Abhyankar reallocs++; 3679b48462bSShri Abhyankar } 3689b48462bSShri Abhyankar 3699b48462bSShri Abhyankar /* copy data into free space, then initialize lnk */ 3709b48462bSShri Abhyankar ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3719b48462bSShri Abhyankar bi_ptr[i] = current_space->array; 3729b48462bSShri Abhyankar current_space->array += nzi; 3739b48462bSShri Abhyankar current_space->local_used += nzi; 3749b48462bSShri Abhyankar current_space->local_remaining -= nzi; 3759b48462bSShri Abhyankar } 3769b48462bSShri Abhyankar 3779b48462bSShri Abhyankar ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3789b48462bSShri Abhyankar ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3799b48462bSShri Abhyankar 3809263d837SHong Zhang /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 3819b48462bSShri Abhyankar ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3822ce24eb6SHong Zhang ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 3839b48462bSShri Abhyankar ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3849b48462bSShri Abhyankar ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 3859b48462bSShri Abhyankar 3869b48462bSShri Abhyankar /* put together the new matrix */ 3879b48462bSShri Abhyankar ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 3889b48462bSShri Abhyankar ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 3899b48462bSShri Abhyankar b = (Mat_SeqAIJ*)(B)->data; 3909b48462bSShri Abhyankar b->free_a = PETSC_TRUE; 3919b48462bSShri Abhyankar b->free_ij = PETSC_TRUE; 3929b48462bSShri Abhyankar b->singlemalloc = PETSC_FALSE; 3939b48462bSShri Abhyankar ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 3949b48462bSShri Abhyankar b->j = bj; 3959b48462bSShri Abhyankar b->i = bi; 3969b48462bSShri Abhyankar b->diag = bdiag; 3979b48462bSShri Abhyankar b->ilen = 0; 3989b48462bSShri Abhyankar b->imax = 0; 3999b48462bSShri Abhyankar b->row = isrow; 4009b48462bSShri Abhyankar b->col = iscol; 4019b48462bSShri Abhyankar ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4029b48462bSShri Abhyankar ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4039b48462bSShri Abhyankar b->icol = isicol; 4049b48462bSShri Abhyankar ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 4059b48462bSShri Abhyankar 4069b48462bSShri Abhyankar /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 4079b48462bSShri Abhyankar ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 4089b48462bSShri Abhyankar b->maxnz = b->nz = bdiag[0]+1; 409d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 410f284f12aSHong Zhang B->info.factor_mallocs = reallocs; 411f284f12aSHong Zhang B->info.fill_ratio_given = f; 4129b48462bSShri Abhyankar 4139b48462bSShri Abhyankar if (ai[n]) { 414f284f12aSHong Zhang B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 4159b48462bSShri Abhyankar } else { 416f284f12aSHong Zhang B->info.fill_ratio_needed = 0.0; 4179b48462bSShri Abhyankar } 4189263d837SHong Zhang #if defined(PETSC_USE_INFO) 4199263d837SHong Zhang if (ai[n] != 0) { 4209263d837SHong Zhang PetscReal af = B->info.fill_ratio_needed; 4219263d837SHong Zhang ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 4229263d837SHong Zhang ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 4239263d837SHong Zhang ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 4249263d837SHong Zhang ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 4259263d837SHong Zhang } else { 4269263d837SHong Zhang ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 4279263d837SHong Zhang } 4289263d837SHong Zhang #endif 42935233d99SShri Abhyankar B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 430f2344125SBarry Smith if (a->inode.size) { 431d3ac4fa3SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 432d3ac4fa3SBarry Smith } 4339b48462bSShri Abhyankar PetscFunctionReturn(0); 4349b48462bSShri Abhyankar } 4359b48462bSShri Abhyankar 4365b5bc046SBarry Smith /* 4375b5bc046SBarry Smith Trouble in factorization, should we dump the original matrix? 4385b5bc046SBarry Smith */ 4395b5bc046SBarry Smith #undef __FUNCT__ 4405b5bc046SBarry Smith #define __FUNCT__ "MatFactorDumpMatrix" 4415b5bc046SBarry Smith PetscErrorCode MatFactorDumpMatrix(Mat A) 4425b5bc046SBarry Smith { 4435b5bc046SBarry Smith PetscErrorCode ierr; 444ace3abfcSBarry Smith PetscBool flg = PETSC_FALSE; 4455b5bc046SBarry Smith 4465b5bc046SBarry Smith PetscFunctionBegin; 447acfcf0e5SJed Brown ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 4485b5bc046SBarry Smith if (flg) { 4495b5bc046SBarry Smith PetscViewer viewer; 4505b5bc046SBarry Smith char filename[PETSC_MAX_PATH_LEN]; 4515b5bc046SBarry Smith 4525b5bc046SBarry Smith ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 4537adad957SLisandro Dalcin ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 4545b5bc046SBarry Smith ierr = MatView(A,viewer);CHKERRQ(ierr); 4556bf464f9SBarry Smith ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4565b5bc046SBarry Smith } 4575b5bc046SBarry Smith PetscFunctionReturn(0); 4585b5bc046SBarry Smith } 4595b5bc046SBarry Smith 460c9c72f8fSHong Zhang #undef __FUNCT__ 46135233d99SShri Abhyankar #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 46235233d99SShri Abhyankar PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 463c9c72f8fSHong Zhang { 464c9c72f8fSHong Zhang Mat C=B; 465c9c72f8fSHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 466c9c72f8fSHong Zhang IS isrow = b->row,isicol = b->icol; 467c9c72f8fSHong Zhang PetscErrorCode ierr; 468c9c72f8fSHong Zhang const PetscInt *r,*ic,*ics; 469bbd65245SShri Abhyankar const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 470bbd65245SShri Abhyankar PetscInt i,j,k,nz,nzL,row,*pj; 471bbd65245SShri Abhyankar const PetscInt *ajtmp,*bjtmp; 472bbd65245SShri Abhyankar MatScalar *rtmp,*pc,multiplier,*pv; 473bbd65245SShri Abhyankar const MatScalar *aa=a->a,*v; 474ace3abfcSBarry Smith PetscBool row_identity,col_identity; 475d90ac83dSHong Zhang FactorShiftCtx sctx; 4764f81c4b7SBarry Smith const PetscInt *ddiag; 477d4ad06f7SHong Zhang PetscReal rs; 478d4ad06f7SHong Zhang MatScalar d; 479d4ad06f7SHong Zhang 480c9c72f8fSHong Zhang PetscFunctionBegin; 481d6e5152cSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 482d90ac83dSHong Zhang ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 483d4ad06f7SHong Zhang 484f4db908eSBarry Smith if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 485d4ad06f7SHong Zhang ddiag = a->diag; 486d4ad06f7SHong Zhang sctx.shift_top = info->zeropivot; 487d4ad06f7SHong Zhang for (i=0; i<n; i++) { 488d4ad06f7SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 489d4ad06f7SHong Zhang d = (aa)[ddiag[i]]; 490d4ad06f7SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 491d4ad06f7SHong Zhang v = aa+ai[i]; 492d4ad06f7SHong Zhang nz = ai[i+1] - ai[i]; 493d4ad06f7SHong Zhang for (j=0; j<nz; j++) 494d4ad06f7SHong Zhang rs += PetscAbsScalar(v[j]); 495d4ad06f7SHong Zhang if (rs>sctx.shift_top) sctx.shift_top = rs; 496d4ad06f7SHong Zhang } 497d4ad06f7SHong Zhang sctx.shift_top *= 1.1; 498d4ad06f7SHong Zhang sctx.nshift_max = 5; 499d4ad06f7SHong Zhang sctx.shift_lo = 0.; 500d4ad06f7SHong Zhang sctx.shift_hi = 1.; 501d4ad06f7SHong Zhang } 502d4ad06f7SHong Zhang 503c9c72f8fSHong Zhang ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 504c9c72f8fSHong Zhang ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 505c9c72f8fSHong Zhang ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 506c9c72f8fSHong Zhang ics = ic; 507c9c72f8fSHong Zhang 508d4ad06f7SHong Zhang do { 50907b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 510c9c72f8fSHong Zhang for (i=0; i<n; i++) { 511c9c72f8fSHong Zhang /* zero rtmp */ 512c9c72f8fSHong Zhang /* L part */ 513c9c72f8fSHong Zhang nz = bi[i+1] - bi[i]; 514c9c72f8fSHong Zhang bjtmp = bj + bi[i]; 515c9c72f8fSHong Zhang for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 516c9c72f8fSHong Zhang 517c9c72f8fSHong Zhang /* U part */ 51862fbd6afSShri Abhyankar nz = bdiag[i]-bdiag[i+1]; 51962fbd6afSShri Abhyankar bjtmp = bj + bdiag[i+1]+1; 520813a1f2bSShri Abhyankar for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 521813a1f2bSShri Abhyankar 522813a1f2bSShri Abhyankar /* load in initial (unfactored row) */ 523813a1f2bSShri Abhyankar nz = ai[r[i]+1] - ai[r[i]]; 524813a1f2bSShri Abhyankar ajtmp = aj + ai[r[i]]; 525813a1f2bSShri Abhyankar v = aa + ai[r[i]]; 526813a1f2bSShri Abhyankar for (j=0; j<nz; j++) { 527813a1f2bSShri Abhyankar rtmp[ics[ajtmp[j]]] = v[j]; 528813a1f2bSShri Abhyankar } 529813a1f2bSShri Abhyankar /* ZeropivotApply() */ 53098a93161SHong Zhang rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 531813a1f2bSShri Abhyankar 532813a1f2bSShri Abhyankar /* elimination */ 533813a1f2bSShri Abhyankar bjtmp = bj + bi[i]; 534813a1f2bSShri Abhyankar row = *bjtmp++; 535813a1f2bSShri Abhyankar nzL = bi[i+1] - bi[i]; 536f268cba8SShri Abhyankar for (k=0; k < nzL;k++) { 537813a1f2bSShri Abhyankar pc = rtmp + row; 538813a1f2bSShri Abhyankar if (*pc != 0.0) { 539813a1f2bSShri Abhyankar pv = b->a + bdiag[row]; 540813a1f2bSShri Abhyankar multiplier = *pc * (*pv); 541813a1f2bSShri Abhyankar *pc = multiplier; 54262fbd6afSShri Abhyankar pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 54362fbd6afSShri Abhyankar pv = b->a + bdiag[row+1]+1; 54462fbd6afSShri Abhyankar nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 545813a1f2bSShri Abhyankar for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 54628b1a77fSLisandro Dalcin ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 547813a1f2bSShri Abhyankar } 548f268cba8SShri Abhyankar row = *bjtmp++; 549813a1f2bSShri Abhyankar } 550813a1f2bSShri Abhyankar 551813a1f2bSShri Abhyankar /* finished row so stick it into b->a */ 552813a1f2bSShri Abhyankar rs = 0.0; 553813a1f2bSShri Abhyankar /* L part */ 554813a1f2bSShri Abhyankar pv = b->a + bi[i] ; 555813a1f2bSShri Abhyankar pj = b->j + bi[i] ; 556813a1f2bSShri Abhyankar nz = bi[i+1] - bi[i]; 557813a1f2bSShri Abhyankar for (j=0; j<nz; j++) { 558813a1f2bSShri Abhyankar pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 559813a1f2bSShri Abhyankar } 560813a1f2bSShri Abhyankar 561813a1f2bSShri Abhyankar /* U part */ 56262fbd6afSShri Abhyankar pv = b->a + bdiag[i+1]+1; 56362fbd6afSShri Abhyankar pj = b->j + bdiag[i+1]+1; 56462fbd6afSShri Abhyankar nz = bdiag[i] - bdiag[i+1]-1; 565813a1f2bSShri Abhyankar for (j=0; j<nz; j++) { 566813a1f2bSShri Abhyankar pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 567813a1f2bSShri Abhyankar } 568813a1f2bSShri Abhyankar 569813a1f2bSShri Abhyankar sctx.rs = rs; 570813a1f2bSShri Abhyankar sctx.pv = rtmp[i]; 571d0660788SBarry Smith ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 57207b50cabSHong Zhang if (sctx.newshift) break; /* break for-loop */ 57307b50cabSHong Zhang rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 574813a1f2bSShri Abhyankar 575813a1f2bSShri Abhyankar /* Mark diagonal and invert diagonal for simplier triangular solves */ 576813a1f2bSShri Abhyankar pv = b->a + bdiag[i]; 577813a1f2bSShri Abhyankar *pv = 1.0/rtmp[i]; 578813a1f2bSShri Abhyankar 579813a1f2bSShri Abhyankar } /* endof for (i=0; i<n; i++) { */ 580813a1f2bSShri Abhyankar 5818ff23777SHong Zhang /* MatPivotRefine() */ 58207b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 583813a1f2bSShri Abhyankar /* 584813a1f2bSShri Abhyankar * if no shift in this attempt & shifting & started shifting & can refine, 585813a1f2bSShri Abhyankar * then try lower shift 586813a1f2bSShri Abhyankar */ 587813a1f2bSShri Abhyankar sctx.shift_hi = sctx.shift_fraction; 588813a1f2bSShri Abhyankar sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 589813a1f2bSShri Abhyankar sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 59007b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 591813a1f2bSShri Abhyankar sctx.nshift++; 592813a1f2bSShri Abhyankar } 59307b50cabSHong Zhang } while (sctx.newshift); 594813a1f2bSShri Abhyankar 595813a1f2bSShri Abhyankar ierr = PetscFree(rtmp);CHKERRQ(ierr); 596813a1f2bSShri Abhyankar ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 597813a1f2bSShri Abhyankar ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 598d3ac4fa3SBarry Smith 599813a1f2bSShri Abhyankar ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 600813a1f2bSShri Abhyankar ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 601813a1f2bSShri Abhyankar if (row_identity && col_identity) { 60235233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 603813a1f2bSShri Abhyankar } else { 60435233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ; 605813a1f2bSShri Abhyankar } 60635233d99SShri Abhyankar C->ops->solveadd = MatSolveAdd_SeqAIJ; 60735233d99SShri Abhyankar C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 60835233d99SShri Abhyankar C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 60935233d99SShri Abhyankar C->ops->matsolve = MatMatSolve_SeqAIJ; 610813a1f2bSShri Abhyankar C->assembled = PETSC_TRUE; 611813a1f2bSShri Abhyankar C->preallocated = PETSC_TRUE; 612813a1f2bSShri Abhyankar ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 613813a1f2bSShri Abhyankar 61414d2772eSHong Zhang /* MatShiftView(A,info,&sctx) */ 615813a1f2bSShri Abhyankar if (sctx.nshift) { 616f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 617813a1f2bSShri Abhyankar ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 618f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 619813a1f2bSShri Abhyankar ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 620f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 621915743fcSHong Zhang ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 622813a1f2bSShri Abhyankar } 623813a1f2bSShri Abhyankar } 624d3ac4fa3SBarry Smith ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr); 625813a1f2bSShri Abhyankar PetscFunctionReturn(0); 626813a1f2bSShri Abhyankar } 627813a1f2bSShri Abhyankar 6284a2ae208SSatish Balay #undef __FUNCT__ 629ad04f41aSHong Zhang #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace" 630ad04f41aSHong Zhang PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 631289bc588SBarry Smith { 632719d5645SBarry Smith Mat C=B; 633416022c9SBarry Smith Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 63482bf6240SBarry Smith IS isrow = b->row,isicol = b->icol; 6356849ba73SBarry Smith PetscErrorCode ierr; 6365d0c19d7SBarry Smith const PetscInt *r,*ic,*ics; 637d278edc7SBarry Smith PetscInt nz,row,i,j,n=A->rmap->n,diag; 638d278edc7SBarry Smith const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 639d278edc7SBarry Smith const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 640d278edc7SBarry Smith MatScalar *pv,*rtmp,*pc,multiplier,d; 641d278edc7SBarry Smith const MatScalar *v,*aa=a->a; 64233f9893dSHong Zhang PetscReal rs=0.0; 643d90ac83dSHong Zhang FactorShiftCtx sctx; 6444f81c4b7SBarry Smith const PetscInt *ddiag; 645ace3abfcSBarry Smith PetscBool row_identity, col_identity; 646289bc588SBarry Smith 6473a40ed3dSBarry Smith PetscFunctionBegin; 648504a3fadSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 649504a3fadSHong Zhang ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 650ada7143aSSatish Balay 651f4db908eSBarry Smith if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 65242898933SHong Zhang ddiag = a->diag; 6539f7d0b68SHong Zhang sctx.shift_top = info->zeropivot; 6546cc28720Svictorle for (i=0; i<n; i++) { 6559f95998fSHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 6569f7d0b68SHong Zhang d = (aa)[ddiag[i]]; 6571a890ab1SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 6589f7d0b68SHong Zhang v = aa+ai[i]; 6599f7d0b68SHong Zhang nz = ai[i+1] - ai[i]; 660e24cfe64SHong Zhang for (j=0; j<nz; j++) 6611a890ab1SHong Zhang rs += PetscAbsScalar(v[j]); 6621a890ab1SHong Zhang if (rs>sctx.shift_top) sctx.shift_top = rs; 6636cc28720Svictorle } 664118f5789SBarry Smith sctx.shift_top *= 1.1; 665d2276718SHong Zhang sctx.nshift_max = 5; 666d2276718SHong Zhang sctx.shift_lo = 0.; 667d2276718SHong Zhang sctx.shift_hi = 1.; 668a0a356efSHong Zhang } 669a0a356efSHong Zhang 670504a3fadSHong Zhang ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 671504a3fadSHong Zhang ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 672504a3fadSHong Zhang ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 673504a3fadSHong Zhang ics = ic; 674504a3fadSHong Zhang 675085256b3SBarry Smith do { 67607b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 677289bc588SBarry Smith for (i=0; i<n; i++) { 678b3bf805bSHong Zhang nz = bi[i+1] - bi[i]; 679b3bf805bSHong Zhang bjtmp = bj + bi[i]; 6809f7d0b68SHong Zhang for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 681289bc588SBarry Smith 682289bc588SBarry Smith /* load in initial (unfactored row) */ 6839f7d0b68SHong Zhang nz = ai[r[i]+1] - ai[r[i]]; 6849f7d0b68SHong Zhang ajtmp = aj + ai[r[i]]; 6859f7d0b68SHong Zhang v = aa + ai[r[i]]; 686085256b3SBarry Smith for (j=0; j<nz; j++) { 687b3bf805bSHong Zhang rtmp[ics[ajtmp[j]]] = v[j]; 688085256b3SBarry Smith } 689d2276718SHong Zhang rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 690289bc588SBarry Smith 691b3bf805bSHong Zhang row = *bjtmp++; 692f2582111SSatish Balay while (row < i) { 6938c37ef55SBarry Smith pc = rtmp + row; 6948c37ef55SBarry Smith if (*pc != 0.0) { 695010a20caSHong Zhang pv = b->a + diag_offset[row]; 696010a20caSHong Zhang pj = b->j + diag_offset[row] + 1; 69735aab85fSBarry Smith multiplier = *pc / *pv++; 6988c37ef55SBarry Smith *pc = multiplier; 699b3bf805bSHong Zhang nz = bi[row+1] - diag_offset[row] - 1; 7009f7d0b68SHong Zhang for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 70128b1a77fSLisandro Dalcin ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 702289bc588SBarry Smith } 703b3bf805bSHong Zhang row = *bjtmp++; 704289bc588SBarry Smith } 705416022c9SBarry Smith /* finished row so stick it into b->a */ 706b3bf805bSHong Zhang pv = b->a + bi[i] ; 707b3bf805bSHong Zhang pj = b->j + bi[i] ; 708b3bf805bSHong Zhang nz = bi[i+1] - bi[i]; 709b3bf805bSHong Zhang diag = diag_offset[i] - bi[i]; 710e57ff13aSBarry Smith rs = 0.0; 711d3d32019SBarry Smith for (j=0; j<nz; j++) { 7129f7d0b68SHong Zhang pv[j] = rtmp[pj[j]]; 7139f7d0b68SHong Zhang rs += PetscAbsScalar(pv[j]); 714d3d32019SBarry Smith } 715e57ff13aSBarry Smith rs -= PetscAbsScalar(pv[diag]); 716d2276718SHong Zhang 717d2276718SHong Zhang sctx.rs = rs; 718d2276718SHong Zhang sctx.pv = pv[diag]; 719d0660788SBarry Smith ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 72007b50cabSHong Zhang if (sctx.newshift) break; 721504a3fadSHong Zhang pv[diag] = sctx.pv; 72235aab85fSBarry Smith } 723d2276718SHong Zhang 72407b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 7256cc28720Svictorle /* 7266c037d1bSvictorle * if no shift in this attempt & shifting & started shifting & can refine, 7276cc28720Svictorle * then try lower shift 7286cc28720Svictorle */ 7290481f469SBarry Smith sctx.shift_hi = sctx.shift_fraction; 7300481f469SBarry Smith sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 7310481f469SBarry Smith sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 73207b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 733d2276718SHong Zhang sctx.nshift++; 7346cc28720Svictorle } 73507b50cabSHong Zhang } while (sctx.newshift); 7360f11f9aeSSatish Balay 73735aab85fSBarry Smith /* invert diagonal entries for simplier triangular solves */ 73835aab85fSBarry Smith for (i=0; i<n; i++) { 739010a20caSHong Zhang b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 74035aab85fSBarry Smith } 741606d414cSSatish Balay ierr = PetscFree(rtmp);CHKERRQ(ierr); 74278b31e54SBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 74378b31e54SBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 744d3ac4fa3SBarry Smith 7458d8c7c43SBarry Smith ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 7468d8c7c43SBarry Smith ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 7478d8c7c43SBarry Smith if (row_identity && col_identity) { 748ad04f41aSHong Zhang C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 7498d8c7c43SBarry Smith } else { 750ad04f41aSHong Zhang C->ops->solve = MatSolve_SeqAIJ_inplace; 751db4efbfdSBarry Smith } 752ad04f41aSHong Zhang C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 753ad04f41aSHong Zhang C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 754ad04f41aSHong Zhang C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 755ad04f41aSHong Zhang C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 756c456f294SBarry Smith C->assembled = PETSC_TRUE; 7575c9eb25fSBarry Smith C->preallocated = PETSC_TRUE; 758d0f46423SBarry Smith ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 759d2276718SHong Zhang if (sctx.nshift) { 760f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 7610481f469SBarry Smith ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 762f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 763e738e422SBarry Smith ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 7646cc28720Svictorle } 7650697b6caSHong Zhang } 766d3ac4fa3SBarry Smith (C)->ops->solve = MatSolve_SeqAIJ_inplace; 767d3ac4fa3SBarry Smith (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 768d3ac4fa3SBarry Smith ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr); 7693a40ed3dSBarry Smith PetscFunctionReturn(0); 770289bc588SBarry Smith } 7710889b5dcSvictorle 772137fb511SHong Zhang /* 773137fb511SHong Zhang This routine implements inplace ILU(0) with row or/and column permutations. 774137fb511SHong Zhang Input: 775137fb511SHong Zhang A - original matrix 776137fb511SHong Zhang Output; 777137fb511SHong Zhang A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 778137fb511SHong Zhang a->j (col index) is permuted by the inverse of colperm, then sorted 779137fb511SHong Zhang a->a reordered accordingly with a->j 780137fb511SHong Zhang a->diag (ptr to diagonal elements) is updated. 781137fb511SHong Zhang */ 782137fb511SHong Zhang #undef __FUNCT__ 783137fb511SHong Zhang #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 7840481f469SBarry Smith PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 785137fb511SHong Zhang { 786b051339dSHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 787b051339dSHong Zhang IS isrow = a->row,isicol = a->icol; 788137fb511SHong Zhang PetscErrorCode ierr; 7895d0c19d7SBarry Smith const PetscInt *r,*ic,*ics; 7905d0c19d7SBarry Smith PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 7915d0c19d7SBarry Smith PetscInt *ajtmp,nz,row; 792b051339dSHong Zhang PetscInt *diag = a->diag,nbdiag,*pj; 793a77337e4SBarry Smith PetscScalar *rtmp,*pc,multiplier,d; 794504a3fadSHong Zhang MatScalar *pv,*v; 795137fb511SHong Zhang PetscReal rs; 796d90ac83dSHong Zhang FactorShiftCtx sctx; 797504a3fadSHong Zhang const MatScalar *aa=a->a,*vtmp; 798137fb511SHong Zhang 799137fb511SHong Zhang PetscFunctionBegin; 800e32f2f54SBarry Smith if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 801504a3fadSHong Zhang 802504a3fadSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 803504a3fadSHong Zhang ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 804504a3fadSHong Zhang 805504a3fadSHong Zhang if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 806504a3fadSHong Zhang const PetscInt *ddiag = a->diag; 807504a3fadSHong Zhang sctx.shift_top = info->zeropivot; 808504a3fadSHong Zhang for (i=0; i<n; i++) { 809504a3fadSHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 810504a3fadSHong Zhang d = (aa)[ddiag[i]]; 811504a3fadSHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 812504a3fadSHong Zhang vtmp = aa+ai[i]; 813504a3fadSHong Zhang nz = ai[i+1] - ai[i]; 814504a3fadSHong Zhang for (j=0; j<nz; j++) 815504a3fadSHong Zhang rs += PetscAbsScalar(vtmp[j]); 816504a3fadSHong Zhang if (rs>sctx.shift_top) sctx.shift_top = rs; 817504a3fadSHong Zhang } 818504a3fadSHong Zhang sctx.shift_top *= 1.1; 819504a3fadSHong Zhang sctx.nshift_max = 5; 820504a3fadSHong Zhang sctx.shift_lo = 0.; 821504a3fadSHong Zhang sctx.shift_hi = 1.; 822504a3fadSHong Zhang } 823504a3fadSHong Zhang 824137fb511SHong Zhang ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 825137fb511SHong Zhang ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 826137fb511SHong Zhang ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 827137fb511SHong Zhang ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 828b051339dSHong Zhang ics = ic; 829137fb511SHong Zhang 830504a3fadSHong Zhang #if defined(MV) 83175567043SBarry Smith sctx.shift_top = 0.; 832e60cf9a0SBarry Smith sctx.nshift_max = 0; 83375567043SBarry Smith sctx.shift_lo = 0.; 83475567043SBarry Smith sctx.shift_hi = 0.; 83575567043SBarry Smith sctx.shift_fraction = 0.; 836137fb511SHong Zhang 837f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 83875567043SBarry Smith sctx.shift_top = 0.; 839137fb511SHong Zhang for (i=0; i<n; i++) { 840137fb511SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 841b051339dSHong Zhang d = (a->a)[diag[i]]; 842137fb511SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 843b051339dSHong Zhang v = a->a+ai[i]; 844b051339dSHong Zhang nz = ai[i+1] - ai[i]; 845137fb511SHong Zhang for (j=0; j<nz; j++) 846137fb511SHong Zhang rs += PetscAbsScalar(v[j]); 847137fb511SHong Zhang if (rs>sctx.shift_top) sctx.shift_top = rs; 848137fb511SHong Zhang } 849137fb511SHong Zhang if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 850137fb511SHong Zhang sctx.shift_top *= 1.1; 851137fb511SHong Zhang sctx.nshift_max = 5; 852137fb511SHong Zhang sctx.shift_lo = 0.; 853137fb511SHong Zhang sctx.shift_hi = 1.; 854137fb511SHong Zhang } 855137fb511SHong Zhang 85675567043SBarry Smith sctx.shift_amount = 0.; 857137fb511SHong Zhang sctx.nshift = 0; 858504a3fadSHong Zhang #endif 859504a3fadSHong Zhang 860137fb511SHong Zhang do { 86107b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 862137fb511SHong Zhang for (i=0; i<n; i++) { 863b051339dSHong Zhang /* load in initial unfactored row */ 864b051339dSHong Zhang nz = ai[r[i]+1] - ai[r[i]]; 865b051339dSHong Zhang ajtmp = aj + ai[r[i]]; 866b051339dSHong Zhang v = a->a + ai[r[i]]; 867137fb511SHong Zhang /* sort permuted ajtmp and values v accordingly */ 868b051339dSHong Zhang for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 869137fb511SHong Zhang ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 870137fb511SHong Zhang 871b051339dSHong Zhang diag[r[i]] = ai[r[i]]; 872137fb511SHong Zhang for (j=0; j<nz; j++) { 873137fb511SHong Zhang rtmp[ajtmp[j]] = v[j]; 874b051339dSHong Zhang if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 875137fb511SHong Zhang } 876137fb511SHong Zhang rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 877137fb511SHong Zhang 878b051339dSHong Zhang row = *ajtmp++; 879137fb511SHong Zhang while (row < i) { 880137fb511SHong Zhang pc = rtmp + row; 881137fb511SHong Zhang if (*pc != 0.0) { 882b051339dSHong Zhang pv = a->a + diag[r[row]]; 883b051339dSHong Zhang pj = aj + diag[r[row]] + 1; 884137fb511SHong Zhang 885137fb511SHong Zhang multiplier = *pc / *pv++; 886137fb511SHong Zhang *pc = multiplier; 887b051339dSHong Zhang nz = ai[r[row]+1] - diag[r[row]] - 1; 888b051339dSHong Zhang for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 88928b1a77fSLisandro Dalcin ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 890137fb511SHong Zhang } 891b051339dSHong Zhang row = *ajtmp++; 892137fb511SHong Zhang } 893b051339dSHong Zhang /* finished row so overwrite it onto a->a */ 894b051339dSHong Zhang pv = a->a + ai[r[i]] ; 895b051339dSHong Zhang pj = aj + ai[r[i]] ; 896b051339dSHong Zhang nz = ai[r[i]+1] - ai[r[i]]; 897b051339dSHong Zhang nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 898137fb511SHong Zhang 899137fb511SHong Zhang rs = 0.0; 900137fb511SHong Zhang for (j=0; j<nz; j++) { 901b051339dSHong Zhang pv[j] = rtmp[pj[j]]; 902b051339dSHong Zhang if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 903137fb511SHong Zhang } 904137fb511SHong Zhang 905137fb511SHong Zhang sctx.rs = rs; 906b051339dSHong Zhang sctx.pv = pv[nbdiag]; 907d0660788SBarry Smith ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 90807b50cabSHong Zhang if (sctx.newshift) break; 909504a3fadSHong Zhang pv[nbdiag] = sctx.pv; 910137fb511SHong Zhang } 911137fb511SHong Zhang 91207b50cabSHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 913137fb511SHong Zhang /* 914137fb511SHong Zhang * if no shift in this attempt & shifting & started shifting & can refine, 915137fb511SHong Zhang * then try lower shift 916137fb511SHong Zhang */ 9170481f469SBarry Smith sctx.shift_hi = sctx.shift_fraction; 9180481f469SBarry Smith sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 9190481f469SBarry Smith sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 92007b50cabSHong Zhang sctx.newshift = PETSC_TRUE; 921137fb511SHong Zhang sctx.nshift++; 922137fb511SHong Zhang } 92307b50cabSHong Zhang } while (sctx.newshift); 924137fb511SHong Zhang 925137fb511SHong Zhang /* invert diagonal entries for simplier triangular solves */ 926137fb511SHong Zhang for (i=0; i<n; i++) { 927b051339dSHong Zhang a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 928137fb511SHong Zhang } 929137fb511SHong Zhang 930137fb511SHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 931137fb511SHong Zhang ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 932137fb511SHong Zhang ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 933b051339dSHong Zhang A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 934ad04f41aSHong Zhang A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 935ad04f41aSHong Zhang A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 936ad04f41aSHong Zhang A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 937b051339dSHong Zhang A->assembled = PETSC_TRUE; 9385c9eb25fSBarry Smith A->preallocated = PETSC_TRUE; 939d0f46423SBarry Smith ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 940137fb511SHong Zhang if (sctx.nshift) { 941f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 9420481f469SBarry Smith ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 943f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 944e738e422SBarry Smith ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 945137fb511SHong Zhang } 946137fb511SHong Zhang } 947137fb511SHong Zhang PetscFunctionReturn(0); 948137fb511SHong Zhang } 949137fb511SHong Zhang 9500a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */ 9514a2ae208SSatish Balay #undef __FUNCT__ 9524a2ae208SSatish Balay #define __FUNCT__ "MatLUFactor_SeqAIJ" 9530481f469SBarry Smith PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 954da3a660dSBarry Smith { 955dfbe8321SBarry Smith PetscErrorCode ierr; 956416022c9SBarry Smith Mat C; 957416022c9SBarry Smith 9583a40ed3dSBarry Smith PetscFunctionBegin; 9592692d6eeSBarry Smith ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 960719d5645SBarry Smith ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 961719d5645SBarry Smith ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 962db4efbfdSBarry Smith A->ops->solve = C->ops->solve; 963db4efbfdSBarry Smith A->ops->solvetranspose = C->ops->solvetranspose; 964eb6b5d47SBarry Smith ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 96552e6d16bSBarry Smith ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 9663a40ed3dSBarry Smith PetscFunctionReturn(0); 967da3a660dSBarry Smith } 9680a6dbcc7SLois Curfman McInnes /* ----------------------------------------------------------- */ 9691d098868SHong Zhang 970889a8dedSBarry Smith 9714a2ae208SSatish Balay #undef __FUNCT__ 972ad04f41aSHong Zhang #define __FUNCT__ "MatSolve_SeqAIJ_inplace" 973ad04f41aSHong Zhang PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 9748c37ef55SBarry Smith { 975416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 976416022c9SBarry Smith IS iscol = a->col,isrow = a->row; 9776849ba73SBarry Smith PetscErrorCode ierr; 9785d0c19d7SBarry Smith PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 9795d0c19d7SBarry Smith PetscInt nz; 9805d0c19d7SBarry Smith const PetscInt *rout,*cout,*r,*c; 981dd6ea824SBarry Smith PetscScalar *x,*tmp,*tmps,sum; 982d9fead3dSBarry Smith const PetscScalar *b; 983dd6ea824SBarry Smith const MatScalar *aa = a->a,*v; 9848c37ef55SBarry Smith 9853a40ed3dSBarry Smith PetscFunctionBegin; 9863a40ed3dSBarry Smith if (!n) PetscFunctionReturn(0); 98791bf9adeSBarry Smith 9883649974fSBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 9891ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 990416022c9SBarry Smith tmp = a->solve_work; 9918c37ef55SBarry Smith 9923b2fbd54SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 9933b2fbd54SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 9948c37ef55SBarry Smith 9958c37ef55SBarry Smith /* forward solve the lower triangular */ 9968c37ef55SBarry Smith tmp[0] = b[*r++]; 997010a20caSHong Zhang tmps = tmp; 9988c37ef55SBarry Smith for (i=1; i<n; i++) { 999010a20caSHong Zhang v = aa + ai[i] ; 1000010a20caSHong Zhang vi = aj + ai[i] ; 100153e7425aSBarry Smith nz = a->diag[i] - ai[i]; 100253e7425aSBarry Smith sum = b[*r++]; 1003003131ecSBarry Smith PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 10048c37ef55SBarry Smith tmp[i] = sum; 10058c37ef55SBarry Smith } 10068c37ef55SBarry Smith 10078c37ef55SBarry Smith /* backward solve the upper triangular */ 10088c37ef55SBarry Smith for (i=n-1; i>=0; i--) { 1009010a20caSHong Zhang v = aa + a->diag[i] + 1; 1010010a20caSHong Zhang vi = aj + a->diag[i] + 1; 1011416022c9SBarry Smith nz = ai[i+1] - a->diag[i] - 1; 10128c37ef55SBarry Smith sum = tmp[i]; 1013003131ecSBarry Smith PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1014010a20caSHong Zhang x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 10158c37ef55SBarry Smith } 10168c37ef55SBarry Smith 10173b2fbd54SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 10183b2fbd54SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 10193649974fSBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 10201ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1021dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 10223a40ed3dSBarry Smith PetscFunctionReturn(0); 10238c37ef55SBarry Smith } 1024026e39d0SSatish Balay 10252bebee5dSHong Zhang #undef __FUNCT__ 1026ad04f41aSHong Zhang #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace" 1027ad04f41aSHong Zhang PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 10282bebee5dSHong Zhang { 10292bebee5dSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 10302bebee5dSHong Zhang IS iscol = a->col,isrow = a->row; 10312bebee5dSHong Zhang PetscErrorCode ierr; 10325d0c19d7SBarry Smith PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 10335d0c19d7SBarry Smith PetscInt nz,neq; 10345d0c19d7SBarry Smith const PetscInt *rout,*cout,*r,*c; 1035dd6ea824SBarry Smith PetscScalar *x,*b,*tmp,*tmps,sum; 1036dd6ea824SBarry Smith const MatScalar *aa = a->a,*v; 1037ace3abfcSBarry Smith PetscBool bisdense,xisdense; 10382bebee5dSHong Zhang 10392bebee5dSHong Zhang PetscFunctionBegin; 10402bebee5dSHong Zhang if (!n) PetscFunctionReturn(0); 10412bebee5dSHong Zhang 1042251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1043e32f2f54SBarry Smith if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1044251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1045e32f2f54SBarry Smith if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1046db4efbfdSBarry Smith 10478c778c55SBarry Smith ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 10488c778c55SBarry Smith ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 10492bebee5dSHong Zhang 10502bebee5dSHong Zhang tmp = a->solve_work; 10512bebee5dSHong Zhang ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 10522bebee5dSHong Zhang ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 10532bebee5dSHong Zhang 1054a36b22ccSSatish Balay for (neq=0; neq<B->cmap->n; neq++) { 10552bebee5dSHong Zhang /* forward solve the lower triangular */ 10562bebee5dSHong Zhang tmp[0] = b[r[0]]; 10572bebee5dSHong Zhang tmps = tmp; 10582bebee5dSHong Zhang for (i=1; i<n; i++) { 10592bebee5dSHong Zhang v = aa + ai[i] ; 10602bebee5dSHong Zhang vi = aj + ai[i] ; 10612bebee5dSHong Zhang nz = a->diag[i] - ai[i]; 10622bebee5dSHong Zhang sum = b[r[i]]; 1063003131ecSBarry Smith PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 10642bebee5dSHong Zhang tmp[i] = sum; 10652bebee5dSHong Zhang } 10662bebee5dSHong Zhang /* backward solve the upper triangular */ 10672bebee5dSHong Zhang for (i=n-1; i>=0; i--) { 10682bebee5dSHong Zhang v = aa + a->diag[i] + 1; 10692bebee5dSHong Zhang vi = aj + a->diag[i] + 1; 10702bebee5dSHong Zhang nz = ai[i+1] - a->diag[i] - 1; 10712bebee5dSHong Zhang sum = tmp[i]; 1072003131ecSBarry Smith PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 10732bebee5dSHong Zhang x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 10742bebee5dSHong Zhang } 10752bebee5dSHong Zhang 10762bebee5dSHong Zhang b += n; 10772bebee5dSHong Zhang x += n; 10782bebee5dSHong Zhang } 10792bebee5dSHong Zhang ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 10802bebee5dSHong Zhang ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 10818c778c55SBarry Smith ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 10828c778c55SBarry Smith ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1083dc0b31edSSatish Balay ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 10842bebee5dSHong Zhang PetscFunctionReturn(0); 10852bebee5dSHong Zhang } 10862bebee5dSHong Zhang 1087137fb511SHong Zhang #undef __FUNCT__ 108835233d99SShri Abhyankar #define __FUNCT__ "MatMatSolve_SeqAIJ" 108935233d99SShri Abhyankar PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 10909bd0847aSShri Abhyankar { 10919bd0847aSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 10929bd0847aSShri Abhyankar IS iscol = a->col,isrow = a->row; 10939bd0847aSShri Abhyankar PetscErrorCode ierr; 10949bd0847aSShri Abhyankar PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 10959bd0847aSShri Abhyankar PetscInt nz,neq; 10969bd0847aSShri Abhyankar const PetscInt *rout,*cout,*r,*c; 10979bd0847aSShri Abhyankar PetscScalar *x,*b,*tmp,sum; 10989bd0847aSShri Abhyankar const MatScalar *aa = a->a,*v; 1099ace3abfcSBarry Smith PetscBool bisdense,xisdense; 11009bd0847aSShri Abhyankar 11019bd0847aSShri Abhyankar PetscFunctionBegin; 11029bd0847aSShri Abhyankar if (!n) PetscFunctionReturn(0); 11039bd0847aSShri Abhyankar 1104251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1105e32f2f54SBarry Smith if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1106251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1107e32f2f54SBarry Smith if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 11089bd0847aSShri Abhyankar 11098c778c55SBarry Smith ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 11108c778c55SBarry Smith ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 11119bd0847aSShri Abhyankar 11129bd0847aSShri Abhyankar tmp = a->solve_work; 11139bd0847aSShri Abhyankar ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 11149bd0847aSShri Abhyankar ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 11159bd0847aSShri Abhyankar 11169bd0847aSShri Abhyankar for (neq=0; neq<B->cmap->n; neq++) { 11179bd0847aSShri Abhyankar /* forward solve the lower triangular */ 11189bd0847aSShri Abhyankar tmp[0] = b[r[0]]; 11199bd0847aSShri Abhyankar v = aa; 11209bd0847aSShri Abhyankar vi = aj; 11219bd0847aSShri Abhyankar for (i=1; i<n; i++) { 11229bd0847aSShri Abhyankar nz = ai[i+1] - ai[i]; 11239bd0847aSShri Abhyankar sum = b[r[i]]; 11249bd0847aSShri Abhyankar PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 11259bd0847aSShri Abhyankar tmp[i] = sum; 11269bd0847aSShri Abhyankar v += nz; vi += nz; 11279bd0847aSShri Abhyankar } 11289bd0847aSShri Abhyankar 11299bd0847aSShri Abhyankar /* backward solve the upper triangular */ 11309bd0847aSShri Abhyankar for (i=n-1; i>=0; i--) { 11319bd0847aSShri Abhyankar v = aa + adiag[i+1]+1; 11329bd0847aSShri Abhyankar vi = aj + adiag[i+1]+1; 11339bd0847aSShri Abhyankar nz = adiag[i]-adiag[i+1]-1; 11349bd0847aSShri Abhyankar sum = tmp[i]; 11359bd0847aSShri Abhyankar PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 11369bd0847aSShri Abhyankar x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 11379bd0847aSShri Abhyankar } 11389bd0847aSShri Abhyankar 11399bd0847aSShri Abhyankar b += n; 11409bd0847aSShri Abhyankar x += n; 11419bd0847aSShri Abhyankar } 11429bd0847aSShri Abhyankar ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 11439bd0847aSShri Abhyankar ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 11448c778c55SBarry Smith ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 11458c778c55SBarry Smith ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 11469bd0847aSShri Abhyankar ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 11479bd0847aSShri Abhyankar PetscFunctionReturn(0); 11489bd0847aSShri Abhyankar } 11499bd0847aSShri Abhyankar 11509bd0847aSShri Abhyankar #undef __FUNCT__ 1151137fb511SHong Zhang #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1152137fb511SHong Zhang PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1153137fb511SHong Zhang { 1154137fb511SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1155137fb511SHong Zhang IS iscol = a->col,isrow = a->row; 1156137fb511SHong Zhang PetscErrorCode ierr; 11575d0c19d7SBarry Smith const PetscInt *r,*c,*rout,*cout; 11585d0c19d7SBarry Smith PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 11595d0c19d7SBarry Smith PetscInt nz,row; 1160dd6ea824SBarry Smith PetscScalar *x,*b,*tmp,*tmps,sum; 1161dd6ea824SBarry Smith const MatScalar *aa = a->a,*v; 1162137fb511SHong Zhang 1163137fb511SHong Zhang PetscFunctionBegin; 1164137fb511SHong Zhang if (!n) PetscFunctionReturn(0); 1165137fb511SHong Zhang 1166137fb511SHong Zhang ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1167137fb511SHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1168137fb511SHong Zhang tmp = a->solve_work; 1169137fb511SHong Zhang 1170137fb511SHong Zhang ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1171137fb511SHong Zhang ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1172137fb511SHong Zhang 1173137fb511SHong Zhang /* forward solve the lower triangular */ 1174137fb511SHong Zhang tmp[0] = b[*r++]; 1175137fb511SHong Zhang tmps = tmp; 1176137fb511SHong Zhang for (row=1; row<n; row++) { 1177137fb511SHong Zhang i = rout[row]; /* permuted row */ 1178137fb511SHong Zhang v = aa + ai[i] ; 1179137fb511SHong Zhang vi = aj + ai[i] ; 1180137fb511SHong Zhang nz = a->diag[i] - ai[i]; 1181137fb511SHong Zhang sum = b[*r++]; 1182003131ecSBarry Smith PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1183137fb511SHong Zhang tmp[row] = sum; 1184137fb511SHong Zhang } 1185137fb511SHong Zhang 1186137fb511SHong Zhang /* backward solve the upper triangular */ 1187137fb511SHong Zhang for (row=n-1; row>=0; row--) { 1188137fb511SHong Zhang i = rout[row]; /* permuted row */ 1189137fb511SHong Zhang v = aa + a->diag[i] + 1; 1190137fb511SHong Zhang vi = aj + a->diag[i] + 1; 1191137fb511SHong Zhang nz = ai[i+1] - a->diag[i] - 1; 1192137fb511SHong Zhang sum = tmp[row]; 1193003131ecSBarry Smith PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1194137fb511SHong Zhang x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1195137fb511SHong Zhang } 1196137fb511SHong Zhang 1197137fb511SHong Zhang ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1198137fb511SHong Zhang ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1199137fb511SHong Zhang ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1200137fb511SHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1201dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1202137fb511SHong Zhang PetscFunctionReturn(0); 1203137fb511SHong Zhang } 1204137fb511SHong Zhang 1205930ae53cSBarry Smith /* ----------------------------------------------------------- */ 1206c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 12074a2ae208SSatish Balay #undef __FUNCT__ 1208ad04f41aSHong Zhang #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace" 1209ad04f41aSHong Zhang PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1210930ae53cSBarry Smith { 1211930ae53cSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 12126849ba73SBarry Smith PetscErrorCode ierr; 1213d0f46423SBarry Smith PetscInt n = A->rmap->n; 1214b377110cSBarry Smith const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1215356650c2SBarry Smith PetscScalar *x; 1216356650c2SBarry Smith const PetscScalar *b; 1217dd6ea824SBarry Smith const MatScalar *aa = a->a; 1218aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1219356650c2SBarry Smith PetscInt adiag_i,i,nz,ai_i; 1220b377110cSBarry Smith const PetscInt *vi; 1221dd6ea824SBarry Smith const MatScalar *v; 1222dd6ea824SBarry Smith PetscScalar sum; 1223d85afc42SSatish Balay #endif 1224930ae53cSBarry Smith 12253a40ed3dSBarry Smith PetscFunctionBegin; 12263a40ed3dSBarry Smith if (!n) PetscFunctionReturn(0); 1227930ae53cSBarry Smith 12283649974fSBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 12291ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1230930ae53cSBarry Smith 1231aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 12321c4feecaSSatish Balay fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 12331c4feecaSSatish Balay #else 1234930ae53cSBarry Smith /* forward solve the lower triangular */ 1235930ae53cSBarry Smith x[0] = b[0]; 1236930ae53cSBarry Smith for (i=1; i<n; i++) { 1237930ae53cSBarry Smith ai_i = ai[i]; 1238930ae53cSBarry Smith v = aa + ai_i; 1239930ae53cSBarry Smith vi = aj + ai_i; 1240930ae53cSBarry Smith nz = adiag[i] - ai_i; 1241930ae53cSBarry Smith sum = b[i]; 1242003131ecSBarry Smith PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1243930ae53cSBarry Smith x[i] = sum; 1244930ae53cSBarry Smith } 1245930ae53cSBarry Smith 1246930ae53cSBarry Smith /* backward solve the upper triangular */ 1247930ae53cSBarry Smith for (i=n-1; i>=0; i--) { 1248930ae53cSBarry Smith adiag_i = adiag[i]; 1249d708749eSBarry Smith v = aa + adiag_i + 1; 1250d708749eSBarry Smith vi = aj + adiag_i + 1; 1251930ae53cSBarry Smith nz = ai[i+1] - adiag_i - 1; 1252930ae53cSBarry Smith sum = x[i]; 1253003131ecSBarry Smith PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1254930ae53cSBarry Smith x[i] = sum*aa[adiag_i]; 1255930ae53cSBarry Smith } 12561c4feecaSSatish Balay #endif 1257dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 12583649974fSBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 12591ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 12603a40ed3dSBarry Smith PetscFunctionReturn(0); 1261930ae53cSBarry Smith } 1262930ae53cSBarry Smith 12634a2ae208SSatish Balay #undef __FUNCT__ 1264ad04f41aSHong Zhang #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace" 1265ad04f41aSHong Zhang PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1266da3a660dSBarry Smith { 1267416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1268416022c9SBarry Smith IS iscol = a->col,isrow = a->row; 12696849ba73SBarry Smith PetscErrorCode ierr; 127004fbf559SBarry Smith PetscInt i, n = A->rmap->n,j; 12715d0c19d7SBarry Smith PetscInt nz; 127204fbf559SBarry Smith const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 127304fbf559SBarry Smith PetscScalar *x,*tmp,sum; 127404fbf559SBarry Smith const PetscScalar *b; 1275dd6ea824SBarry Smith const MatScalar *aa = a->a,*v; 1276da3a660dSBarry Smith 12773a40ed3dSBarry Smith PetscFunctionBegin; 127878b31e54SBarry Smith if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1279da3a660dSBarry Smith 12803649974fSBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 12811ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1282416022c9SBarry Smith tmp = a->solve_work; 1283da3a660dSBarry Smith 12843b2fbd54SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 12853b2fbd54SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1286da3a660dSBarry Smith 1287da3a660dSBarry Smith /* forward solve the lower triangular */ 1288da3a660dSBarry Smith tmp[0] = b[*r++]; 1289da3a660dSBarry Smith for (i=1; i<n; i++) { 1290010a20caSHong Zhang v = aa + ai[i] ; 1291010a20caSHong Zhang vi = aj + ai[i] ; 1292416022c9SBarry Smith nz = a->diag[i] - ai[i]; 1293da3a660dSBarry Smith sum = b[*r++]; 129404fbf559SBarry Smith for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1295da3a660dSBarry Smith tmp[i] = sum; 1296da3a660dSBarry Smith } 1297da3a660dSBarry Smith 1298da3a660dSBarry Smith /* backward solve the upper triangular */ 1299da3a660dSBarry Smith for (i=n-1; i>=0; i--) { 1300010a20caSHong Zhang v = aa + a->diag[i] + 1; 1301010a20caSHong Zhang vi = aj + a->diag[i] + 1; 1302416022c9SBarry Smith nz = ai[i+1] - a->diag[i] - 1; 1303da3a660dSBarry Smith sum = tmp[i]; 130404fbf559SBarry Smith for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1305010a20caSHong Zhang tmp[i] = sum*aa[a->diag[i]]; 1306da3a660dSBarry Smith x[*c--] += tmp[i]; 1307da3a660dSBarry Smith } 1308da3a660dSBarry Smith 13093b2fbd54SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 13103b2fbd54SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 13113649974fSBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 13121ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1313dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1314898183ecSLois Curfman McInnes 13153a40ed3dSBarry Smith PetscFunctionReturn(0); 1316da3a660dSBarry Smith } 131704fbf559SBarry Smith 13184a2ae208SSatish Balay #undef __FUNCT__ 131935233d99SShri Abhyankar #define __FUNCT__ "MatSolveAdd_SeqAIJ" 132035233d99SShri Abhyankar PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 13213c0119dfSShri Abhyankar { 13223c0119dfSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 13233c0119dfSShri Abhyankar IS iscol = a->col,isrow = a->row; 13243c0119dfSShri Abhyankar PetscErrorCode ierr; 13253c0119dfSShri Abhyankar PetscInt i, n = A->rmap->n,j; 13263c0119dfSShri Abhyankar PetscInt nz; 13273c0119dfSShri Abhyankar const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 13283c0119dfSShri Abhyankar PetscScalar *x,*tmp,sum; 13293c0119dfSShri Abhyankar const PetscScalar *b; 13303c0119dfSShri Abhyankar const MatScalar *aa = a->a,*v; 13313c0119dfSShri Abhyankar 13323c0119dfSShri Abhyankar PetscFunctionBegin; 13333c0119dfSShri Abhyankar if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 13343c0119dfSShri Abhyankar 13353649974fSBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 13363c0119dfSShri Abhyankar ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 13373c0119dfSShri Abhyankar tmp = a->solve_work; 13383c0119dfSShri Abhyankar 13393c0119dfSShri Abhyankar ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 13403c0119dfSShri Abhyankar ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 13413c0119dfSShri Abhyankar 13423c0119dfSShri Abhyankar /* forward solve the lower triangular */ 13433c0119dfSShri Abhyankar tmp[0] = b[r[0]]; 13443c0119dfSShri Abhyankar v = aa; 13453c0119dfSShri Abhyankar vi = aj; 13463c0119dfSShri Abhyankar for (i=1; i<n; i++) { 13473c0119dfSShri Abhyankar nz = ai[i+1] - ai[i]; 13483c0119dfSShri Abhyankar sum = b[r[i]]; 13493c0119dfSShri Abhyankar for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 13503c0119dfSShri Abhyankar tmp[i] = sum; 13513c0119dfSShri Abhyankar v += nz; vi += nz; 13523c0119dfSShri Abhyankar } 13533c0119dfSShri Abhyankar 13543c0119dfSShri Abhyankar /* backward solve the upper triangular */ 13553c0119dfSShri Abhyankar v = aa + adiag[n-1]; 13563c0119dfSShri Abhyankar vi = aj + adiag[n-1]; 13573c0119dfSShri Abhyankar for (i=n-1; i>=0; i--) { 13583c0119dfSShri Abhyankar nz = adiag[i] - adiag[i+1] - 1; 13593c0119dfSShri Abhyankar sum = tmp[i]; 13603c0119dfSShri Abhyankar for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 13613c0119dfSShri Abhyankar tmp[i] = sum*v[nz]; 13623c0119dfSShri Abhyankar x[c[i]] += tmp[i]; 13633c0119dfSShri Abhyankar v += nz+1; vi += nz+1; 13643c0119dfSShri Abhyankar } 13653c0119dfSShri Abhyankar 13663c0119dfSShri Abhyankar ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 13673c0119dfSShri Abhyankar ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 13683649974fSBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 13693c0119dfSShri Abhyankar ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 13703c0119dfSShri Abhyankar ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 13713c0119dfSShri Abhyankar 13723c0119dfSShri Abhyankar PetscFunctionReturn(0); 13733c0119dfSShri Abhyankar } 13743c0119dfSShri Abhyankar 13753c0119dfSShri Abhyankar #undef __FUNCT__ 1376ad04f41aSHong Zhang #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace" 1377ad04f41aSHong Zhang PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1378da3a660dSBarry Smith { 1379416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 13802235e667SBarry Smith IS iscol = a->col,isrow = a->row; 13816849ba73SBarry Smith PetscErrorCode ierr; 138204fbf559SBarry Smith const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 138304fbf559SBarry Smith PetscInt i,n = A->rmap->n,j; 138404fbf559SBarry Smith PetscInt nz; 138504fbf559SBarry Smith PetscScalar *x,*tmp,s1; 1386dd6ea824SBarry Smith const MatScalar *aa = a->a,*v; 138704fbf559SBarry Smith const PetscScalar *b; 1388da3a660dSBarry Smith 13893a40ed3dSBarry Smith PetscFunctionBegin; 13903649974fSBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 13911ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1392416022c9SBarry Smith tmp = a->solve_work; 1393da3a660dSBarry Smith 13942235e667SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 13952235e667SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1396da3a660dSBarry Smith 1397da3a660dSBarry Smith /* copy the b into temp work space according to permutation */ 13982235e667SBarry Smith for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1399da3a660dSBarry Smith 1400da3a660dSBarry Smith /* forward solve the U^T */ 1401da3a660dSBarry Smith for (i=0; i<n; i++) { 1402010a20caSHong Zhang v = aa + diag[i] ; 1403010a20caSHong Zhang vi = aj + diag[i] + 1; 1404f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 1405c38d4ed2SBarry Smith s1 = tmp[i]; 1406ef66eb69SBarry Smith s1 *= (*v++); /* multiply by inverse of diagonal entry */ 140704fbf559SBarry Smith for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1408c38d4ed2SBarry Smith tmp[i] = s1; 1409da3a660dSBarry Smith } 1410da3a660dSBarry Smith 1411da3a660dSBarry Smith /* backward solve the L^T */ 1412da3a660dSBarry Smith for (i=n-1; i>=0; i--) { 1413010a20caSHong Zhang v = aa + diag[i] - 1 ; 1414010a20caSHong Zhang vi = aj + diag[i] - 1 ; 1415f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 1416f1af5d2fSBarry Smith s1 = tmp[i]; 141704fbf559SBarry Smith for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1418da3a660dSBarry Smith } 1419da3a660dSBarry Smith 1420da3a660dSBarry Smith /* copy tmp into x according to permutation */ 1421591294e4SBarry Smith for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1422da3a660dSBarry Smith 14232235e667SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 14242235e667SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 14253649974fSBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 14261ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1427da3a660dSBarry Smith 1428dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 14293a40ed3dSBarry Smith PetscFunctionReturn(0); 1430da3a660dSBarry Smith } 1431da3a660dSBarry Smith 1432d1fa9404SShri Abhyankar #undef __FUNCT__ 143335233d99SShri Abhyankar #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 143435233d99SShri Abhyankar PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1435d1fa9404SShri Abhyankar { 1436d1fa9404SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1437d1fa9404SShri Abhyankar IS iscol = a->col,isrow = a->row; 1438d1fa9404SShri Abhyankar PetscErrorCode ierr; 1439d1fa9404SShri Abhyankar const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1440d1fa9404SShri Abhyankar PetscInt i,n = A->rmap->n,j; 1441d1fa9404SShri Abhyankar PetscInt nz; 1442d1fa9404SShri Abhyankar PetscScalar *x,*tmp,s1; 1443d1fa9404SShri Abhyankar const MatScalar *aa = a->a,*v; 1444d1fa9404SShri Abhyankar const PetscScalar *b; 1445d1fa9404SShri Abhyankar 1446d1fa9404SShri Abhyankar PetscFunctionBegin; 14473649974fSBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1448d1fa9404SShri Abhyankar ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1449d1fa9404SShri Abhyankar tmp = a->solve_work; 1450d1fa9404SShri Abhyankar 1451d1fa9404SShri Abhyankar ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1452d1fa9404SShri Abhyankar ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1453d1fa9404SShri Abhyankar 1454d1fa9404SShri Abhyankar /* copy the b into temp work space according to permutation */ 1455d1fa9404SShri Abhyankar for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1456d1fa9404SShri Abhyankar 1457d1fa9404SShri Abhyankar /* forward solve the U^T */ 1458d1fa9404SShri Abhyankar for (i=0; i<n; i++) { 1459d1fa9404SShri Abhyankar v = aa + adiag[i+1] + 1; 1460d1fa9404SShri Abhyankar vi = aj + adiag[i+1] + 1; 1461d1fa9404SShri Abhyankar nz = adiag[i] - adiag[i+1] - 1; 1462d1fa9404SShri Abhyankar s1 = tmp[i]; 1463d1fa9404SShri Abhyankar s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1464d1fa9404SShri Abhyankar for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1465d1fa9404SShri Abhyankar tmp[i] = s1; 1466d1fa9404SShri Abhyankar } 1467d1fa9404SShri Abhyankar 1468d1fa9404SShri Abhyankar /* backward solve the L^T */ 1469d1fa9404SShri Abhyankar for (i=n-1; i>=0; i--) { 1470d1fa9404SShri Abhyankar v = aa + ai[i]; 1471d1fa9404SShri Abhyankar vi = aj + ai[i]; 1472d1fa9404SShri Abhyankar nz = ai[i+1] - ai[i]; 1473d1fa9404SShri Abhyankar s1 = tmp[i]; 1474d1fa9404SShri Abhyankar for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1475d1fa9404SShri Abhyankar } 1476d1fa9404SShri Abhyankar 1477d1fa9404SShri Abhyankar /* copy tmp into x according to permutation */ 1478d1fa9404SShri Abhyankar for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1479d1fa9404SShri Abhyankar 1480d1fa9404SShri Abhyankar ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1481d1fa9404SShri Abhyankar ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 14823649974fSBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1483d1fa9404SShri Abhyankar ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1484d1fa9404SShri Abhyankar 1485d1fa9404SShri Abhyankar ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1486d1fa9404SShri Abhyankar PetscFunctionReturn(0); 1487d1fa9404SShri Abhyankar } 1488d1fa9404SShri Abhyankar 14894a2ae208SSatish Balay #undef __FUNCT__ 1490ad04f41aSHong Zhang #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace" 1491ad04f41aSHong Zhang PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1492da3a660dSBarry Smith { 1493416022c9SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 14942235e667SBarry Smith IS iscol = a->col,isrow = a->row; 14956849ba73SBarry Smith PetscErrorCode ierr; 149604fbf559SBarry Smith const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 149704fbf559SBarry Smith PetscInt i,n = A->rmap->n,j; 149804fbf559SBarry Smith PetscInt nz; 149904fbf559SBarry Smith PetscScalar *x,*tmp,s1; 1500dd6ea824SBarry Smith const MatScalar *aa = a->a,*v; 150104fbf559SBarry Smith const PetscScalar *b; 15026abc6512SBarry Smith 15033a40ed3dSBarry Smith PetscFunctionBegin; 150423bc6035SMatthew Knepley if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 15053649974fSBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 15061ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1507416022c9SBarry Smith tmp = a->solve_work; 15086abc6512SBarry Smith 15092235e667SBarry Smith ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 15102235e667SBarry Smith ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 15116abc6512SBarry Smith 15126abc6512SBarry Smith /* copy the b into temp work space according to permutation */ 15132235e667SBarry Smith for (i=0; i<n; i++) tmp[i] = b[c[i]]; 15146abc6512SBarry Smith 15156abc6512SBarry Smith /* forward solve the U^T */ 15166abc6512SBarry Smith for (i=0; i<n; i++) { 1517010a20caSHong Zhang v = aa + diag[i] ; 1518010a20caSHong Zhang vi = aj + diag[i] + 1; 1519f1af5d2fSBarry Smith nz = ai[i+1] - diag[i] - 1; 152004fbf559SBarry Smith s1 = tmp[i]; 152104fbf559SBarry Smith s1 *= (*v++); /* multiply by inverse of diagonal entry */ 152204fbf559SBarry Smith for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 152304fbf559SBarry Smith tmp[i] = s1; 15246abc6512SBarry Smith } 15256abc6512SBarry Smith 15266abc6512SBarry Smith /* backward solve the L^T */ 15276abc6512SBarry Smith for (i=n-1; i>=0; i--) { 1528010a20caSHong Zhang v = aa + diag[i] - 1 ; 1529010a20caSHong Zhang vi = aj + diag[i] - 1 ; 1530f1af5d2fSBarry Smith nz = diag[i] - ai[i]; 153104fbf559SBarry Smith s1 = tmp[i]; 153204fbf559SBarry Smith for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 15336abc6512SBarry Smith } 15346abc6512SBarry Smith 15356abc6512SBarry Smith /* copy tmp into x according to permutation */ 15366abc6512SBarry Smith for (i=0; i<n; i++) x[r[i]] += tmp[i]; 15376abc6512SBarry Smith 15382235e667SBarry Smith ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 15392235e667SBarry Smith ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 15403649974fSBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 15411ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 15426abc6512SBarry Smith 154304fbf559SBarry Smith ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 15443a40ed3dSBarry Smith PetscFunctionReturn(0); 1545da3a660dSBarry Smith } 154604fbf559SBarry Smith 1547533e6011SShri Abhyankar #undef __FUNCT__ 154835233d99SShri Abhyankar #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 154935233d99SShri Abhyankar PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1550533e6011SShri Abhyankar { 1551533e6011SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1552533e6011SShri Abhyankar IS iscol = a->col,isrow = a->row; 1553533e6011SShri Abhyankar PetscErrorCode ierr; 1554533e6011SShri Abhyankar const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1555533e6011SShri Abhyankar PetscInt i,n = A->rmap->n,j; 1556533e6011SShri Abhyankar PetscInt nz; 1557533e6011SShri Abhyankar PetscScalar *x,*tmp,s1; 1558533e6011SShri Abhyankar const MatScalar *aa = a->a,*v; 1559533e6011SShri Abhyankar const PetscScalar *b; 1560533e6011SShri Abhyankar 1561533e6011SShri Abhyankar PetscFunctionBegin; 1562533e6011SShri Abhyankar if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 15633649974fSBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1564533e6011SShri Abhyankar ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1565533e6011SShri Abhyankar tmp = a->solve_work; 1566533e6011SShri Abhyankar 1567533e6011SShri Abhyankar ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1568533e6011SShri Abhyankar ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1569533e6011SShri Abhyankar 1570533e6011SShri Abhyankar /* copy the b into temp work space according to permutation */ 1571533e6011SShri Abhyankar for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1572533e6011SShri Abhyankar 1573533e6011SShri Abhyankar /* forward solve the U^T */ 1574533e6011SShri Abhyankar for (i=0; i<n; i++) { 1575533e6011SShri Abhyankar v = aa + adiag[i+1] + 1; 1576533e6011SShri Abhyankar vi = aj + adiag[i+1] + 1; 1577533e6011SShri Abhyankar nz = adiag[i] - adiag[i+1] - 1; 1578533e6011SShri Abhyankar s1 = tmp[i]; 1579533e6011SShri Abhyankar s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1580533e6011SShri Abhyankar for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1581533e6011SShri Abhyankar tmp[i] = s1; 1582533e6011SShri Abhyankar } 1583533e6011SShri Abhyankar 1584533e6011SShri Abhyankar 1585533e6011SShri Abhyankar /* backward solve the L^T */ 1586533e6011SShri Abhyankar for (i=n-1; i>=0; i--) { 1587533e6011SShri Abhyankar v = aa + ai[i] ; 1588533e6011SShri Abhyankar vi = aj + ai[i]; 1589533e6011SShri Abhyankar nz = ai[i+1] - ai[i]; 1590533e6011SShri Abhyankar s1 = tmp[i]; 1591c5e3b2a3SShri Abhyankar for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1592533e6011SShri Abhyankar } 1593533e6011SShri Abhyankar 1594533e6011SShri Abhyankar /* copy tmp into x according to permutation */ 1595533e6011SShri Abhyankar for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1596533e6011SShri Abhyankar 1597533e6011SShri Abhyankar ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1598533e6011SShri Abhyankar ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 15993649974fSBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1600533e6011SShri Abhyankar ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1601533e6011SShri Abhyankar 1602533e6011SShri Abhyankar ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1603533e6011SShri Abhyankar PetscFunctionReturn(0); 1604533e6011SShri Abhyankar } 1605533e6011SShri Abhyankar 16069e25ed09SBarry Smith /* ----------------------------------------------------------------*/ 1607b47f5a65SHong Zhang 160809573ac7SBarry Smith extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool ); 160908480c60SBarry Smith 16108fc3a347SHong Zhang /* 16118a76ed4fSHong Zhang ilu() under revised new data structure. 1612813a1f2bSShri Abhyankar Factored arrays bj and ba are stored as 1613813a1f2bSShri Abhyankar L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1614813a1f2bSShri Abhyankar 1615813a1f2bSShri Abhyankar bi=fact->i is an array of size n+1, in which 1616813a1f2bSShri Abhyankar bi+ 1617baabb860SHong Zhang bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1618baabb860SHong Zhang bi[n]: points to L(n-1,n-1)+1 1619baabb860SHong Zhang 1620813a1f2bSShri Abhyankar bdiag=fact->diag is an array of size n+1,in which 1621baabb860SHong Zhang bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1622a24f213cSHong Zhang bdiag[n]: points to entry of U(n-1,0)-1 1623813a1f2bSShri Abhyankar 1624813a1f2bSShri Abhyankar U(i,:) contains bdiag[i] as its last entry, i.e., 1625813a1f2bSShri Abhyankar U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1626813a1f2bSShri Abhyankar */ 1627813a1f2bSShri Abhyankar #undef __FUNCT__ 162835233d99SShri Abhyankar #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0" 162935233d99SShri Abhyankar PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1630813a1f2bSShri Abhyankar { 1631813a1f2bSShri Abhyankar 1632813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1633813a1f2bSShri Abhyankar PetscErrorCode ierr; 1634bbd65245SShri Abhyankar const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1635bbd65245SShri Abhyankar PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1636ace3abfcSBarry Smith PetscBool missing; 16371df811f5SHong Zhang IS isicol; 1638813a1f2bSShri Abhyankar 1639813a1f2bSShri Abhyankar PetscFunctionBegin; 1640e32f2f54SBarry Smith if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 16411df811f5SHong Zhang ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1642e32f2f54SBarry Smith if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 16431df811f5SHong Zhang ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1644813a1f2bSShri Abhyankar ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1645813a1f2bSShri Abhyankar b = (Mat_SeqAIJ*)(fact)->data; 1646813a1f2bSShri Abhyankar 1647813a1f2bSShri Abhyankar /* allocate matrix arrays for new data structure */ 1648813a1f2bSShri Abhyankar ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr); 1649813a1f2bSShri Abhyankar ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1650813a1f2bSShri Abhyankar b->singlemalloc = PETSC_TRUE; 1651813a1f2bSShri Abhyankar if (!b->diag) { 1652813a1f2bSShri Abhyankar ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr); 1653813a1f2bSShri Abhyankar ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1654813a1f2bSShri Abhyankar } 1655813a1f2bSShri Abhyankar bdiag = b->diag; 1656813a1f2bSShri Abhyankar 1657813a1f2bSShri Abhyankar if (n > 0) { 1658813a1f2bSShri Abhyankar ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1659813a1f2bSShri Abhyankar } 1660813a1f2bSShri Abhyankar 1661813a1f2bSShri Abhyankar /* set bi and bj with new data structure */ 1662813a1f2bSShri Abhyankar bi = b->i; 1663813a1f2bSShri Abhyankar bj = b->j; 1664813a1f2bSShri Abhyankar 1665813a1f2bSShri Abhyankar /* L part */ 1666e60cf9a0SBarry Smith bi[0] = 0; 1667813a1f2bSShri Abhyankar for (i=0; i<n; i++) { 1668813a1f2bSShri Abhyankar nz = adiag[i] - ai[i]; 1669813a1f2bSShri Abhyankar bi[i+1] = bi[i] + nz; 1670813a1f2bSShri Abhyankar aj = a->j + ai[i]; 1671813a1f2bSShri Abhyankar for (j=0; j<nz; j++) { 1672bbd65245SShri Abhyankar /* *bj = aj[j]; bj++; */ 1673bbd65245SShri Abhyankar bj[k++] = aj[j]; 1674813a1f2bSShri Abhyankar } 1675813a1f2bSShri Abhyankar } 1676813a1f2bSShri Abhyankar 1677813a1f2bSShri Abhyankar /* U part */ 167862fbd6afSShri Abhyankar bdiag[n] = bi[n]-1; 1679813a1f2bSShri Abhyankar for (i=n-1; i>=0; i--) { 1680813a1f2bSShri Abhyankar nz = ai[i+1] - adiag[i] - 1; 1681813a1f2bSShri Abhyankar aj = a->j + adiag[i] + 1; 1682813a1f2bSShri Abhyankar for (j=0; j<nz; j++) { 1683bbd65245SShri Abhyankar /* *bj = aj[j]; bj++; */ 1684bbd65245SShri Abhyankar bj[k++] = aj[j]; 1685813a1f2bSShri Abhyankar } 1686813a1f2bSShri Abhyankar /* diag[i] */ 1687bbd65245SShri Abhyankar /* *bj = i; bj++; */ 1688bbd65245SShri Abhyankar bj[k++] = i; 1689a24f213cSHong Zhang bdiag[i] = bdiag[i+1] + nz + 1; 1690813a1f2bSShri Abhyankar } 16911df811f5SHong Zhang 1692d5f3da31SBarry Smith fact->factortype = MAT_FACTOR_ILU; 16931df811f5SHong Zhang fact->info.factor_mallocs = 0; 16941df811f5SHong Zhang fact->info.fill_ratio_given = info->fill; 16951df811f5SHong Zhang fact->info.fill_ratio_needed = 1.0; 169635233d99SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 16971df811f5SHong Zhang 16981df811f5SHong Zhang b = (Mat_SeqAIJ*)(fact)->data; 16991df811f5SHong Zhang b->row = isrow; 17001df811f5SHong Zhang b->col = iscol; 17011df811f5SHong Zhang b->icol = isicol; 17021df811f5SHong Zhang ierr = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 17031df811f5SHong Zhang ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 17041df811f5SHong Zhang ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1705813a1f2bSShri Abhyankar PetscFunctionReturn(0); 1706813a1f2bSShri Abhyankar } 1707813a1f2bSShri Abhyankar 1708813a1f2bSShri Abhyankar #undef __FUNCT__ 170935233d99SShri Abhyankar #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 171035233d99SShri Abhyankar PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1711813a1f2bSShri Abhyankar { 1712813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1713813a1f2bSShri Abhyankar IS isicol; 1714813a1f2bSShri Abhyankar PetscErrorCode ierr; 1715813a1f2bSShri Abhyankar const PetscInt *r,*ic; 17161df811f5SHong Zhang PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1717813a1f2bSShri Abhyankar PetscInt *bi,*cols,nnz,*cols_lvl; 1718813a1f2bSShri Abhyankar PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1719813a1f2bSShri Abhyankar PetscInt i,levels,diagonal_fill; 1720ace3abfcSBarry Smith PetscBool col_identity,row_identity; 1721813a1f2bSShri Abhyankar PetscReal f; 1722813a1f2bSShri Abhyankar PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1723813a1f2bSShri Abhyankar PetscBT lnkbt; 1724813a1f2bSShri Abhyankar PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1725813a1f2bSShri Abhyankar PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1726813a1f2bSShri Abhyankar PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1727813a1f2bSShri Abhyankar 1728813a1f2bSShri Abhyankar PetscFunctionBegin; 17293898ab1dSSatish Balay /* Uncomment the old data struct part only while testing new data structure for MatSolve() */ 1730124a3351SHong Zhang /* 1731ace3abfcSBarry Smith PetscBool olddatastruct=PETSC_FALSE; 1732acfcf0e5SJed Brown ierr = PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 173335233d99SShri Abhyankar if (olddatastruct) { 1734ad04f41aSHong Zhang ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1735ad04f41aSHong Zhang PetscFunctionReturn(0); 1736ad04f41aSHong Zhang } 1737124a3351SHong Zhang */ 1738ad04f41aSHong Zhang 1739813a1f2bSShri Abhyankar levels = (PetscInt)info->levels; 1740813a1f2bSShri Abhyankar ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1741813a1f2bSShri Abhyankar ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1742813a1f2bSShri Abhyankar if (!levels && row_identity && col_identity) { 1743813a1f2bSShri Abhyankar /* special case: ilu(0) with natural ordering */ 174435233d99SShri Abhyankar ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1745124a3351SHong Zhang if (a->inode.size) { 1746124a3351SHong Zhang fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1747124a3351SHong Zhang } 1748813a1f2bSShri Abhyankar PetscFunctionReturn(0); 1749813a1f2bSShri Abhyankar } 1750813a1f2bSShri Abhyankar 1751e32f2f54SBarry Smith if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 17521df811f5SHong Zhang ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1753813a1f2bSShri Abhyankar ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1754813a1f2bSShri Abhyankar ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1755813a1f2bSShri Abhyankar 17561bfa06eaSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 17571bfa06eaSJed Brown ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 17581bfa06eaSJed Brown ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1759e60cf9a0SBarry Smith bi[0] = bdiag[0] = 0; 17600e83c824SBarry Smith ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1761813a1f2bSShri Abhyankar 1762813a1f2bSShri Abhyankar /* create a linked list for storing column indices of the active row */ 1763813a1f2bSShri Abhyankar nlnk = n + 1; 1764813a1f2bSShri Abhyankar ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1765813a1f2bSShri Abhyankar 1766813a1f2bSShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 17671df811f5SHong Zhang f = info->fill; 17681df811f5SHong Zhang diagonal_fill = (PetscInt)info->diagonal_fill; 1769813a1f2bSShri Abhyankar ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1770813a1f2bSShri Abhyankar current_space = free_space; 1771813a1f2bSShri Abhyankar ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1772813a1f2bSShri Abhyankar current_space_lvl = free_space_lvl; 1773813a1f2bSShri Abhyankar for (i=0; i<n; i++) { 1774813a1f2bSShri Abhyankar nzi = 0; 1775813a1f2bSShri Abhyankar /* copy current row into linked list */ 1776813a1f2bSShri Abhyankar nnz = ai[r[i]+1] - ai[r[i]]; 1777e32f2f54SBarry Smith if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1778813a1f2bSShri Abhyankar cols = aj + ai[r[i]]; 1779813a1f2bSShri Abhyankar lnk[i] = -1; /* marker to indicate if diagonal exists */ 1780813a1f2bSShri Abhyankar ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1781813a1f2bSShri Abhyankar nzi += nlnk; 1782813a1f2bSShri Abhyankar 1783813a1f2bSShri Abhyankar /* make sure diagonal entry is included */ 1784813a1f2bSShri Abhyankar if (diagonal_fill && lnk[i] == -1) { 1785813a1f2bSShri Abhyankar fm = n; 1786813a1f2bSShri Abhyankar while (lnk[fm] < i) fm = lnk[fm]; 1787813a1f2bSShri Abhyankar lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1788813a1f2bSShri Abhyankar lnk[fm] = i; 1789813a1f2bSShri Abhyankar lnk_lvl[i] = 0; 1790813a1f2bSShri Abhyankar nzi++; dcount++; 1791813a1f2bSShri Abhyankar } 1792813a1f2bSShri Abhyankar 1793813a1f2bSShri Abhyankar /* add pivot rows into the active row */ 1794813a1f2bSShri Abhyankar nzbd = 0; 1795813a1f2bSShri Abhyankar prow = lnk[n]; 1796813a1f2bSShri Abhyankar while (prow < i) { 1797813a1f2bSShri Abhyankar nnz = bdiag[prow]; 1798813a1f2bSShri Abhyankar cols = bj_ptr[prow] + nnz + 1; 1799813a1f2bSShri Abhyankar cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1800813a1f2bSShri Abhyankar nnz = bi[prow+1] - bi[prow] - nnz - 1; 1801813a1f2bSShri Abhyankar ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1802813a1f2bSShri Abhyankar nzi += nlnk; 1803813a1f2bSShri Abhyankar prow = lnk[prow]; 1804813a1f2bSShri Abhyankar nzbd++; 1805813a1f2bSShri Abhyankar } 1806813a1f2bSShri Abhyankar bdiag[i] = nzbd; 1807813a1f2bSShri Abhyankar bi[i+1] = bi[i] + nzi; 1808813a1f2bSShri Abhyankar /* if free space is not available, make more free space */ 1809813a1f2bSShri Abhyankar if (current_space->local_remaining<nzi) { 1810813a1f2bSShri Abhyankar nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 1811813a1f2bSShri Abhyankar ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1812813a1f2bSShri Abhyankar ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1813813a1f2bSShri Abhyankar reallocs++; 1814813a1f2bSShri Abhyankar } 1815813a1f2bSShri Abhyankar 1816813a1f2bSShri Abhyankar /* copy data into free_space and free_space_lvl, then initialize lnk */ 1817813a1f2bSShri Abhyankar ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1818813a1f2bSShri Abhyankar bj_ptr[i] = current_space->array; 1819813a1f2bSShri Abhyankar bjlvl_ptr[i] = current_space_lvl->array; 1820813a1f2bSShri Abhyankar 1821813a1f2bSShri Abhyankar /* make sure the active row i has diagonal entry */ 182265e19b50SBarry Smith if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1823813a1f2bSShri Abhyankar 1824813a1f2bSShri Abhyankar current_space->array += nzi; 1825813a1f2bSShri Abhyankar current_space->local_used += nzi; 1826813a1f2bSShri Abhyankar current_space->local_remaining -= nzi; 1827813a1f2bSShri Abhyankar current_space_lvl->array += nzi; 1828813a1f2bSShri Abhyankar current_space_lvl->local_used += nzi; 1829813a1f2bSShri Abhyankar current_space_lvl->local_remaining -= nzi; 1830813a1f2bSShri Abhyankar } 1831813a1f2bSShri Abhyankar 1832813a1f2bSShri Abhyankar ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1833813a1f2bSShri Abhyankar ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1834813a1f2bSShri Abhyankar /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 18359263d837SHong Zhang ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 18362ce24eb6SHong Zhang ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1837813a1f2bSShri Abhyankar 1838813a1f2bSShri Abhyankar ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1839813a1f2bSShri Abhyankar ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 18400e83c824SBarry Smith ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1841813a1f2bSShri Abhyankar 1842813a1f2bSShri Abhyankar #if defined(PETSC_USE_INFO) 1843813a1f2bSShri Abhyankar { 1844aef85c9fSShri Abhyankar PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1845813a1f2bSShri Abhyankar ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1846813a1f2bSShri Abhyankar ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1847813a1f2bSShri Abhyankar ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1848813a1f2bSShri Abhyankar ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1849813a1f2bSShri Abhyankar if (diagonal_fill) { 1850813a1f2bSShri Abhyankar ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1851813a1f2bSShri Abhyankar } 1852813a1f2bSShri Abhyankar } 1853813a1f2bSShri Abhyankar #endif 1854813a1f2bSShri Abhyankar /* put together the new matrix */ 1855813a1f2bSShri Abhyankar ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1856813a1f2bSShri Abhyankar ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1857813a1f2bSShri Abhyankar b = (Mat_SeqAIJ*)(fact)->data; 1858813a1f2bSShri Abhyankar b->free_a = PETSC_TRUE; 1859813a1f2bSShri Abhyankar b->free_ij = PETSC_TRUE; 1860813a1f2bSShri Abhyankar b->singlemalloc = PETSC_FALSE; 1861f268cba8SShri Abhyankar ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1862813a1f2bSShri Abhyankar b->j = bj; 1863813a1f2bSShri Abhyankar b->i = bi; 1864813a1f2bSShri Abhyankar b->diag = bdiag; 1865813a1f2bSShri Abhyankar b->ilen = 0; 1866813a1f2bSShri Abhyankar b->imax = 0; 1867813a1f2bSShri Abhyankar b->row = isrow; 1868813a1f2bSShri Abhyankar b->col = iscol; 1869813a1f2bSShri Abhyankar ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1870813a1f2bSShri Abhyankar ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1871813a1f2bSShri Abhyankar b->icol = isicol; 1872813a1f2bSShri Abhyankar ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1873813a1f2bSShri Abhyankar /* In b structure: Free imax, ilen, old a, old j. 1874813a1f2bSShri Abhyankar Allocate bdiag, solve_work, new a, new j */ 1875f268cba8SShri Abhyankar ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1876f268cba8SShri Abhyankar b->maxnz = b->nz = bdiag[0]+1; 1877813a1f2bSShri Abhyankar (fact)->info.factor_mallocs = reallocs; 1878813a1f2bSShri Abhyankar (fact)->info.fill_ratio_given = f; 1879f268cba8SShri Abhyankar (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 188035233d99SShri Abhyankar (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1881f2344125SBarry Smith if (a->inode.size) { 1882f2344125SBarry Smith (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1883f2344125SBarry Smith } 1884813a1f2bSShri Abhyankar PetscFunctionReturn(0); 1885813a1f2bSShri Abhyankar } 1886813a1f2bSShri Abhyankar 18876516239fSHong Zhang #undef __FUNCT__ 1888ad04f41aSHong Zhang #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace" 1889ad04f41aSHong Zhang PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 18906516239fSHong Zhang { 18916516239fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 18926516239fSHong Zhang IS isicol; 18936516239fSHong Zhang PetscErrorCode ierr; 18946516239fSHong Zhang const PetscInt *r,*ic; 18956516239fSHong Zhang PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 18966516239fSHong Zhang PetscInt *bi,*cols,nnz,*cols_lvl; 18976516239fSHong Zhang PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 18986516239fSHong Zhang PetscInt i,levels,diagonal_fill; 1899ace3abfcSBarry Smith PetscBool col_identity,row_identity; 19006516239fSHong Zhang PetscReal f; 19016516239fSHong Zhang PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 19026516239fSHong Zhang PetscBT lnkbt; 19036516239fSHong Zhang PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 19046516239fSHong Zhang PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 19056516239fSHong Zhang PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1906ace3abfcSBarry Smith PetscBool missing; 19076516239fSHong Zhang 19086516239fSHong Zhang PetscFunctionBegin; 1909e32f2f54SBarry Smith if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 19106516239fSHong Zhang f = info->fill; 19116516239fSHong Zhang levels = (PetscInt)info->levels; 19126516239fSHong Zhang diagonal_fill = (PetscInt)info->diagonal_fill; 19136516239fSHong Zhang ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 19146516239fSHong Zhang 19156516239fSHong Zhang ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 19166516239fSHong Zhang ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 19176516239fSHong Zhang if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1918f77e22a1SHong Zhang ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1919ad04f41aSHong Zhang (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1920f2344125SBarry Smith if (a->inode.size) { 1921f2344125SBarry Smith (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1922f2344125SBarry Smith } 1923d5f3da31SBarry Smith fact->factortype = MAT_FACTOR_ILU; 1924719d5645SBarry Smith (fact)->info.factor_mallocs = 0; 1925719d5645SBarry Smith (fact)->info.fill_ratio_given = info->fill; 1926719d5645SBarry Smith (fact)->info.fill_ratio_needed = 1.0; 1927719d5645SBarry Smith b = (Mat_SeqAIJ*)(fact)->data; 19288ef3462fSBarry Smith ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1929e32f2f54SBarry Smith if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 193008480c60SBarry Smith b->row = isrow; 193108480c60SBarry Smith b->col = iscol; 193282bf6240SBarry Smith b->icol = isicol; 1933719d5645SBarry Smith ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1934c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1935c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 19363a40ed3dSBarry Smith PetscFunctionReturn(0); 193708480c60SBarry Smith } 193808480c60SBarry Smith 1939898183ecSLois Curfman McInnes ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1940898183ecSLois Curfman McInnes ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 19419e25ed09SBarry Smith 19421bfa06eaSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 19431bfa06eaSJed Brown ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 19441bfa06eaSJed Brown ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1945a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 19466cf997b0SBarry Smith 19470e83c824SBarry Smith ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 19485a8e39fbSHong Zhang 19495a8e39fbSHong Zhang /* create a linked list for storing column indices of the active row */ 19505a8e39fbSHong Zhang nlnk = n + 1; 19515a8e39fbSHong Zhang ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 19525a8e39fbSHong Zhang 19535a8e39fbSHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 1954a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 19555a8e39fbSHong Zhang current_space = free_space; 1956a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 19575a8e39fbSHong Zhang current_space_lvl = free_space_lvl; 19585a8e39fbSHong Zhang 19595a8e39fbSHong Zhang for (i=0; i<n; i++) { 19605a8e39fbSHong Zhang nzi = 0; 19616cf997b0SBarry Smith /* copy current row into linked list */ 19625a8e39fbSHong Zhang nnz = ai[r[i]+1] - ai[r[i]]; 1963e32f2f54SBarry Smith if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 19645a8e39fbSHong Zhang cols = aj + ai[r[i]]; 19655a8e39fbSHong Zhang lnk[i] = -1; /* marker to indicate if diagonal exists */ 19665a8e39fbSHong Zhang ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 19675a8e39fbSHong Zhang nzi += nlnk; 19686cf997b0SBarry Smith 19696cf997b0SBarry Smith /* make sure diagonal entry is included */ 19705a8e39fbSHong Zhang if (diagonal_fill && lnk[i] == -1) { 19716cf997b0SBarry Smith fm = n; 19725a8e39fbSHong Zhang while (lnk[fm] < i) fm = lnk[fm]; 19735a8e39fbSHong Zhang lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 19745a8e39fbSHong Zhang lnk[fm] = i; 19755a8e39fbSHong Zhang lnk_lvl[i] = 0; 19765a8e39fbSHong Zhang nzi++; dcount++; 19776cf997b0SBarry Smith } 19786cf997b0SBarry Smith 19795a8e39fbSHong Zhang /* add pivot rows into the active row */ 19805a8e39fbSHong Zhang nzbd = 0; 19815a8e39fbSHong Zhang prow = lnk[n]; 19825a8e39fbSHong Zhang while (prow < i) { 19835a8e39fbSHong Zhang nnz = bdiag[prow]; 19845a8e39fbSHong Zhang cols = bj_ptr[prow] + nnz + 1; 19855a8e39fbSHong Zhang cols_lvl = bjlvl_ptr[prow] + nnz + 1; 19865a8e39fbSHong Zhang nnz = bi[prow+1] - bi[prow] - nnz - 1; 19875a8e39fbSHong Zhang ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 19885a8e39fbSHong Zhang nzi += nlnk; 19895a8e39fbSHong Zhang prow = lnk[prow]; 19905a8e39fbSHong Zhang nzbd++; 199156beaf04SBarry Smith } 19925a8e39fbSHong Zhang bdiag[i] = nzbd; 19935a8e39fbSHong Zhang bi[i+1] = bi[i] + nzi; 1994ecf371e4SBarry Smith 19955a8e39fbSHong Zhang /* if free space is not available, make more free space */ 19965a8e39fbSHong Zhang if (current_space->local_remaining<nzi) { 19975a8e39fbSHong Zhang nnz = nzi*(n - i); /* estimated and max additional space needed */ 1998a1a86e44SBarry Smith ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1999a1a86e44SBarry Smith ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 20005a8e39fbSHong Zhang reallocs++; 20015a8e39fbSHong Zhang } 2002ecf371e4SBarry Smith 20035a8e39fbSHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 20045a8e39fbSHong Zhang ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 20055a8e39fbSHong Zhang bj_ptr[i] = current_space->array; 20065a8e39fbSHong Zhang bjlvl_ptr[i] = current_space_lvl->array; 20075a8e39fbSHong Zhang 20085a8e39fbSHong Zhang /* make sure the active row i has diagonal entry */ 200965e19b50SBarry Smith if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 20105a8e39fbSHong Zhang 20115a8e39fbSHong Zhang current_space->array += nzi; 20125a8e39fbSHong Zhang current_space->local_used += nzi; 20135a8e39fbSHong Zhang current_space->local_remaining -= nzi; 20145a8e39fbSHong Zhang current_space_lvl->array += nzi; 20155a8e39fbSHong Zhang current_space_lvl->local_used += nzi; 20165a8e39fbSHong Zhang current_space_lvl->local_remaining -= nzi; 20179e25ed09SBarry Smith } 20185a8e39fbSHong Zhang 2019898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2020898183ecSLois Curfman McInnes ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 20215a8e39fbSHong Zhang 20225a8e39fbSHong Zhang /* destroy list of free space and other temporary arrays */ 20235a8e39fbSHong Zhang ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 20246516239fSHong Zhang ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 20255a8e39fbSHong Zhang ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2026a1a86e44SBarry Smith ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 20270e83c824SBarry Smith ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 20289e25ed09SBarry Smith 20296cf91177SBarry Smith #if defined(PETSC_USE_INFO) 2030f64065bbSBarry Smith { 20315a8e39fbSHong Zhang PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2032ae15b995SBarry Smith ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2033ae15b995SBarry Smith ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2034ae15b995SBarry Smith ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 2035ae15b995SBarry Smith ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2036335d9088SBarry Smith if (diagonal_fill) { 2037ae15b995SBarry Smith ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2038335d9088SBarry Smith } 2039f64065bbSBarry Smith } 204063ba0a88SBarry Smith #endif 2041bbb6d6a8SBarry Smith 20429e25ed09SBarry Smith /* put together the new matrix */ 204307dbf95fSBarry Smith ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2044719d5645SBarry Smith ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 2045719d5645SBarry Smith b = (Mat_SeqAIJ*)(fact)->data; 2046e6b907acSBarry Smith b->free_a = PETSC_TRUE; 2047e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 20487c922b88SBarry Smith b->singlemalloc = PETSC_FALSE; 20490e83c824SBarry Smith ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 20505a8e39fbSHong Zhang b->j = bj; 20515a8e39fbSHong Zhang b->i = bi; 20525a8e39fbSHong Zhang for (i=0; i<n; i++) bdiag[i] += bi[i]; 20535a8e39fbSHong Zhang b->diag = bdiag; 2054416022c9SBarry Smith b->ilen = 0; 2055416022c9SBarry Smith b->imax = 0; 2056416022c9SBarry Smith b->row = isrow; 2057416022c9SBarry Smith b->col = iscol; 2058c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2059c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 206082bf6240SBarry Smith b->icol = isicol; 206187828ca2SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2062416022c9SBarry Smith /* In b structure: Free imax, ilen, old a, old j. 20635a8e39fbSHong Zhang Allocate bdiag, solve_work, new a, new j */ 2064719d5645SBarry Smith ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 20655a8e39fbSHong Zhang b->maxnz = b->nz = bi[n] ; 2066719d5645SBarry Smith (fact)->info.factor_mallocs = reallocs; 2067719d5645SBarry Smith (fact)->info.fill_ratio_given = f; 2068719d5645SBarry Smith (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2069ad04f41aSHong Zhang (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2070f2344125SBarry Smith if (a->inode.size) { 2071f2344125SBarry Smith (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2072d3ac4fa3SBarry Smith } 20733a40ed3dSBarry Smith PetscFunctionReturn(0); 20749e25ed09SBarry Smith } 2075d5d45c9bSBarry Smith 2076a6175056SHong Zhang #undef __FUNCT__ 207735233d99SShri Abhyankar #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 207835233d99SShri Abhyankar PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 20795f44c854SHong Zhang { 20805f44c854SHong Zhang Mat C = B; 20815f44c854SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 20825f44c854SHong Zhang Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 20835f44c854SHong Zhang IS ip=b->row,iip = b->icol; 20845f44c854SHong Zhang PetscErrorCode ierr; 20855f44c854SHong Zhang const PetscInt *rip,*riip; 20865f44c854SHong Zhang PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 20875f44c854SHong Zhang PetscInt *ai=a->i,*aj=a->j; 20885f44c854SHong Zhang PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 20895f44c854SHong Zhang MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2090ace3abfcSBarry Smith PetscBool perm_identity; 2091d90ac83dSHong Zhang FactorShiftCtx sctx; 209298a93161SHong Zhang PetscReal rs; 209398a93161SHong Zhang MatScalar d,*v; 209498a93161SHong Zhang 20955f44c854SHong Zhang PetscFunctionBegin; 209698a93161SHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 2097d90ac83dSHong Zhang ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 209898a93161SHong Zhang 2099f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 210098a93161SHong Zhang sctx.shift_top = info->zeropivot; 210198a93161SHong Zhang for (i=0; i<mbs; i++) { 210298a93161SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 210398a93161SHong Zhang d = (aa)[a->diag[i]]; 210498a93161SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 210598a93161SHong Zhang v = aa+ai[i]; 210698a93161SHong Zhang nz = ai[i+1] - ai[i]; 210798a93161SHong Zhang for (j=0; j<nz; j++) 210898a93161SHong Zhang rs += PetscAbsScalar(v[j]); 210998a93161SHong Zhang if (rs>sctx.shift_top) sctx.shift_top = rs; 211098a93161SHong Zhang } 211198a93161SHong Zhang sctx.shift_top *= 1.1; 211298a93161SHong Zhang sctx.nshift_max = 5; 211398a93161SHong Zhang sctx.shift_lo = 0.; 211498a93161SHong Zhang sctx.shift_hi = 1.; 211598a93161SHong Zhang } 21165f44c854SHong Zhang 21175f44c854SHong Zhang ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 21185f44c854SHong Zhang ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 21195f44c854SHong Zhang 21205f44c854SHong Zhang /* allocate working arrays 21215f44c854SHong Zhang c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 21225f44c854SHong Zhang il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 21235f44c854SHong Zhang */ 21240e83c824SBarry Smith ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr); 21255f44c854SHong Zhang 212698a93161SHong Zhang do { 212707b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 212898a93161SHong Zhang 2129c5546cabSHong Zhang for (i=0; i<mbs; i++) c2r[i] = mbs; 21302e987568SSatish Balay if (mbs) il[0] = 0; 21315f44c854SHong Zhang 21325f44c854SHong Zhang for (k = 0; k<mbs; k++) { 21335f44c854SHong Zhang /* zero rtmp */ 21345f44c854SHong Zhang nz = bi[k+1] - bi[k]; 21355f44c854SHong Zhang bjtmp = bj + bi[k]; 21365f44c854SHong Zhang for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 21375f44c854SHong Zhang 21385f44c854SHong Zhang /* load in initial unfactored row */ 21395f44c854SHong Zhang bval = ba + bi[k]; 21405f44c854SHong Zhang jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 21415f44c854SHong Zhang for (j = jmin; j < jmax; j++) { 21425f44c854SHong Zhang col = riip[aj[j]]; 21435f44c854SHong Zhang if (col >= k) { /* only take upper triangular entry */ 21445f44c854SHong Zhang rtmp[col] = aa[j]; 21455f44c854SHong Zhang *bval++ = 0.0; /* for in-place factorization */ 21465f44c854SHong Zhang } 21475f44c854SHong Zhang } 214898a93161SHong Zhang /* shift the diagonal of the matrix: ZeropivotApply() */ 214998a93161SHong Zhang rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 21505f44c854SHong Zhang 21515f44c854SHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 21525f44c854SHong Zhang dk = rtmp[k]; 21535f44c854SHong Zhang i = c2r[k]; /* first row to be added to k_th row */ 21545f44c854SHong Zhang 21555f44c854SHong Zhang while (i < k) { 21565f44c854SHong Zhang nexti = c2r[i]; /* next row to be added to k_th row */ 21575f44c854SHong Zhang 21585f44c854SHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 21595f44c854SHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 21605f44c854SHong Zhang uikdi = - ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 21615f44c854SHong Zhang dk += uikdi*ba[ili]; /* update diag[k] */ 21625f44c854SHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 21635f44c854SHong Zhang 21645f44c854SHong Zhang /* add multiple of row i to k-th row */ 21655f44c854SHong Zhang jmin = ili + 1; jmax = bi[i+1]; 21665f44c854SHong Zhang if (jmin < jmax) { 21675f44c854SHong Zhang for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 21685f44c854SHong Zhang /* update il and c2r for row i */ 21695f44c854SHong Zhang il[i] = jmin; 21705f44c854SHong Zhang j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 21715f44c854SHong Zhang } 21725f44c854SHong Zhang i = nexti; 21735f44c854SHong Zhang } 21745f44c854SHong Zhang 21755f44c854SHong Zhang /* copy data into U(k,:) */ 217698a93161SHong Zhang rs = 0.0; 21775f44c854SHong Zhang jmin = bi[k]; jmax = bi[k+1]-1; 21785f44c854SHong Zhang if (jmin < jmax) { 21795f44c854SHong Zhang for (j=jmin; j<jmax; j++) { 218098a93161SHong Zhang col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 21815f44c854SHong Zhang } 21825f44c854SHong Zhang /* add the k-th row into il and c2r */ 21835f44c854SHong Zhang il[k] = jmin; 21845f44c854SHong Zhang i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 21855f44c854SHong Zhang } 218698a93161SHong Zhang 218798a93161SHong Zhang /* MatPivotCheck() */ 218898a93161SHong Zhang sctx.rs = rs; 218998a93161SHong Zhang sctx.pv = dk; 2190d0660788SBarry Smith ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 219107b50cabSHong Zhang if (sctx.newshift) break; 219298a93161SHong Zhang dk = sctx.pv; 219398a93161SHong Zhang 219498a93161SHong Zhang ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 219598a93161SHong Zhang } 219607b50cabSHong Zhang } while (sctx.newshift); 21975f44c854SHong Zhang 21980e83c824SBarry Smith ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 21995f44c854SHong Zhang ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 22005f44c854SHong Zhang ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 22015f44c854SHong Zhang 22025f44c854SHong Zhang ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 22035f44c854SHong Zhang if (perm_identity) { 220435233d99SShri Abhyankar B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 220535233d99SShri Abhyankar B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 22062169352eSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 22072169352eSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 22085f44c854SHong Zhang } else { 220935233d99SShri Abhyankar B->ops->solve = MatSolve_SeqSBAIJ_1; 221035233d99SShri Abhyankar B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 221135233d99SShri Abhyankar B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 221235233d99SShri Abhyankar B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 22135f44c854SHong Zhang } 22145f44c854SHong Zhang 22155f44c854SHong Zhang C->assembled = PETSC_TRUE; 22165f44c854SHong Zhang C->preallocated = PETSC_TRUE; 22175f44c854SHong Zhang ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 221898a93161SHong Zhang 221998a93161SHong Zhang /* MatPivotView() */ 222098a93161SHong Zhang if (sctx.nshift) { 2221f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 222298a93161SHong Zhang ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 2223f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 222498a93161SHong Zhang ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2225f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2226b2a402fcSHong Zhang ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 222798a93161SHong Zhang } 222898a93161SHong Zhang } 22295f44c854SHong Zhang PetscFunctionReturn(0); 22305f44c854SHong Zhang } 22315f44c854SHong Zhang 22325f44c854SHong Zhang #undef __FUNCT__ 22330a3c351aSHong Zhang #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace" 22340a3c351aSHong Zhang PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2235a6175056SHong Zhang { 2236719d5645SBarry Smith Mat C = B; 22370968510aSHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 22382ed133dbSHong Zhang Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 22399bfd6278SHong Zhang IS ip=b->row,iip = b->icol; 22402ed133dbSHong Zhang PetscErrorCode ierr; 22415d0c19d7SBarry Smith const PetscInt *rip,*riip; 2242c5546cabSHong Zhang PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 22432ed133dbSHong Zhang PetscInt *ai=a->i,*aj=a->j; 22442ed133dbSHong Zhang PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2245622e2028SHong Zhang MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2246ace3abfcSBarry Smith PetscBool perm_identity; 22470e95ead3SHong Zhang FactorShiftCtx sctx; 22480e95ead3SHong Zhang PetscReal rs; 22490e95ead3SHong Zhang MatScalar d,*v; 2250d5d45c9bSBarry Smith 2251a6175056SHong Zhang PetscFunctionBegin; 22520e95ead3SHong Zhang /* MatPivotSetUp(): initialize shift context sctx */ 22530e95ead3SHong Zhang ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 22540e95ead3SHong Zhang 22550e95ead3SHong Zhang if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 22560e95ead3SHong Zhang sctx.shift_top = info->zeropivot; 22570e95ead3SHong Zhang for (i=0; i<mbs; i++) { 22580e95ead3SHong Zhang /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 22590e95ead3SHong Zhang d = (aa)[a->diag[i]]; 22600e95ead3SHong Zhang rs = -PetscAbsScalar(d) - PetscRealPart(d); 22610e95ead3SHong Zhang v = aa+ai[i]; 22620e95ead3SHong Zhang nz = ai[i+1] - ai[i]; 22630e95ead3SHong Zhang for (j=0; j<nz; j++) 22640e95ead3SHong Zhang rs += PetscAbsScalar(v[j]); 22650e95ead3SHong Zhang if (rs>sctx.shift_top) sctx.shift_top = rs; 22660e95ead3SHong Zhang } 22670e95ead3SHong Zhang sctx.shift_top *= 1.1; 22680e95ead3SHong Zhang sctx.nshift_max = 5; 22690e95ead3SHong Zhang sctx.shift_lo = 0.; 22700e95ead3SHong Zhang sctx.shift_hi = 1.; 22710e95ead3SHong Zhang } 2272ee45ca4aSHong Zhang 22732ed133dbSHong Zhang ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 22749bfd6278SHong Zhang ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 22752ed133dbSHong Zhang 22762ed133dbSHong Zhang /* initialization */ 22770e83c824SBarry Smith ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 22780e95ead3SHong Zhang 22792ed133dbSHong Zhang do { 228007b50cabSHong Zhang sctx.newshift = PETSC_FALSE; 22810e95ead3SHong Zhang 2282c5546cabSHong Zhang for (i=0; i<mbs; i++) jl[i] = mbs; 2283c5546cabSHong Zhang il[0] = 0; 22842ed133dbSHong Zhang 22852ed133dbSHong Zhang for (k = 0; k<mbs; k++) { 2286c5546cabSHong Zhang /* zero rtmp */ 2287c5546cabSHong Zhang nz = bi[k+1] - bi[k]; 2288c5546cabSHong Zhang bjtmp = bj + bi[k]; 2289c5546cabSHong Zhang for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2290c5546cabSHong Zhang 2291c05c3958SHong Zhang bval = ba + bi[k]; 22922ed133dbSHong Zhang /* initialize k-th row by the perm[k]-th row of A */ 22932ed133dbSHong Zhang jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 22942ed133dbSHong Zhang for (j = jmin; j < jmax; j++) { 22959bfd6278SHong Zhang col = riip[aj[j]]; 22962ed133dbSHong Zhang if (col >= k) { /* only take upper triangular entry */ 22972ed133dbSHong Zhang rtmp[col] = aa[j]; 2298c05c3958SHong Zhang *bval++ = 0.0; /* for in-place factorization */ 22992ed133dbSHong Zhang } 23002ed133dbSHong Zhang } 230139e3d630SHong Zhang /* shift the diagonal of the matrix */ 2302540703acSHong Zhang if (sctx.nshift) rtmp[k] += sctx.shift_amount; 23032ed133dbSHong Zhang 23042ed133dbSHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 23052ed133dbSHong Zhang dk = rtmp[k]; 23062ed133dbSHong Zhang i = jl[k]; /* first row to be added to k_th row */ 23072ed133dbSHong Zhang 23082ed133dbSHong Zhang while (i < k) { 23092ed133dbSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */ 23102ed133dbSHong Zhang 23112ed133dbSHong Zhang /* compute multiplier, update diag(k) and U(i,k) */ 23122ed133dbSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 23132ed133dbSHong Zhang uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 23142ed133dbSHong Zhang dk += uikdi*ba[ili]; 23152ed133dbSHong Zhang ba[ili] = uikdi; /* -U(i,k) */ 23162ed133dbSHong Zhang 23172ed133dbSHong Zhang /* add multiple of row i to k-th row */ 23182ed133dbSHong Zhang jmin = ili + 1; jmax = bi[i+1]; 23192ed133dbSHong Zhang if (jmin < jmax) { 23202ed133dbSHong Zhang for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 23212ed133dbSHong Zhang /* update il and jl for row i */ 23222ed133dbSHong Zhang il[i] = jmin; 23232ed133dbSHong Zhang j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 23242ed133dbSHong Zhang } 23252ed133dbSHong Zhang i = nexti; 23262ed133dbSHong Zhang } 23272ed133dbSHong Zhang 23280697b6caSHong Zhang /* shift the diagonals when zero pivot is detected */ 23290697b6caSHong Zhang /* compute rs=sum of abs(off-diagonal) */ 23300697b6caSHong Zhang rs = 0.0; 23313c9e8598SHong Zhang jmin = bi[k]+1; 23323c9e8598SHong Zhang nz = bi[k+1] - jmin; 23333c9e8598SHong Zhang bcol = bj + jmin; 233404fbf559SBarry Smith for (j=0; j<nz; j++) { 233504fbf559SBarry Smith rs += PetscAbsScalar(rtmp[bcol[j]]); 23363c9e8598SHong Zhang } 2337540703acSHong Zhang 2338540703acSHong Zhang sctx.rs = rs; 2339540703acSHong Zhang sctx.pv = dk; 2340d0660788SBarry Smith ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr); 234107b50cabSHong Zhang if (sctx.newshift) break; 23420e95ead3SHong Zhang dk = sctx.pv; 23433c9e8598SHong Zhang 23443c9e8598SHong Zhang /* copy data into U(k,:) */ 234539e3d630SHong Zhang ba[bi[k]] = 1.0/dk; /* U(k,k) */ 234639e3d630SHong Zhang jmin = bi[k]+1; jmax = bi[k+1]; 234739e3d630SHong Zhang if (jmin < jmax) { 234839e3d630SHong Zhang for (j=jmin; j<jmax; j++) { 2349c6ae9bfcSHong Zhang col = bj[j]; ba[j] = rtmp[col]; 23503c9e8598SHong Zhang } 235139e3d630SHong Zhang /* add the k-th row into il and jl */ 23523c9e8598SHong Zhang il[k] = jmin; 23533c9e8598SHong Zhang i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 23543c9e8598SHong Zhang } 23553c9e8598SHong Zhang } 235607b50cabSHong Zhang } while (sctx.newshift); 23570e95ead3SHong Zhang 23580e83c824SBarry Smith ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 235939e3d630SHong Zhang ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 23609bfd6278SHong Zhang ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2361db4efbfdSBarry Smith 2362db4efbfdSBarry Smith ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2363db4efbfdSBarry Smith if (perm_identity) { 23640a3c351aSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 23650a3c351aSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 23660a3c351aSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 23670a3c351aSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2368db4efbfdSBarry Smith } else { 23690a3c351aSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 23700a3c351aSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 23710a3c351aSHong Zhang B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 23720a3c351aSHong Zhang B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2373db4efbfdSBarry Smith } 2374db4efbfdSBarry Smith 23753c9e8598SHong Zhang C->assembled = PETSC_TRUE; 23763c9e8598SHong Zhang C->preallocated = PETSC_TRUE; 2377d0f46423SBarry Smith ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2378540703acSHong Zhang if (sctx.nshift) { 2379f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 23801e2582c4SBarry Smith ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2381f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 23821e2582c4SBarry Smith ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 23830697b6caSHong Zhang } 23843c9e8598SHong Zhang } 23853c9e8598SHong Zhang PetscFunctionReturn(0); 23863c9e8598SHong Zhang } 2387a6175056SHong Zhang 23888a76ed4fSHong Zhang /* 23898a76ed4fSHong Zhang icc() under revised new data structure. 23908a76ed4fSHong Zhang Factored arrays bj and ba are stored as 23918a76ed4fSHong Zhang U(0,:),...,U(i,:),U(n-1,:) 23928a76ed4fSHong Zhang 23938a76ed4fSHong Zhang ui=fact->i is an array of size n+1, in which 23948a76ed4fSHong Zhang ui+ 23958a76ed4fSHong Zhang ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 23968a76ed4fSHong Zhang ui[n]: points to U(n-1,n-1)+1 23978a76ed4fSHong Zhang 23988a76ed4fSHong Zhang udiag=fact->diag is an array of size n,in which 23998a76ed4fSHong Zhang udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 24008a76ed4fSHong Zhang 24018a76ed4fSHong Zhang U(i,:) contains udiag[i] as its last entry, i.e., 24028a76ed4fSHong Zhang U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 24038a76ed4fSHong Zhang */ 24048a76ed4fSHong Zhang 2405a6175056SHong Zhang #undef __FUNCT__ 240635233d99SShri Abhyankar #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 240735233d99SShri Abhyankar PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2408a6175056SHong Zhang { 24090968510aSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2410ed59401aSHong Zhang Mat_SeqSBAIJ *b; 2411dfbe8321SBarry Smith PetscErrorCode ierr; 2412ace3abfcSBarry Smith PetscBool perm_identity,missing; 24135f44c854SHong Zhang PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 24145d0c19d7SBarry Smith const PetscInt *rip,*riip; 2415ed59401aSHong Zhang PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 241658ebbce7SBarry Smith PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 24175a8e39fbSHong Zhang PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2418ed59401aSHong Zhang PetscReal fill=info->fill,levels=info->levels; 2419a1a86e44SBarry Smith PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2420a1a86e44SBarry Smith PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 24217a48dd6fSHong Zhang PetscBT lnkbt; 2422b635d36bSHong Zhang IS iperm; 2423a6175056SHong Zhang 2424a6175056SHong Zhang PetscFunctionBegin; 2425e32f2f54SBarry Smith if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 242658ebbce7SBarry Smith ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2427e32f2f54SBarry Smith if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2428653a6975SHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2429b635d36bSHong Zhang ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 24307a48dd6fSHong Zhang 243139e3d630SHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 24325f44c854SHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 243339e3d630SHong Zhang ui[0] = 0; 243439e3d630SHong Zhang 24358a76ed4fSHong Zhang /* ICC(0) without matrix ordering: simply rearrange column indices */ 2436622e2028SHong Zhang if (!levels && perm_identity) { 24375f44c854SHong Zhang for (i=0; i<am; i++) { 2438c5546cabSHong Zhang ncols = ai[i+1] - a->diag[i]; 2439c5546cabSHong Zhang ui[i+1] = ui[i] + ncols; 2440c5546cabSHong Zhang udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2441dc1e2a3fSHong Zhang } 2442dc1e2a3fSHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2443dc1e2a3fSHong Zhang cols = uj; 2444dc1e2a3fSHong Zhang for (i=0; i<am; i++) { 24455f44c854SHong Zhang aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2446dc1e2a3fSHong Zhang ncols = ai[i+1] - a->diag[i] -1; 24475f44c854SHong Zhang for (j=0; j<ncols; j++) *cols++ = aj[j]; 24485f44c854SHong Zhang *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 24495f44c854SHong Zhang } 24505f44c854SHong Zhang } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 24515f44c854SHong Zhang ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 24525f44c854SHong Zhang ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 24535f44c854SHong Zhang 24545f44c854SHong Zhang /* initialization */ 24555f44c854SHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 24565f44c854SHong Zhang 24575f44c854SHong Zhang /* jl: linked list for storing indices of the pivot rows 24585f44c854SHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 24590e83c824SBarry Smith ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 24605f44c854SHong Zhang for (i=0; i<am; i++) { 24615f44c854SHong Zhang jl[i] = am; il[i] = 0; 24625f44c854SHong Zhang } 24635f44c854SHong Zhang 24645f44c854SHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 24655f44c854SHong Zhang nlnk = am + 1; 24665f44c854SHong Zhang ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 24675f44c854SHong Zhang 246895b5ac22SHong Zhang /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 246995b5ac22SHong Zhang ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 24705f44c854SHong Zhang current_space = free_space; 247195b5ac22SHong Zhang ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 24725f44c854SHong Zhang current_space_lvl = free_space_lvl; 24735f44c854SHong Zhang 24745f44c854SHong Zhang for (k=0; k<am; k++) { /* for each active row k */ 24755f44c854SHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 24765f44c854SHong Zhang nzk = 0; 24775f44c854SHong Zhang ncols = ai[rip[k]+1] - ai[rip[k]]; 2478e32f2f54SBarry Smith if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 24795f44c854SHong Zhang ncols_upper = 0; 24805f44c854SHong Zhang for (j=0; j<ncols; j++) { 24815f44c854SHong Zhang i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 24825f44c854SHong Zhang if (riip[i] >= k) { /* only take upper triangular entry */ 24835f44c854SHong Zhang ajtmp[ncols_upper] = i; 24845f44c854SHong Zhang ncols_upper++; 24855f44c854SHong Zhang } 24865f44c854SHong Zhang } 24875f44c854SHong Zhang ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 24885f44c854SHong Zhang nzk += nlnk; 24895f44c854SHong Zhang 24905f44c854SHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 24915f44c854SHong Zhang prow = jl[k]; /* 1st pivot row */ 24925f44c854SHong Zhang 24935f44c854SHong Zhang while (prow < k) { 24945f44c854SHong Zhang nextprow = jl[prow]; 24955f44c854SHong Zhang 24965f44c854SHong Zhang /* merge prow into k-th row */ 24975f44c854SHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 24985f44c854SHong Zhang jmax = ui[prow+1]; 24995f44c854SHong Zhang ncols = jmax-jmin; 25005f44c854SHong Zhang i = jmin - ui[prow]; 25015f44c854SHong Zhang cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 25025f44c854SHong Zhang uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 25035f44c854SHong Zhang j = *(uj - 1); 25045f44c854SHong Zhang ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 25055f44c854SHong Zhang nzk += nlnk; 25065f44c854SHong Zhang 25075f44c854SHong Zhang /* update il and jl for prow */ 25085f44c854SHong Zhang if (jmin < jmax) { 25095f44c854SHong Zhang il[prow] = jmin; 25105f44c854SHong Zhang j = *cols; jl[prow] = jl[j]; jl[j] = prow; 25115f44c854SHong Zhang } 25125f44c854SHong Zhang prow = nextprow; 25135f44c854SHong Zhang } 25145f44c854SHong Zhang 25155f44c854SHong Zhang /* if free space is not available, make more free space */ 25165f44c854SHong Zhang if (current_space->local_remaining<nzk) { 25175f44c854SHong Zhang i = am - k + 1; /* num of unfactored rows */ 2518c6ae9bfcSHong Zhang i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 25195f44c854SHong Zhang ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 25205f44c854SHong Zhang ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 25215f44c854SHong Zhang reallocs++; 25225f44c854SHong Zhang } 25235f44c854SHong Zhang 25245f44c854SHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 2525e32f2f54SBarry Smith if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 25265f44c854SHong Zhang ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 25275f44c854SHong Zhang 25285f44c854SHong Zhang /* add the k-th row into il and jl */ 25295f44c854SHong Zhang if (nzk > 1) { 25305f44c854SHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 25315f44c854SHong Zhang jl[k] = jl[i]; jl[i] = k; 25325f44c854SHong Zhang il[k] = ui[k] + 1; 25335f44c854SHong Zhang } 25345f44c854SHong Zhang uj_ptr[k] = current_space->array; 25355f44c854SHong Zhang uj_lvl_ptr[k] = current_space_lvl->array; 25365f44c854SHong Zhang 25375f44c854SHong Zhang current_space->array += nzk; 25385f44c854SHong Zhang current_space->local_used += nzk; 25395f44c854SHong Zhang current_space->local_remaining -= nzk; 25405f44c854SHong Zhang 25415f44c854SHong Zhang current_space_lvl->array += nzk; 25425f44c854SHong Zhang current_space_lvl->local_used += nzk; 25435f44c854SHong Zhang current_space_lvl->local_remaining -= nzk; 25445f44c854SHong Zhang 25455f44c854SHong Zhang ui[k+1] = ui[k] + nzk; 25465f44c854SHong Zhang } 25475f44c854SHong Zhang 25485f44c854SHong Zhang ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 25495f44c854SHong Zhang ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 25500e83c824SBarry Smith ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 25515f44c854SHong Zhang ierr = PetscFree(ajtmp);CHKERRQ(ierr); 25525f44c854SHong Zhang 25539263d837SHong Zhang /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 25545f44c854SHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 255535233d99SShri Abhyankar ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 25565f44c854SHong Zhang ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 25575f44c854SHong Zhang ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 25585f44c854SHong Zhang 25595f44c854SHong Zhang } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 25605f44c854SHong Zhang 25615f44c854SHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 25625f44c854SHong Zhang b = (Mat_SeqSBAIJ*)(fact)->data; 25635f44c854SHong Zhang b->singlemalloc = PETSC_FALSE; 25645f44c854SHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 25655f44c854SHong Zhang b->j = uj; 25665f44c854SHong Zhang b->i = ui; 25675f44c854SHong Zhang b->diag = udiag; 25687f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 25695f44c854SHong Zhang b->ilen = 0; 25705f44c854SHong Zhang b->imax = 0; 25715f44c854SHong Zhang b->row = perm; 25725f44c854SHong Zhang b->col = perm; 25735f44c854SHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 25745f44c854SHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 25755f44c854SHong Zhang b->icol = iperm; 25765f44c854SHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 25775f44c854SHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2578d595f711SHong Zhang ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 25795f44c854SHong Zhang b->maxnz = b->nz = ui[am]; 25805f44c854SHong Zhang b->free_a = PETSC_TRUE; 25815f44c854SHong Zhang b->free_ij = PETSC_TRUE; 25825f44c854SHong Zhang 2583f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 2584f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 25855f44c854SHong Zhang if (ai[am] != 0) { 25866a69fef8SHong Zhang /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 258795b5ac22SHong Zhang fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 25885f44c854SHong Zhang } else { 2589f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 25905f44c854SHong Zhang } 25919263d837SHong Zhang #if defined(PETSC_USE_INFO) 25929263d837SHong Zhang if (ai[am] != 0) { 25939263d837SHong Zhang PetscReal af = fact->info.fill_ratio_needed; 25949263d837SHong Zhang ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 25959263d837SHong Zhang ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 25969263d837SHong Zhang ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 25979263d837SHong Zhang } else { 25989263d837SHong Zhang ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 25999263d837SHong Zhang } 26009263d837SHong Zhang #endif 260135233d99SShri Abhyankar fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 26025f44c854SHong Zhang PetscFunctionReturn(0); 26035f44c854SHong Zhang } 26045f44c854SHong Zhang 26055f44c854SHong Zhang #undef __FUNCT__ 2606ad04f41aSHong Zhang #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace" 2607ad04f41aSHong Zhang PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 26085f44c854SHong Zhang { 26095f44c854SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 26105f44c854SHong Zhang Mat_SeqSBAIJ *b; 26115f44c854SHong Zhang PetscErrorCode ierr; 2612ace3abfcSBarry Smith PetscBool perm_identity,missing; 26135f44c854SHong Zhang PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 26145f44c854SHong Zhang const PetscInt *rip,*riip; 26155f44c854SHong Zhang PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 26165f44c854SHong Zhang PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 26175f44c854SHong Zhang PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 26185f44c854SHong Zhang PetscReal fill=info->fill,levels=info->levels; 26195f44c854SHong Zhang PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 26205f44c854SHong Zhang PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 26215f44c854SHong Zhang PetscBT lnkbt; 26225f44c854SHong Zhang IS iperm; 26235f44c854SHong Zhang 26245f44c854SHong Zhang PetscFunctionBegin; 2625e32f2f54SBarry Smith if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 26265f44c854SHong Zhang ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2627e32f2f54SBarry Smith if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 26285f44c854SHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 26295f44c854SHong Zhang ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 26305f44c854SHong Zhang 26315f44c854SHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 26325f44c854SHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 26335f44c854SHong Zhang ui[0] = 0; 26345f44c854SHong Zhang 26355f44c854SHong Zhang /* ICC(0) without matrix ordering: simply copies fill pattern */ 26365f44c854SHong Zhang if (!levels && perm_identity) { 26375f44c854SHong Zhang 26385f44c854SHong Zhang for (i=0; i<am; i++) { 26395f44c854SHong Zhang ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 26405f44c854SHong Zhang udiag[i] = ui[i]; 2641ed59401aSHong Zhang } 264239e3d630SHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 264339e3d630SHong Zhang cols = uj; 2644ed59401aSHong Zhang for (i=0; i<am; i++) { 2645ed59401aSHong Zhang aj = a->j + a->diag[i]; 264639e3d630SHong Zhang ncols = ui[i+1] - ui[i]; 264739e3d630SHong Zhang for (j=0; j<ncols; j++) *cols++ = *aj++; 2648ed59401aSHong Zhang } 264939e3d630SHong Zhang } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2650b635d36bSHong Zhang ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2651b635d36bSHong Zhang ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2652b635d36bSHong Zhang 26537a48dd6fSHong Zhang /* initialization */ 26545a8e39fbSHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 26557a48dd6fSHong Zhang 26567a48dd6fSHong Zhang /* jl: linked list for storing indices of the pivot rows 26577a48dd6fSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 26580e83c824SBarry Smith ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 26597a48dd6fSHong Zhang for (i=0; i<am; i++) { 26607a48dd6fSHong Zhang jl[i] = am; il[i] = 0; 26617a48dd6fSHong Zhang } 26627a48dd6fSHong Zhang 26637a48dd6fSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 26647a48dd6fSHong Zhang nlnk = am + 1; 26652ed133dbSHong Zhang ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 26667a48dd6fSHong Zhang 26677a48dd6fSHong Zhang /* initial FreeSpace size is fill*(ai[am]+1) */ 2668a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 26697a48dd6fSHong Zhang current_space = free_space; 2670a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 26717a48dd6fSHong Zhang current_space_lvl = free_space_lvl; 26727a48dd6fSHong Zhang 26737a48dd6fSHong Zhang for (k=0; k<am; k++) { /* for each active row k */ 26747a48dd6fSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 26757a48dd6fSHong Zhang nzk = 0; 2676622e2028SHong Zhang ncols = ai[rip[k]+1] - ai[rip[k]]; 2677e32f2f54SBarry Smith if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2678622e2028SHong Zhang ncols_upper = 0; 2679622e2028SHong Zhang for (j=0; j<ncols; j++) { 2680b635d36bSHong Zhang i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2681b635d36bSHong Zhang if (riip[i] >= k) { /* only take upper triangular entry */ 26825a8e39fbSHong Zhang ajtmp[ncols_upper] = i; 2683622e2028SHong Zhang ncols_upper++; 2684622e2028SHong Zhang } 2685622e2028SHong Zhang } 2686b635d36bSHong Zhang ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 26877a48dd6fSHong Zhang nzk += nlnk; 26887a48dd6fSHong Zhang 26897a48dd6fSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 26907a48dd6fSHong Zhang prow = jl[k]; /* 1st pivot row */ 26917a48dd6fSHong Zhang 26927a48dd6fSHong Zhang while (prow < k) { 26937a48dd6fSHong Zhang nextprow = jl[prow]; 26947a48dd6fSHong Zhang 26957a48dd6fSHong Zhang /* merge prow into k-th row */ 26967a48dd6fSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 26977a48dd6fSHong Zhang jmax = ui[prow+1]; 26987a48dd6fSHong Zhang ncols = jmax-jmin; 2699ed59401aSHong Zhang i = jmin - ui[prow]; 2700ed59401aSHong Zhang cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 27015a8e39fbSHong Zhang uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 27025a8e39fbSHong Zhang j = *(uj - 1); 27035a8e39fbSHong Zhang ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 27047a48dd6fSHong Zhang nzk += nlnk; 27057a48dd6fSHong Zhang 27067a48dd6fSHong Zhang /* update il and jl for prow */ 27077a48dd6fSHong Zhang if (jmin < jmax) { 27087a48dd6fSHong Zhang il[prow] = jmin; 27097a48dd6fSHong Zhang j = *cols; jl[prow] = jl[j]; jl[j] = prow; 27107a48dd6fSHong Zhang } 27117a48dd6fSHong Zhang prow = nextprow; 27127a48dd6fSHong Zhang } 27137a48dd6fSHong Zhang 27147a48dd6fSHong Zhang /* if free space is not available, make more free space */ 27157a48dd6fSHong Zhang if (current_space->local_remaining<nzk) { 27167a48dd6fSHong Zhang i = am - k + 1; /* num of unfactored rows */ 2717fd3f55cbSMatthew Knepley i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2718a1a86e44SBarry Smith ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2719a1a86e44SBarry Smith ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 27207a48dd6fSHong Zhang reallocs++; 27217a48dd6fSHong Zhang } 27227a48dd6fSHong Zhang 27237a48dd6fSHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */ 272465e19b50SBarry Smith if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 27252ed133dbSHong Zhang ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 27267a48dd6fSHong Zhang 27277a48dd6fSHong Zhang /* add the k-th row into il and jl */ 272839e3d630SHong Zhang if (nzk > 1) { 27297a48dd6fSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 27307a48dd6fSHong Zhang jl[k] = jl[i]; jl[i] = k; 27317a48dd6fSHong Zhang il[k] = ui[k] + 1; 27327a48dd6fSHong Zhang } 27337a48dd6fSHong Zhang uj_ptr[k] = current_space->array; 27347a48dd6fSHong Zhang uj_lvl_ptr[k] = current_space_lvl->array; 27357a48dd6fSHong Zhang 27367a48dd6fSHong Zhang current_space->array += nzk; 27377a48dd6fSHong Zhang current_space->local_used += nzk; 27387a48dd6fSHong Zhang current_space->local_remaining -= nzk; 27397a48dd6fSHong Zhang 27407a48dd6fSHong Zhang current_space_lvl->array += nzk; 27417a48dd6fSHong Zhang current_space_lvl->local_used += nzk; 27427a48dd6fSHong Zhang current_space_lvl->local_remaining -= nzk; 27437a48dd6fSHong Zhang 27447a48dd6fSHong Zhang ui[k+1] = ui[k] + nzk; 27457a48dd6fSHong Zhang } 27467a48dd6fSHong Zhang 27476cf91177SBarry Smith #if defined(PETSC_USE_INFO) 27487a48dd6fSHong Zhang if (ai[am] != 0) { 274939e3d630SHong Zhang PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2750ae15b995SBarry Smith ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2751ae15b995SBarry Smith ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2752ae15b995SBarry Smith ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 27537a48dd6fSHong Zhang } else { 2754ae15b995SBarry Smith ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 27557a48dd6fSHong Zhang } 275663ba0a88SBarry Smith #endif 27577a48dd6fSHong Zhang 27587a48dd6fSHong Zhang ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2759b635d36bSHong Zhang ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 27600e83c824SBarry Smith ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 27615a8e39fbSHong Zhang ierr = PetscFree(ajtmp);CHKERRQ(ierr); 27627a48dd6fSHong Zhang 27637a48dd6fSHong Zhang /* destroy list of free space and other temporary array(s) */ 27647a48dd6fSHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2765a1a86e44SBarry Smith ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 27662ed133dbSHong Zhang ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2767a1a86e44SBarry Smith ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 27687a48dd6fSHong Zhang 276939e3d630SHong Zhang } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 277039e3d630SHong Zhang 27717a48dd6fSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 27727a48dd6fSHong Zhang 2773f284f12aSHong Zhang b = (Mat_SeqSBAIJ*)fact->data; 27747a48dd6fSHong Zhang b->singlemalloc = PETSC_FALSE; 27757a48dd6fSHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 27767a48dd6fSHong Zhang b->j = uj; 27777a48dd6fSHong Zhang b->i = ui; 27785f44c854SHong Zhang b->diag = udiag; 27797f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 27807a48dd6fSHong Zhang b->ilen = 0; 27817a48dd6fSHong Zhang b->imax = 0; 27827a48dd6fSHong Zhang b->row = perm; 2783b635d36bSHong Zhang b->col = perm; 2784b635d36bSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2785b635d36bSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2786b635d36bSHong Zhang b->icol = iperm; 27877a48dd6fSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 27887a48dd6fSHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2789f284f12aSHong Zhang ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 27907a48dd6fSHong Zhang b->maxnz = b->nz = ui[am]; 2791e6b907acSBarry Smith b->free_a = PETSC_TRUE; 2792e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 27937a48dd6fSHong Zhang 2794f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 2795f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 27967a48dd6fSHong Zhang if (ai[am] != 0) { 2797f284f12aSHong Zhang fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 27987a48dd6fSHong Zhang } else { 2799f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 28007a48dd6fSHong Zhang } 28010a3c351aSHong Zhang fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2802a6175056SHong Zhang PetscFunctionReturn(0); 2803a6175056SHong Zhang } 2804d5d45c9bSBarry Smith 2805177d4faaSHong Zhang #undef __FUNCT__ 2806177d4faaSHong Zhang #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 280735233d99SShri Abhyankar PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 28080be760fbSHong Zhang { 28090be760fbSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 28100be760fbSHong Zhang Mat_SeqSBAIJ *b; 28110be760fbSHong Zhang PetscErrorCode ierr; 2812ace3abfcSBarry Smith PetscBool perm_identity; 28130be760fbSHong Zhang PetscReal fill = info->fill; 28140be760fbSHong Zhang const PetscInt *rip,*riip; 28150be760fbSHong Zhang PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 28160be760fbSHong Zhang PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 28170be760fbSHong Zhang PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 28180be760fbSHong Zhang PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 28190be760fbSHong Zhang PetscBT lnkbt; 28200be760fbSHong Zhang IS iperm; 28210be760fbSHong Zhang 28220be760fbSHong Zhang PetscFunctionBegin; 2823e32f2f54SBarry Smith if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 28240be760fbSHong Zhang /* check whether perm is the identity mapping */ 28250be760fbSHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 28260be760fbSHong Zhang ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 28270be760fbSHong Zhang ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 28280be760fbSHong Zhang ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 28290be760fbSHong Zhang 28300be760fbSHong Zhang /* initialization */ 28310be760fbSHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 28320be760fbSHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 28330be760fbSHong Zhang ui[0] = 0; 28340be760fbSHong Zhang 28350be760fbSHong Zhang /* jl: linked list for storing indices of the pivot rows 28360be760fbSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 28370be760fbSHong Zhang ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 28380be760fbSHong Zhang for (i=0; i<am; i++) { 28390be760fbSHong Zhang jl[i] = am; il[i] = 0; 28400be760fbSHong Zhang } 28410be760fbSHong Zhang 28420be760fbSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 28430be760fbSHong Zhang nlnk = am + 1; 28440be760fbSHong Zhang ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 28450be760fbSHong Zhang 284695b5ac22SHong Zhang /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 284795b5ac22SHong Zhang ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 28480be760fbSHong Zhang current_space = free_space; 28490be760fbSHong Zhang 28500be760fbSHong Zhang for (k=0; k<am; k++) { /* for each active row k */ 28510be760fbSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 28520be760fbSHong Zhang nzk = 0; 28530be760fbSHong Zhang ncols = ai[rip[k]+1] - ai[rip[k]]; 2854e32f2f54SBarry Smith if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 28550be760fbSHong Zhang ncols_upper = 0; 28560be760fbSHong Zhang for (j=0; j<ncols; j++) { 28570be760fbSHong Zhang i = riip[*(aj + ai[rip[k]] + j)]; 28580be760fbSHong Zhang if (i >= k) { /* only take upper triangular entry */ 28590be760fbSHong Zhang cols[ncols_upper] = i; 28600be760fbSHong Zhang ncols_upper++; 28610be760fbSHong Zhang } 28620be760fbSHong Zhang } 28630be760fbSHong Zhang ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 28640be760fbSHong Zhang nzk += nlnk; 28650be760fbSHong Zhang 28660be760fbSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 28670be760fbSHong Zhang prow = jl[k]; /* 1st pivot row */ 28680be760fbSHong Zhang 28690be760fbSHong Zhang while (prow < k) { 28700be760fbSHong Zhang nextprow = jl[prow]; 28710be760fbSHong Zhang /* merge prow into k-th row */ 28720be760fbSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 28730be760fbSHong Zhang jmax = ui[prow+1]; 28740be760fbSHong Zhang ncols = jmax-jmin; 28750be760fbSHong Zhang uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 28760be760fbSHong Zhang ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 28770be760fbSHong Zhang nzk += nlnk; 28780be760fbSHong Zhang 28790be760fbSHong Zhang /* update il and jl for prow */ 28800be760fbSHong Zhang if (jmin < jmax) { 28810be760fbSHong Zhang il[prow] = jmin; 28820be760fbSHong Zhang j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 28830be760fbSHong Zhang } 28840be760fbSHong Zhang prow = nextprow; 28850be760fbSHong Zhang } 28860be760fbSHong Zhang 28870be760fbSHong Zhang /* if free space is not available, make more free space */ 28880be760fbSHong Zhang if (current_space->local_remaining<nzk) { 28890be760fbSHong Zhang i = am - k + 1; /* num of unfactored rows */ 2890c6ae9bfcSHong Zhang i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 28910be760fbSHong Zhang ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 28920be760fbSHong Zhang reallocs++; 28930be760fbSHong Zhang } 28940be760fbSHong Zhang 28950be760fbSHong Zhang /* copy data into free space, then initialize lnk */ 28960be760fbSHong Zhang ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 28970be760fbSHong Zhang 28980be760fbSHong Zhang /* add the k-th row into il and jl */ 28997b056e98SHong Zhang if (nzk > 1) { 29000be760fbSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 29010be760fbSHong Zhang jl[k] = jl[i]; jl[i] = k; 29020be760fbSHong Zhang il[k] = ui[k] + 1; 29030be760fbSHong Zhang } 29040be760fbSHong Zhang ui_ptr[k] = current_space->array; 29050be760fbSHong Zhang current_space->array += nzk; 29060be760fbSHong Zhang current_space->local_used += nzk; 29070be760fbSHong Zhang current_space->local_remaining -= nzk; 29080be760fbSHong Zhang 29090be760fbSHong Zhang ui[k+1] = ui[k] + nzk; 29100be760fbSHong Zhang } 29110be760fbSHong Zhang 29120be760fbSHong Zhang ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 29130be760fbSHong Zhang ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 29140be760fbSHong Zhang ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 29150be760fbSHong Zhang 29169263d837SHong Zhang /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 29170be760fbSHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 291835233d99SShri Abhyankar ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 29190be760fbSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 29200be760fbSHong Zhang 29210be760fbSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 29220be760fbSHong Zhang 2923f284f12aSHong Zhang b = (Mat_SeqSBAIJ*)fact->data; 29240be760fbSHong Zhang b->singlemalloc = PETSC_FALSE; 29250be760fbSHong Zhang b->free_a = PETSC_TRUE; 29260be760fbSHong Zhang b->free_ij = PETSC_TRUE; 29270be760fbSHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 29280be760fbSHong Zhang b->j = uj; 29290be760fbSHong Zhang b->i = ui; 29300be760fbSHong Zhang b->diag = udiag; 29310be760fbSHong Zhang b->free_diag = PETSC_TRUE; 29320be760fbSHong Zhang b->ilen = 0; 29330be760fbSHong Zhang b->imax = 0; 29340be760fbSHong Zhang b->row = perm; 29350be760fbSHong Zhang b->col = perm; 29360be760fbSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 29370be760fbSHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 29380be760fbSHong Zhang b->icol = iperm; 29390be760fbSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 29400be760fbSHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 29410860e08cSHong Zhang ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 29420be760fbSHong Zhang b->maxnz = b->nz = ui[am]; 29430be760fbSHong Zhang 2944f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 2945f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 29460be760fbSHong Zhang if (ai[am] != 0) { 29476a69fef8SHong Zhang /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 294895b5ac22SHong Zhang fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 29490be760fbSHong Zhang } else { 2950f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 29510be760fbSHong Zhang } 29529263d837SHong Zhang #if defined(PETSC_USE_INFO) 29539263d837SHong Zhang if (ai[am] != 0) { 29549263d837SHong Zhang PetscReal af = fact->info.fill_ratio_needed; 29559263d837SHong Zhang ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 29569263d837SHong Zhang ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 29579263d837SHong Zhang ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 29589263d837SHong Zhang } else { 29599263d837SHong Zhang ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 29609263d837SHong Zhang } 29619263d837SHong Zhang #endif 296235233d99SShri Abhyankar fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 29630be760fbSHong Zhang PetscFunctionReturn(0); 29640be760fbSHong Zhang } 29650be760fbSHong Zhang 2966f76d2b81SHong Zhang #undef __FUNCT__ 2967ad04f41aSHong Zhang #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace" 2968ad04f41aSHong Zhang PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2969f76d2b81SHong Zhang { 2970f76d2b81SHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 297110c27e3dSHong Zhang Mat_SeqSBAIJ *b; 2972dfbe8321SBarry Smith PetscErrorCode ierr; 2973ace3abfcSBarry Smith PetscBool perm_identity; 297410c27e3dSHong Zhang PetscReal fill = info->fill; 29755d0c19d7SBarry Smith const PetscInt *rip,*riip; 29765d0c19d7SBarry Smith PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 297710c27e3dSHong Zhang PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 29782ed133dbSHong Zhang PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2979a1a86e44SBarry Smith PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 298010c27e3dSHong Zhang PetscBT lnkbt; 29812ed133dbSHong Zhang IS iperm; 2982f76d2b81SHong Zhang 2983f76d2b81SHong Zhang PetscFunctionBegin; 2984e32f2f54SBarry Smith if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 29852ed133dbSHong Zhang /* check whether perm is the identity mapping */ 2986f76d2b81SHong Zhang ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 29872ed133dbSHong Zhang ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 29882ed133dbSHong Zhang ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 29899bfd6278SHong Zhang ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 299010c27e3dSHong Zhang 299110c27e3dSHong Zhang /* initialization */ 29921393bc97SHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 299310c27e3dSHong Zhang ui[0] = 0; 299410c27e3dSHong Zhang 299510c27e3dSHong Zhang /* jl: linked list for storing indices of the pivot rows 29961393bc97SHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 29970e83c824SBarry Smith ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 29981393bc97SHong Zhang for (i=0; i<am; i++) { 29991393bc97SHong Zhang jl[i] = am; il[i] = 0; 3000f76d2b81SHong Zhang } 300110c27e3dSHong Zhang 300210c27e3dSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */ 30031393bc97SHong Zhang nlnk = am + 1; 30041393bc97SHong Zhang ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 300510c27e3dSHong Zhang 30061393bc97SHong Zhang /* initial FreeSpace size is fill*(ai[am]+1) */ 3007a1a86e44SBarry Smith ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 300810c27e3dSHong Zhang current_space = free_space; 300910c27e3dSHong Zhang 30101393bc97SHong Zhang for (k=0; k<am; k++) { /* for each active row k */ 301110c27e3dSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */ 301210c27e3dSHong Zhang nzk = 0; 30132ed133dbSHong Zhang ncols = ai[rip[k]+1] - ai[rip[k]]; 3014e32f2f54SBarry Smith if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 30152ed133dbSHong Zhang ncols_upper = 0; 30162ed133dbSHong Zhang for (j=0; j<ncols; j++) { 30179bfd6278SHong Zhang i = riip[*(aj + ai[rip[k]] + j)]; 30182ed133dbSHong Zhang if (i >= k) { /* only take upper triangular entry */ 30192ed133dbSHong Zhang cols[ncols_upper] = i; 30202ed133dbSHong Zhang ncols_upper++; 30212ed133dbSHong Zhang } 30222ed133dbSHong Zhang } 30231393bc97SHong Zhang ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 302410c27e3dSHong Zhang nzk += nlnk; 302510c27e3dSHong Zhang 302610c27e3dSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */ 302710c27e3dSHong Zhang prow = jl[k]; /* 1st pivot row */ 302810c27e3dSHong Zhang 302910c27e3dSHong Zhang while (prow < k) { 303010c27e3dSHong Zhang nextprow = jl[prow]; 303110c27e3dSHong Zhang /* merge prow into k-th row */ 30321393bc97SHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 303310c27e3dSHong Zhang jmax = ui[prow+1]; 303410c27e3dSHong Zhang ncols = jmax-jmin; 30351393bc97SHong Zhang uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 30365a8e39fbSHong Zhang ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 303710c27e3dSHong Zhang nzk += nlnk; 303810c27e3dSHong Zhang 303910c27e3dSHong Zhang /* update il and jl for prow */ 304010c27e3dSHong Zhang if (jmin < jmax) { 304110c27e3dSHong Zhang il[prow] = jmin; 30422ed133dbSHong Zhang j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 304310c27e3dSHong Zhang } 304410c27e3dSHong Zhang prow = nextprow; 304510c27e3dSHong Zhang } 304610c27e3dSHong Zhang 304710c27e3dSHong Zhang /* if free space is not available, make more free space */ 304810c27e3dSHong Zhang if (current_space->local_remaining<nzk) { 30491393bc97SHong Zhang i = am - k + 1; /* num of unfactored rows */ 305010c27e3dSHong Zhang i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3051a1a86e44SBarry Smith ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 305210c27e3dSHong Zhang reallocs++; 305310c27e3dSHong Zhang } 305410c27e3dSHong Zhang 305510c27e3dSHong Zhang /* copy data into free space, then initialize lnk */ 30561393bc97SHong Zhang ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 305710c27e3dSHong Zhang 305810c27e3dSHong Zhang /* add the k-th row into il and jl */ 305910c27e3dSHong Zhang if (nzk-1 > 0) { 30601393bc97SHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 306110c27e3dSHong Zhang jl[k] = jl[i]; jl[i] = k; 306210c27e3dSHong Zhang il[k] = ui[k] + 1; 306310c27e3dSHong Zhang } 30642ed133dbSHong Zhang ui_ptr[k] = current_space->array; 306510c27e3dSHong Zhang current_space->array += nzk; 306610c27e3dSHong Zhang current_space->local_used += nzk; 306710c27e3dSHong Zhang current_space->local_remaining -= nzk; 306810c27e3dSHong Zhang 306910c27e3dSHong Zhang ui[k+1] = ui[k] + nzk; 307010c27e3dSHong Zhang } 307110c27e3dSHong Zhang 30726cf91177SBarry Smith #if defined(PETSC_USE_INFO) 30731393bc97SHong Zhang if (ai[am] != 0) { 30741393bc97SHong Zhang PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3075ae15b995SBarry Smith ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 3076ae15b995SBarry Smith ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 3077ae15b995SBarry Smith ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 307810c27e3dSHong Zhang } else { 3079ae15b995SBarry Smith ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 308010c27e3dSHong Zhang } 308163ba0a88SBarry Smith #endif 308210c27e3dSHong Zhang 308310c27e3dSHong Zhang ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 30849bfd6278SHong Zhang ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 30850e83c824SBarry Smith ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 308610c27e3dSHong Zhang 308710c27e3dSHong Zhang /* destroy list of free space and other temporary array(s) */ 30881393bc97SHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 3089a1a86e44SBarry Smith ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 309010c27e3dSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 309110c27e3dSHong Zhang 309210c27e3dSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */ 309310c27e3dSHong Zhang 3094f284f12aSHong Zhang b = (Mat_SeqSBAIJ*)fact->data; 309510c27e3dSHong Zhang b->singlemalloc = PETSC_FALSE; 3096e6b907acSBarry Smith b->free_a = PETSC_TRUE; 3097e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 30981393bc97SHong Zhang ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 309910c27e3dSHong Zhang b->j = uj; 310010c27e3dSHong Zhang b->i = ui; 310110c27e3dSHong Zhang b->diag = 0; 310210c27e3dSHong Zhang b->ilen = 0; 310310c27e3dSHong Zhang b->imax = 0; 310410c27e3dSHong Zhang b->row = perm; 31059bfd6278SHong Zhang b->col = perm; 31069bfd6278SHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 31079bfd6278SHong Zhang ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 31089bfd6278SHong Zhang b->icol = iperm; 310910c27e3dSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 31101393bc97SHong Zhang ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3111719d5645SBarry Smith ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 31121393bc97SHong Zhang b->maxnz = b->nz = ui[am]; 311310c27e3dSHong Zhang 3114f284f12aSHong Zhang fact->info.factor_mallocs = reallocs; 3115f284f12aSHong Zhang fact->info.fill_ratio_given = fill; 31161393bc97SHong Zhang if (ai[am] != 0) { 3117f284f12aSHong Zhang fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 311810c27e3dSHong Zhang } else { 3119f284f12aSHong Zhang fact->info.fill_ratio_needed = 0.0; 312010c27e3dSHong Zhang } 31210a3c351aSHong Zhang fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3122f76d2b81SHong Zhang PetscFunctionReturn(0); 3123f76d2b81SHong Zhang } 3124599ef60dSHong Zhang 31251d098868SHong Zhang #undef __FUNCT__ 312635233d99SShri Abhyankar #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 312735233d99SShri Abhyankar PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3128813a1f2bSShri Abhyankar { 3129813a1f2bSShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3130813a1f2bSShri Abhyankar PetscErrorCode ierr; 3131813a1f2bSShri Abhyankar PetscInt n = A->rmap->n; 3132813a1f2bSShri Abhyankar const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3133813a1f2bSShri Abhyankar PetscScalar *x,sum; 3134813a1f2bSShri Abhyankar const PetscScalar *b; 3135813a1f2bSShri Abhyankar const MatScalar *aa = a->a,*v; 3136813a1f2bSShri Abhyankar PetscInt i,nz; 3137813a1f2bSShri Abhyankar 3138813a1f2bSShri Abhyankar PetscFunctionBegin; 3139813a1f2bSShri Abhyankar if (!n) PetscFunctionReturn(0); 3140813a1f2bSShri Abhyankar 31413649974fSBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3142813a1f2bSShri Abhyankar ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3143813a1f2bSShri Abhyankar 3144813a1f2bSShri Abhyankar /* forward solve the lower triangular */ 3145813a1f2bSShri Abhyankar x[0] = b[0]; 3146813a1f2bSShri Abhyankar v = aa; 3147813a1f2bSShri Abhyankar vi = aj; 3148813a1f2bSShri Abhyankar for (i=1; i<n; i++) { 3149813a1f2bSShri Abhyankar nz = ai[i+1] - ai[i]; 3150813a1f2bSShri Abhyankar sum = b[i]; 3151813a1f2bSShri Abhyankar PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3152813a1f2bSShri Abhyankar v += nz; 3153813a1f2bSShri Abhyankar vi += nz; 3154813a1f2bSShri Abhyankar x[i] = sum; 3155813a1f2bSShri Abhyankar } 3156813a1f2bSShri Abhyankar 3157813a1f2bSShri Abhyankar /* backward solve the upper triangular */ 315862fbd6afSShri Abhyankar for (i=n-1; i>=0; i--) { 31593c0119dfSShri Abhyankar v = aa + adiag[i+1] + 1; 31603c0119dfSShri Abhyankar vi = aj + adiag[i+1] + 1; 3161813a1f2bSShri Abhyankar nz = adiag[i] - adiag[i+1]-1; 3162813a1f2bSShri Abhyankar sum = x[i]; 3163813a1f2bSShri Abhyankar PetscSparseDenseMinusDot(sum,x,v,vi,nz); 31643c0119dfSShri Abhyankar x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3165813a1f2bSShri Abhyankar } 3166813a1f2bSShri Abhyankar 3167813a1f2bSShri Abhyankar ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 31683649974fSBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3169813a1f2bSShri Abhyankar ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3170813a1f2bSShri Abhyankar PetscFunctionReturn(0); 3171813a1f2bSShri Abhyankar } 3172813a1f2bSShri Abhyankar 31731d098868SHong Zhang #undef __FUNCT__ 317435233d99SShri Abhyankar #define __FUNCT__ "MatSolve_SeqAIJ" 317535233d99SShri Abhyankar PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3176f268cba8SShri Abhyankar { 3177f268cba8SShri Abhyankar Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3178f268cba8SShri Abhyankar IS iscol = a->col,isrow = a->row; 3179f268cba8SShri Abhyankar PetscErrorCode ierr; 3180f268cba8SShri Abhyankar PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3181f268cba8SShri Abhyankar const PetscInt *rout,*cout,*r,*c; 3182f268cba8SShri Abhyankar PetscScalar *x,*tmp,sum; 3183f268cba8SShri Abhyankar const PetscScalar *b; 3184f268cba8SShri Abhyankar const MatScalar *aa = a->a,*v; 3185f268cba8SShri Abhyankar 3186f268cba8SShri Abhyankar PetscFunctionBegin; 3187f268cba8SShri Abhyankar if (!n) PetscFunctionReturn(0); 3188f268cba8SShri Abhyankar 31893649974fSBarry Smith ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3190f268cba8SShri Abhyankar ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3191f268cba8SShri Abhyankar tmp = a->solve_work; 3192f268cba8SShri Abhyankar 3193f268cba8SShri Abhyankar ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3194f268cba8SShri Abhyankar ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3195f268cba8SShri Abhyankar 3196f268cba8SShri Abhyankar /* forward solve the lower triangular */ 3197f268cba8SShri Abhyankar tmp[0] = b[r[0]]; 3198f268cba8SShri Abhyankar v = aa; 3199f268cba8SShri Abhyankar vi = aj; 3200f268cba8SShri Abhyankar for (i=1; i<n; i++) { 3201f268cba8SShri Abhyankar nz = ai[i+1] - ai[i]; 3202f268cba8SShri Abhyankar sum = b[r[i]]; 3203f268cba8SShri Abhyankar PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3204f268cba8SShri Abhyankar tmp[i] = sum; 3205f268cba8SShri Abhyankar v += nz; vi += nz; 3206f268cba8SShri Abhyankar } 3207f268cba8SShri Abhyankar 3208f268cba8SShri Abhyankar /* backward solve the upper triangular */ 3209f268cba8SShri Abhyankar for (i=n-1; i>=0; i--) { 32103c0119dfSShri Abhyankar v = aa + adiag[i+1]+1; 32113c0119dfSShri Abhyankar vi = aj + adiag[i+1]+1; 3212f268cba8SShri Abhyankar nz = adiag[i]-adiag[i+1]-1; 3213f268cba8SShri Abhyankar sum = tmp[i]; 3214f268cba8SShri Abhyankar PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3215f268cba8SShri Abhyankar x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3216f268cba8SShri Abhyankar } 3217f268cba8SShri Abhyankar 3218f268cba8SShri Abhyankar ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3219f268cba8SShri Abhyankar ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 32203649974fSBarry Smith ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3221f268cba8SShri Abhyankar ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3222f268cba8SShri Abhyankar ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3223f268cba8SShri Abhyankar PetscFunctionReturn(0); 3224f268cba8SShri Abhyankar } 3225f268cba8SShri Abhyankar 3226f268cba8SShri Abhyankar #undef __FUNCT__ 32271d098868SHong Zhang #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3228fe97e370SBarry Smith /* 3229fe97e370SBarry Smith This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3230fe97e370SBarry Smith */ 32311d098868SHong Zhang PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 32321d098868SHong Zhang { 32331d098868SHong Zhang Mat B = *fact; 3234599ef60dSHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3235599ef60dSHong Zhang IS isicol; 3236599ef60dSHong Zhang PetscErrorCode ierr; 32371d098868SHong Zhang const PetscInt *r,*ic; 32381d098868SHong Zhang PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3239dc3a2fd3SHong Zhang PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3240f61a948aSLisandro Dalcin PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 32411d098868SHong Zhang PetscInt nlnk,*lnk; 32421d098868SHong Zhang PetscBT lnkbt; 3243ace3abfcSBarry Smith PetscBool row_identity,icol_identity; 32448aee2decSHong Zhang MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 32451d098868SHong Zhang const PetscInt *ics; 32461d098868SHong Zhang PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3247a2ea699eSBarry Smith PetscReal dt=info->dt,shift=info->shiftamount; 3248f61a948aSLisandro Dalcin PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3249ace3abfcSBarry Smith PetscBool missing; 3250599ef60dSHong Zhang 3251599ef60dSHong Zhang PetscFunctionBegin; 3252f61a948aSLisandro Dalcin 3253f61a948aSLisandro Dalcin if (dt == PETSC_DEFAULT) dt = 0.005; 3254f61a948aSLisandro Dalcin if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3255f61a948aSLisandro Dalcin 32561d098868SHong Zhang /* ------- symbolic factorization, can be reused ---------*/ 32571d098868SHong Zhang ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3258e32f2f54SBarry Smith if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 32591d098868SHong Zhang adiag=a->diag; 3260599ef60dSHong Zhang 3261599ef60dSHong Zhang ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3262599ef60dSHong Zhang 3263599ef60dSHong Zhang /* bdiag is location of diagonal in factor */ 32641bfa06eaSJed Brown ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 32651bfa06eaSJed Brown ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3266599ef60dSHong Zhang 32671d098868SHong Zhang /* allocate row pointers bi */ 32688fc3a347SHong Zhang ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 32691d098868SHong Zhang 32701d098868SHong Zhang /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3271393d3a68SHong Zhang if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 32721d098868SHong Zhang nnz_max = ai[n]+2*n*dtcount+2; 32738fc3a347SHong Zhang 32748fc3a347SHong Zhang ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 32758fc3a347SHong Zhang ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3276599ef60dSHong Zhang 3277599ef60dSHong Zhang /* put together the new matrix */ 3278599ef60dSHong Zhang ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 3279599ef60dSHong Zhang ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 3280f284f12aSHong Zhang b = (Mat_SeqAIJ*)B->data; 3281599ef60dSHong Zhang b->free_a = PETSC_TRUE; 3282599ef60dSHong Zhang b->free_ij = PETSC_TRUE; 3283599ef60dSHong Zhang b->singlemalloc = PETSC_FALSE; 3284599ef60dSHong Zhang b->a = ba; 3285599ef60dSHong Zhang b->j = bj; 3286599ef60dSHong Zhang b->i = bi; 3287599ef60dSHong Zhang b->diag = bdiag; 3288599ef60dSHong Zhang b->ilen = 0; 3289599ef60dSHong Zhang b->imax = 0; 3290599ef60dSHong Zhang b->row = isrow; 3291599ef60dSHong Zhang b->col = iscol; 3292599ef60dSHong Zhang ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3293599ef60dSHong Zhang ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3294599ef60dSHong Zhang b->icol = isicol; 3295599ef60dSHong Zhang ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3296599ef60dSHong Zhang 32971d098868SHong Zhang ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 32981d098868SHong Zhang b->maxnz = nnz_max; 3299599ef60dSHong Zhang 3300d5f3da31SBarry Smith B->factortype = MAT_FACTOR_ILUDT; 3301f284f12aSHong Zhang B->info.factor_mallocs = 0; 3302f284f12aSHong Zhang B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 33031d098868SHong Zhang CHKMEMQ; 33041d098868SHong Zhang /* ------- end of symbolic factorization ---------*/ 3305599ef60dSHong Zhang 3306599ef60dSHong Zhang ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3307599ef60dSHong Zhang ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3308599ef60dSHong Zhang ics = ic; 3309599ef60dSHong Zhang 3310599ef60dSHong Zhang /* linked list for storing column indices of the active row */ 3311599ef60dSHong Zhang nlnk = n + 1; 3312599ef60dSHong Zhang ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 33131d098868SHong Zhang 33141d098868SHong Zhang /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 33150e83c824SBarry Smith ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr); 33161d098868SHong Zhang /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 33170e83c824SBarry Smith ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr); 33180e83c824SBarry Smith ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr); 3319599ef60dSHong Zhang 3320599ef60dSHong Zhang bi[0] = 0; 33218fc3a347SHong Zhang bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3322dc3a2fd3SHong Zhang bdiag_rev[n] = bdiag[0]; 33238fc3a347SHong Zhang bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3324599ef60dSHong Zhang for (i=0; i<n; i++) { 3325599ef60dSHong Zhang /* copy initial fill into linked list */ 33268369b28dSHong Zhang nzi = ai[r[i]+1] - ai[r[i]]; 3327e32f2f54SBarry Smith if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 33288369b28dSHong Zhang nzi_al = adiag[r[i]] - ai[r[i]]; 33298369b28dSHong Zhang nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3330599ef60dSHong Zhang ajtmp = aj + ai[r[i]]; 33318369b28dSHong Zhang ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3332599ef60dSHong Zhang 3333599ef60dSHong Zhang /* load in initial (unfactored row) */ 33341d098868SHong Zhang aatmp = a->a + ai[r[i]]; 33358369b28dSHong Zhang for (j=0; j<nzi; j++) { 33361d098868SHong Zhang rtmp[ics[*ajtmp++]] = *aatmp++; 3337599ef60dSHong Zhang } 3338599ef60dSHong Zhang 3339599ef60dSHong Zhang /* add pivot rows into linked list */ 3340599ef60dSHong Zhang row = lnk[n]; 3341599ef60dSHong Zhang while (row < i ) { 33421d098868SHong Zhang nzi_bl = bi[row+1] - bi[row] + 1; 33431d098868SHong Zhang bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 33441d098868SHong Zhang ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3345599ef60dSHong Zhang nzi += nlnk; 3346599ef60dSHong Zhang row = lnk[row]; 3347599ef60dSHong Zhang } 3348599ef60dSHong Zhang 33498369b28dSHong Zhang /* copy data from lnk into jtmp, then initialize lnk */ 33508369b28dSHong Zhang ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3351599ef60dSHong Zhang 3352599ef60dSHong Zhang /* numerical factorization */ 33538369b28dSHong Zhang bjtmp = jtmp; 3354599ef60dSHong Zhang row = *bjtmp++; /* 1st pivot row */ 3355599ef60dSHong Zhang while ( row < i ) { 3356599ef60dSHong Zhang pc = rtmp + row; 33573c2a7987SHong Zhang pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 33583c2a7987SHong Zhang multiplier = (*pc) * (*pv); 3359599ef60dSHong Zhang *pc = multiplier; 33603c2a7987SHong Zhang if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 33611d098868SHong Zhang pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 33621d098868SHong Zhang pv = ba + bdiag[row+1] + 1; 33631d098868SHong Zhang /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 33641d098868SHong Zhang nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 33651d098868SHong Zhang for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 336628b1a77fSLisandro Dalcin ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3367599ef60dSHong Zhang } 3368599ef60dSHong Zhang row = *bjtmp++; 3369599ef60dSHong Zhang } 3370599ef60dSHong Zhang 33718369b28dSHong Zhang /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 33728aee2decSHong Zhang diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 33738369b28dSHong Zhang nzi_bl = 0; j = 0; 33748369b28dSHong Zhang while (jtmp[j] < i) { /* Note: jtmp is sorted */ 33758aee2decSHong Zhang vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 33768369b28dSHong Zhang nzi_bl++; j++; 33778369b28dSHong Zhang } 33788369b28dSHong Zhang nzi_bu = nzi - nzi_bl -1; 33798369b28dSHong Zhang while (j < nzi) { 33808aee2decSHong Zhang vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 33818369b28dSHong Zhang j++; 33828369b28dSHong Zhang } 33838369b28dSHong Zhang 3384599ef60dSHong Zhang bjtmp = bj + bi[i]; 3385599ef60dSHong Zhang batmp = ba + bi[i]; 33868369b28dSHong Zhang /* apply level dropping rule to L part */ 33878369b28dSHong Zhang ncut = nzi_al + dtcount; 33888369b28dSHong Zhang if (ncut < nzi_bl) { 33898369b28dSHong Zhang ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 33908369b28dSHong Zhang ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3391599ef60dSHong Zhang } else { 33928369b28dSHong Zhang ncut = nzi_bl; 33938369b28dSHong Zhang } 33948369b28dSHong Zhang for (j=0; j<ncut; j++) { 33958369b28dSHong Zhang bjtmp[j] = jtmp[j]; 33968369b28dSHong Zhang batmp[j] = vtmp[j]; 33976da515a1SHong Zhang /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 33988369b28dSHong Zhang } 33991d098868SHong Zhang bi[i+1] = bi[i] + ncut; 34008369b28dSHong Zhang nzi = ncut + 1; 34018369b28dSHong Zhang 34028369b28dSHong Zhang /* apply level dropping rule to U part */ 34038369b28dSHong Zhang ncut = nzi_au + dtcount; 34048369b28dSHong Zhang if (ncut < nzi_bu) { 34058369b28dSHong Zhang ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 34068369b28dSHong Zhang ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 34078369b28dSHong Zhang } else { 34088369b28dSHong Zhang ncut = nzi_bu; 34098369b28dSHong Zhang } 34108369b28dSHong Zhang nzi += ncut; 34111d098868SHong Zhang 34121d098868SHong Zhang /* mark bdiagonal */ 34131d098868SHong Zhang bdiag[i+1] = bdiag[i] - (ncut + 1); 3414dc3a2fd3SHong Zhang bdiag_rev[n-i-1] = bdiag[i+1]; 34158fc3a347SHong Zhang bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 34161d098868SHong Zhang bjtmp = bj + bdiag[i]; 34171d098868SHong Zhang batmp = ba + bdiag[i]; 34181d098868SHong Zhang *bjtmp = i; 34198aee2decSHong Zhang *batmp = diag_tmp; /* rtmp[i]; */ 3420c9c72f8fSHong Zhang if (*batmp == 0.0) { 3421c9c72f8fSHong Zhang *batmp = dt+shift; 3422bd1bc851SBarry Smith /* printf(" row %d add shift %g\n",i,shift); */ 3423c9c72f8fSHong Zhang } 3424b78a477dSHong Zhang *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 34256da515a1SHong Zhang /* printf(" (%d,%g),",*bjtmp,*batmp); */ 34261d098868SHong Zhang 34271d098868SHong Zhang bjtmp = bj + bdiag[i+1]+1; 34281d098868SHong Zhang batmp = ba + bdiag[i+1]+1; 34298369b28dSHong Zhang for (k=0; k<ncut; k++) { 34301d098868SHong Zhang bjtmp[k] = jtmp[nzi_bl+1+k]; 34311d098868SHong Zhang batmp[k] = vtmp[nzi_bl+1+k]; 34326da515a1SHong Zhang /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3433599ef60dSHong Zhang } 34346da515a1SHong Zhang /* printf("\n"); */ 3435599ef60dSHong Zhang 3436599ef60dSHong Zhang im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3437599ef60dSHong Zhang /* 34381d098868SHong Zhang printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 34398369b28dSHong Zhang printf(" ----------------------------\n"); 3440599ef60dSHong Zhang */ 34418369b28dSHong Zhang } /* for (i=0; i<n; i++) */ 34421d098868SHong Zhang /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3443e32f2f54SBarry Smith if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3444599ef60dSHong Zhang 3445599ef60dSHong Zhang ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3446599ef60dSHong Zhang ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3447599ef60dSHong Zhang 3448599ef60dSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 34490e83c824SBarry Smith ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 34500e83c824SBarry Smith ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 34511bfa06eaSJed Brown ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3452599ef60dSHong Zhang 3453599ef60dSHong Zhang ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 34541d098868SHong Zhang b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3455599ef60dSHong Zhang 3456599ef60dSHong Zhang ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3457599ef60dSHong Zhang ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3458599ef60dSHong Zhang if (row_identity && icol_identity) { 345935233d99SShri Abhyankar B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3460599ef60dSHong Zhang } else { 346135233d99SShri Abhyankar B->ops->solve = MatSolve_SeqAIJ; 3462599ef60dSHong Zhang } 3463599ef60dSHong Zhang 34641d098868SHong Zhang B->ops->solveadd = 0; 34651d098868SHong Zhang B->ops->solvetranspose = 0; 34661d098868SHong Zhang B->ops->solvetransposeadd = 0; 34671d098868SHong Zhang B->ops->matsolve = 0; 3468599ef60dSHong Zhang B->assembled = PETSC_TRUE; 3469599ef60dSHong Zhang B->preallocated = PETSC_TRUE; 3470599ef60dSHong Zhang PetscFunctionReturn(0); 3471599ef60dSHong Zhang } 3472599ef60dSHong Zhang 34733c2a7987SHong Zhang /* a wraper of MatILUDTFactor_SeqAIJ() */ 34743c2a7987SHong Zhang #undef __FUNCT__ 34753c2a7987SHong Zhang #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3476fe97e370SBarry Smith /* 3477fe97e370SBarry Smith This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3478fe97e370SBarry Smith */ 3479fe97e370SBarry Smith 34807087cfbeSBarry Smith PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 34813c2a7987SHong Zhang { 34823c2a7987SHong Zhang PetscErrorCode ierr; 34833c2a7987SHong Zhang 34843c2a7987SHong Zhang PetscFunctionBegin; 34853c2a7987SHong Zhang ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 34863c2a7987SHong Zhang PetscFunctionReturn(0); 34873c2a7987SHong Zhang } 34883c2a7987SHong Zhang 34893c2a7987SHong Zhang /* 34903c2a7987SHong Zhang same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 34913c2a7987SHong Zhang - intend to replace existing MatLUFactorNumeric_SeqAIJ() 34923c2a7987SHong Zhang */ 34933c2a7987SHong Zhang #undef __FUNCT__ 34943c2a7987SHong Zhang #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3495fe97e370SBarry Smith /* 3496fe97e370SBarry Smith This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3497fe97e370SBarry Smith */ 3498fe97e370SBarry Smith 34997087cfbeSBarry Smith PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 35003c2a7987SHong Zhang { 35013c2a7987SHong Zhang Mat C=fact; 35023c2a7987SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 35033c2a7987SHong Zhang IS isrow = b->row,isicol = b->icol; 35043c2a7987SHong Zhang PetscErrorCode ierr; 35053c2a7987SHong Zhang const PetscInt *r,*ic,*ics; 3506b78a477dSHong Zhang PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3507b78a477dSHong Zhang PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 35083c2a7987SHong Zhang MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3509182b8fbaSHong Zhang PetscReal dt=info->dt,shift=info->shiftamount; 3510ace3abfcSBarry Smith PetscBool row_identity, col_identity; 35113c2a7987SHong Zhang 35123c2a7987SHong Zhang PetscFunctionBegin; 35133c2a7987SHong Zhang ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 35143c2a7987SHong Zhang ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3515b78a477dSHong Zhang ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 35163c2a7987SHong Zhang ics = ic; 35173c2a7987SHong Zhang 35183c2a7987SHong Zhang for (i=0; i<n; i++) { 3519b78a477dSHong Zhang /* initialize rtmp array */ 3520b78a477dSHong Zhang nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 35213c2a7987SHong Zhang bjtmp = bj + bi[i]; 3522b78a477dSHong Zhang for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3523b78a477dSHong Zhang rtmp[i] = 0.0; 3524b78a477dSHong Zhang nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3525b78a477dSHong Zhang bjtmp = bj + bdiag[i+1] + 1; 3526b78a477dSHong Zhang for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 35273c2a7987SHong Zhang 3528b78a477dSHong Zhang /* load in initial unfactored row of A */ 35296da515a1SHong Zhang /* printf("row %d\n",i); */ 35303c2a7987SHong Zhang nz = ai[r[i]+1] - ai[r[i]]; 35313c2a7987SHong Zhang ajtmp = aj + ai[r[i]]; 35323c2a7987SHong Zhang v = aa + ai[r[i]]; 35333c2a7987SHong Zhang for (j=0; j<nz; j++) { 3534b78a477dSHong Zhang rtmp[ics[*ajtmp++]] = v[j]; 35356da515a1SHong Zhang /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 35363c2a7987SHong Zhang } 35376da515a1SHong Zhang /* printf("\n"); */ 35383c2a7987SHong Zhang 3539b78a477dSHong Zhang /* numerical factorization */ 3540b78a477dSHong Zhang bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3541b78a477dSHong Zhang nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3542b78a477dSHong Zhang k = 0; 3543b78a477dSHong Zhang while (k < nzl) { 35443c2a7987SHong Zhang row = *bjtmp++; 35456da515a1SHong Zhang /* printf(" prow %d\n",row); */ 35463c2a7987SHong Zhang pc = rtmp + row; 3547b78a477dSHong Zhang pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3548b78a477dSHong Zhang multiplier = (*pc) * (*pv); 35493c2a7987SHong Zhang *pc = multiplier; 3550b78a477dSHong Zhang if (PetscAbsScalar(multiplier) > dt) { 3551b78a477dSHong Zhang pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3552b78a477dSHong Zhang pv = b->a + bdiag[row+1] + 1; 3553b78a477dSHong Zhang nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3554b78a477dSHong Zhang for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 355528b1a77fSLisandro Dalcin ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 35563c2a7987SHong Zhang } 3557b78a477dSHong Zhang k++; 35583c2a7987SHong Zhang } 35593c2a7987SHong Zhang 3560b78a477dSHong Zhang /* finished row so stick it into b->a */ 3561b78a477dSHong Zhang /* L-part */ 3562b78a477dSHong Zhang pv = b->a + bi[i] ; 3563b78a477dSHong Zhang pj = bj + bi[i] ; 3564b78a477dSHong Zhang nzl = bi[i+1] - bi[i]; 3565b78a477dSHong Zhang for (j=0; j<nzl; j++) { 3566b78a477dSHong Zhang pv[j] = rtmp[pj[j]]; 35676da515a1SHong Zhang /* printf(" (%d,%g),",pj[j],pv[j]); */ 35683c2a7987SHong Zhang } 3569b78a477dSHong Zhang 3570b78a477dSHong Zhang /* diagonal: invert diagonal entries for simplier triangular solves */ 3571b78a477dSHong Zhang if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3572b78a477dSHong Zhang b->a[bdiag[i]] = 1.0/rtmp[i]; 35736da515a1SHong Zhang /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3574b78a477dSHong Zhang 3575b78a477dSHong Zhang /* U-part */ 3576b78a477dSHong Zhang pv = b->a + bdiag[i+1] + 1; 3577b78a477dSHong Zhang pj = bj + bdiag[i+1] + 1; 3578b78a477dSHong Zhang nzu = bdiag[i] - bdiag[i+1] - 1; 3579b78a477dSHong Zhang for (j=0; j<nzu; j++) { 3580b78a477dSHong Zhang pv[j] = rtmp[pj[j]]; 35816da515a1SHong Zhang /* printf(" (%d,%g),",pj[j],pv[j]); */ 3582b78a477dSHong Zhang } 35836da515a1SHong Zhang /* printf("\n"); */ 3584b78a477dSHong Zhang } 3585b78a477dSHong Zhang 35863c2a7987SHong Zhang ierr = PetscFree(rtmp);CHKERRQ(ierr); 35873c2a7987SHong Zhang ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 35883c2a7987SHong Zhang ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3589b78a477dSHong Zhang 35903c2a7987SHong Zhang ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 35913c2a7987SHong Zhang ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 35923c2a7987SHong Zhang if (row_identity && col_identity) { 359335233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 35943c2a7987SHong Zhang } else { 359535233d99SShri Abhyankar C->ops->solve = MatSolve_SeqAIJ; 35963c2a7987SHong Zhang } 3597b78a477dSHong Zhang C->ops->solveadd = 0; 3598b78a477dSHong Zhang C->ops->solvetranspose = 0; 3599b78a477dSHong Zhang C->ops->solvetransposeadd = 0; 3600b78a477dSHong Zhang C->ops->matsolve = 0; 36013c2a7987SHong Zhang C->assembled = PETSC_TRUE; 36023c2a7987SHong Zhang C->preallocated = PETSC_TRUE; 36033c2a7987SHong Zhang ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 36043c2a7987SHong Zhang PetscFunctionReturn(0); 36053c2a7987SHong Zhang } 3606