1be1d678aSKris Buschelman #define PETSCMAT_DLL 2be1d678aSKris Buschelman 383287d42SBarry Smith /* 483287d42SBarry Smith Factorization code for BAIJ format. 583287d42SBarry Smith */ 683287d42SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 783287d42SBarry Smith #include "src/inline/ilu.h" 883287d42SBarry Smith 9db4efbfdSBarry Smith #undef __FUNCT__ 10db4efbfdSBarry Smith #define __FUNCT__ "MatSeqBAIJSetNumericFactorization" 11db4efbfdSBarry Smith /* 12db4efbfdSBarry Smith This is used to set the numeric factorization for both LU and ILU symbolic factorization 13db4efbfdSBarry Smith */ 14db4efbfdSBarry Smith PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat inA,PetscTruth natural) 15db4efbfdSBarry Smith { 16db4efbfdSBarry Smith PetscFunctionBegin; 17db4efbfdSBarry Smith if (natural) { 18db4efbfdSBarry Smith switch (inA->rmap->bs) { 19db4efbfdSBarry Smith case 1: 20db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 21db4efbfdSBarry Smith break; 22db4efbfdSBarry Smith case 2: 23db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 24db4efbfdSBarry Smith break; 25db4efbfdSBarry Smith case 3: 26db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 27db4efbfdSBarry Smith break; 28db4efbfdSBarry Smith case 4: 29db4efbfdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 30db4efbfdSBarry Smith { 31db4efbfdSBarry Smith PetscTruth sse_enabled_local; 32db4efbfdSBarry Smith PetscErrorCode ierr; 33db4efbfdSBarry Smith ierr = PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);CHKERRQ(ierr); 34db4efbfdSBarry Smith if (sse_enabled_local) { 35db4efbfdSBarry Smith # if defined(PETSC_HAVE_SSE) 36db4efbfdSBarry Smith int i,*AJ=a->j,nz=a->nz,n=a->mbs; 37db4efbfdSBarry Smith if (n==(unsigned short)n) { 38db4efbfdSBarry Smith unsigned short *aj=(unsigned short *)AJ; 39db4efbfdSBarry Smith for (i=0;i<nz;i++) { 40db4efbfdSBarry Smith aj[i] = (unsigned short)AJ[i]; 41db4efbfdSBarry Smith } 42db4efbfdSBarry Smith inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj; 43db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj; 44db4efbfdSBarry Smith ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");CHKERRQ(ierr); 45db4efbfdSBarry Smith } else { 46db4efbfdSBarry Smith /* Scale the column indices for easier indexing in MatSolve. */ 47db4efbfdSBarry Smith /* for (i=0;i<nz;i++) { */ 48db4efbfdSBarry Smith /* AJ[i] = AJ[i]*4; */ 49db4efbfdSBarry Smith /* } */ 50db4efbfdSBarry Smith inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE; 51db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 52db4efbfdSBarry Smith ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");CHKERRQ(ierr); 53db4efbfdSBarry Smith } 54db4efbfdSBarry Smith # else 55db4efbfdSBarry Smith /* This should never be reached. If so, problem in PetscSSEIsEnabled. */ 56db4efbfdSBarry Smith SETERRQ(PETSC_ERR_SUP,"SSE Hardware unavailable"); 57db4efbfdSBarry Smith # endif 58db4efbfdSBarry Smith } else { 59db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 60db4efbfdSBarry Smith } 61db4efbfdSBarry Smith } 62db4efbfdSBarry Smith #else 63db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 64db4efbfdSBarry Smith #endif 65db4efbfdSBarry Smith break; 66db4efbfdSBarry Smith case 5: 67db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 68db4efbfdSBarry Smith break; 69db4efbfdSBarry Smith case 6: 70db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 71db4efbfdSBarry Smith break; 72db4efbfdSBarry Smith case 7: 73db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 74db4efbfdSBarry Smith break; 75db4efbfdSBarry Smith default: 76db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 77db4efbfdSBarry Smith break; 78db4efbfdSBarry Smith } 79db4efbfdSBarry Smith } else { 80db4efbfdSBarry Smith switch (inA->rmap->bs) { 81db4efbfdSBarry Smith case 1: 82db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 83db4efbfdSBarry Smith break; 84db4efbfdSBarry Smith case 2: 85db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 86db4efbfdSBarry Smith break; 87db4efbfdSBarry Smith case 3: 88db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 89db4efbfdSBarry Smith break; 90db4efbfdSBarry Smith case 4: 91db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 92db4efbfdSBarry Smith break; 93db4efbfdSBarry Smith case 5: 94db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 95db4efbfdSBarry Smith break; 96db4efbfdSBarry Smith case 6: 97db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 98db4efbfdSBarry Smith break; 99db4efbfdSBarry Smith case 7: 100db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 101db4efbfdSBarry Smith break; 102db4efbfdSBarry Smith default: 103db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 104db4efbfdSBarry Smith break; 105db4efbfdSBarry Smith } 106db4efbfdSBarry Smith } 107db4efbfdSBarry Smith PetscFunctionReturn(0); 108db4efbfdSBarry Smith } 109db4efbfdSBarry Smith 11083287d42SBarry Smith /* 11183287d42SBarry Smith The symbolic factorization code is identical to that for AIJ format, 11283287d42SBarry Smith except for very small changes since this is now a SeqBAIJ datastructure. 11383287d42SBarry Smith NOT good code reuse. 11483287d42SBarry Smith */ 1154a2ae208SSatish Balay #undef __FUNCT__ 1164a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ" 117*719d5645SBarry Smith PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,MatFactorInfo *info) 11883287d42SBarry Smith { 11983287d42SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 12083287d42SBarry Smith IS isicol; 1216849ba73SBarry Smith PetscErrorCode ierr; 122690b6cddSBarry Smith PetscInt *r,*ic,i,n = a->mbs,*ai = a->i,*aj = a->j; 123d0f46423SBarry Smith PetscInt *ainew,*ajnew,jmax,*fill,*ajtmp,nz,bs = A->rmap->bs,bs2=a->bs2; 124418422e8SSatish Balay PetscInt *idnew,idx,row,m,fm,nnz,nzi,reallocs = 0,nzbd,*im; 12583287d42SBarry Smith PetscReal f = 1.0; 126db4efbfdSBarry Smith PetscTruth row_identity,col_identity; 12783287d42SBarry Smith 12883287d42SBarry Smith PetscFunctionBegin; 129d0f46423SBarry Smith if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 13083287d42SBarry Smith ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 13183287d42SBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 13283287d42SBarry Smith ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 13383287d42SBarry Smith 134d3d32019SBarry Smith f = info->fill; 13583287d42SBarry Smith /* get new row pointers */ 136690b6cddSBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscInt),&ainew);CHKERRQ(ierr); 13783287d42SBarry Smith ainew[0] = 0; 13883287d42SBarry Smith /* don't know how many column pointers are needed so estimate */ 139690b6cddSBarry Smith jmax = (PetscInt)(f*ai[n] + 1); 140690b6cddSBarry Smith ierr = PetscMalloc((jmax)*sizeof(PetscInt),&ajnew);CHKERRQ(ierr); 14183287d42SBarry Smith /* fill is a linked list of nonzeros in active row */ 142690b6cddSBarry Smith ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&fill);CHKERRQ(ierr); 14383287d42SBarry Smith im = fill + n + 1; 14483287d42SBarry Smith /* idnew is location of diagonal in factor */ 145690b6cddSBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscInt),&idnew);CHKERRQ(ierr); 14683287d42SBarry Smith idnew[0] = 0; 14783287d42SBarry Smith 14883287d42SBarry Smith for (i=0; i<n; i++) { 14983287d42SBarry Smith /* first copy previous fill into linked list */ 15083287d42SBarry Smith nnz = nz = ai[r[i]+1] - ai[r[i]]; 15183287d42SBarry Smith if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 15283287d42SBarry Smith ajtmp = aj + ai[r[i]]; 15383287d42SBarry Smith fill[n] = n; 15483287d42SBarry Smith while (nz--) { 15583287d42SBarry Smith fm = n; 15683287d42SBarry Smith idx = ic[*ajtmp++]; 15783287d42SBarry Smith do { 15883287d42SBarry Smith m = fm; 15983287d42SBarry Smith fm = fill[m]; 16083287d42SBarry Smith } while (fm < idx); 16183287d42SBarry Smith fill[m] = idx; 16283287d42SBarry Smith fill[idx] = fm; 16383287d42SBarry Smith } 16483287d42SBarry Smith row = fill[n]; 16583287d42SBarry Smith while (row < i) { 16683287d42SBarry Smith ajtmp = ajnew + idnew[row] + 1; 16783287d42SBarry Smith nzbd = 1 + idnew[row] - ainew[row]; 16883287d42SBarry Smith nz = im[row] - nzbd; 16983287d42SBarry Smith fm = row; 17083287d42SBarry Smith while (nz-- > 0) { 17183287d42SBarry Smith idx = *ajtmp++; 17283287d42SBarry Smith nzbd++; 17383287d42SBarry Smith if (idx == i) im[row] = nzbd; 17483287d42SBarry Smith do { 17583287d42SBarry Smith m = fm; 17683287d42SBarry Smith fm = fill[m]; 17783287d42SBarry Smith } while (fm < idx); 17883287d42SBarry Smith if (fm != idx) { 17983287d42SBarry Smith fill[m] = idx; 18083287d42SBarry Smith fill[idx] = fm; 18183287d42SBarry Smith fm = idx; 18283287d42SBarry Smith nnz++; 18383287d42SBarry Smith } 18483287d42SBarry Smith } 18583287d42SBarry Smith row = fill[row]; 18683287d42SBarry Smith } 18783287d42SBarry Smith /* copy new filled row into permanent storage */ 18883287d42SBarry Smith ainew[i+1] = ainew[i] + nnz; 18983287d42SBarry Smith if (ainew[i+1] > jmax) { 19083287d42SBarry Smith 19183287d42SBarry Smith /* estimate how much additional space we will need */ 19283287d42SBarry Smith /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 19383287d42SBarry Smith /* just double the memory each time */ 194690b6cddSBarry Smith PetscInt maxadd = jmax; 19583287d42SBarry Smith /* maxadd = (int)((f*(ai[n]+1)*(n-i+5))/n); */ 19683287d42SBarry Smith if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 19783287d42SBarry Smith jmax += maxadd; 19883287d42SBarry Smith 19983287d42SBarry Smith /* allocate a longer ajnew */ 200690b6cddSBarry Smith ierr = PetscMalloc(jmax*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 201690b6cddSBarry Smith ierr = PetscMemcpy(ajtmp,ajnew,ainew[i]*sizeof(PetscInt));CHKERRQ(ierr); 20283287d42SBarry Smith ierr = PetscFree(ajnew);CHKERRQ(ierr); 20383287d42SBarry Smith ajnew = ajtmp; 204418422e8SSatish Balay reallocs++; /* count how many times we realloc */ 20583287d42SBarry Smith } 20683287d42SBarry Smith ajtmp = ajnew + ainew[i]; 20783287d42SBarry Smith fm = fill[n]; 20883287d42SBarry Smith nzi = 0; 20983287d42SBarry Smith im[i] = nnz; 21083287d42SBarry Smith while (nnz--) { 21183287d42SBarry Smith if (fm < i) nzi++; 21283287d42SBarry Smith *ajtmp++ = fm; 21383287d42SBarry Smith fm = fill[fm]; 21483287d42SBarry Smith } 21583287d42SBarry Smith idnew[i] = ainew[i] + nzi; 21683287d42SBarry Smith } 21783287d42SBarry Smith 2186cf91177SBarry Smith #if defined(PETSC_USE_INFO) 21983287d42SBarry Smith if (ai[n] != 0) { 22083287d42SBarry Smith PetscReal af = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 221ae15b995SBarry Smith ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 222ae15b995SBarry Smith ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 223ae15b995SBarry Smith ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 224ae15b995SBarry Smith ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 22583287d42SBarry Smith } else { 226ae15b995SBarry Smith ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 22783287d42SBarry Smith } 22863ba0a88SBarry Smith #endif 22983287d42SBarry Smith 23083287d42SBarry Smith ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 23183287d42SBarry Smith ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 23283287d42SBarry Smith 23383287d42SBarry Smith ierr = PetscFree(fill);CHKERRQ(ierr); 23483287d42SBarry Smith 23583287d42SBarry Smith /* put together the new matrix */ 236*719d5645SBarry Smith ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 237*719d5645SBarry Smith ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 238*719d5645SBarry Smith b = (Mat_SeqBAIJ*)(B)->data; 23983287d42SBarry Smith b->singlemalloc = PETSC_FALSE; 240e6b907acSBarry Smith b->free_a = PETSC_TRUE; 241e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 24282502324SSatish Balay ierr = PetscMalloc((ainew[n]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr); 24383287d42SBarry Smith b->j = ajnew; 24483287d42SBarry Smith b->i = ainew; 24583287d42SBarry Smith b->diag = idnew; 24683287d42SBarry Smith b->ilen = 0; 24783287d42SBarry Smith b->imax = 0; 24883287d42SBarry Smith b->row = isrow; 24983287d42SBarry Smith b->col = iscol; 250d3d32019SBarry Smith b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 25183287d42SBarry Smith ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 25283287d42SBarry Smith ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 25383287d42SBarry Smith b->icol = isicol; 25487828ca2SBarry Smith ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 25583287d42SBarry Smith /* In b structure: Free imax, ilen, old a, old j. 25683287d42SBarry Smith Allocate idnew, solve_work, new a, new j */ 257*719d5645SBarry Smith ierr = PetscLogObjectMemory(B,(ainew[n]-n)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 25883287d42SBarry Smith b->maxnz = b->nz = ainew[n]; 25983287d42SBarry Smith 260*719d5645SBarry Smith (B)->info.factor_mallocs = reallocs; 261*719d5645SBarry Smith (B)->info.fill_ratio_given = f; 26283287d42SBarry Smith if (ai[n] != 0) { 263*719d5645SBarry Smith (B)->info.fill_ratio_needed = ((PetscReal)ainew[n])/((PetscReal)ai[n]); 26483287d42SBarry Smith } else { 265*719d5645SBarry Smith (B)->info.fill_ratio_needed = 0.0; 26683287d42SBarry Smith } 267db4efbfdSBarry Smith ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 268db4efbfdSBarry Smith ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 269*719d5645SBarry Smith ierr = MatSeqBAIJSetNumericFactorization(B,row_identity && col_identity);CHKERRQ(ierr); 27083287d42SBarry Smith PetscFunctionReturn(0); 27183287d42SBarry Smith } 272db4efbfdSBarry Smith 273