170c3da92SBarry Smith 2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 3*525d23c0SHong Zhang #include <../src/mat/impls/baij/seq/baij.h> 4*525d23c0SHong Zhang 5*525d23c0SHong Zhang /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */ 6*525d23c0SHong Zhang /* #define JACOBIANCOLOROPT */ 7*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 8*525d23c0SHong Zhang #include <petsctime.h> 9*525d23c0SHong Zhang #endif 10*525d23c0SHong Zhang #undef __FUNCT__ 11*525d23c0SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqAIJ" 12*525d23c0SHong Zhang PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 13*525d23c0SHong Zhang { 14*525d23c0SHong Zhang PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 15*525d23c0SHong Zhang PetscErrorCode ierr; 16*525d23c0SHong Zhang PetscInt k,l,row,col,N; 17*525d23c0SHong Zhang PetscScalar dx,*y,*xx,*w3_array; 18*525d23c0SHong Zhang PetscScalar *vscale_array; 19*525d23c0SHong Zhang PetscReal epsilon=coloring->error_rel,umin = coloring->umin,unorm; 20*525d23c0SHong Zhang Vec w1=coloring->w1,w2=coloring->w2,w3; 21*525d23c0SHong Zhang void *fctx=coloring->fctx; 22*525d23c0SHong Zhang PetscBool flg=PETSC_FALSE; 23*525d23c0SHong Zhang Mat_SeqAIJ *csp=(Mat_SeqAIJ*)J->data; 24*525d23c0SHong Zhang PetscScalar *ca=csp->a; 25*525d23c0SHong Zhang const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 26*525d23c0SHong Zhang PetscInt **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx; 27*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 28*525d23c0SHong Zhang PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11; 29*525d23c0SHong Zhang #endif 30*525d23c0SHong Zhang 31*525d23c0SHong Zhang PetscFunctionBegin; 32*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 33*525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 34*525d23c0SHong Zhang #endif 35*525d23c0SHong Zhang ierr = MatSetUnfactored(J);CHKERRQ(ierr); 36*525d23c0SHong Zhang ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 37*525d23c0SHong Zhang if (flg) { 38*525d23c0SHong Zhang ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 39*525d23c0SHong Zhang } else { 40*525d23c0SHong Zhang PetscBool assembled; 41*525d23c0SHong Zhang ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 42*525d23c0SHong Zhang if (assembled) { 43*525d23c0SHong Zhang ierr = MatZeroEntries(J);CHKERRQ(ierr); 44*525d23c0SHong Zhang } 45*525d23c0SHong Zhang } 46*525d23c0SHong Zhang 47*525d23c0SHong Zhang if (!coloring->vscale) { 48*525d23c0SHong Zhang ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 49*525d23c0SHong Zhang } 50*525d23c0SHong Zhang 51*525d23c0SHong Zhang if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 52*525d23c0SHong Zhang ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 53*525d23c0SHong Zhang } 54*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 55*525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 56*525d23c0SHong Zhang t_init += t1 - t0; 57*525d23c0SHong Zhang #endif 58*525d23c0SHong Zhang 59*525d23c0SHong Zhang /* Set w1 = F(x1) */ 60*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 61*525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 62*525d23c0SHong Zhang #endif 63*525d23c0SHong Zhang if (!coloring->fset) { 64*525d23c0SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 65*525d23c0SHong Zhang ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 66*525d23c0SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 67*525d23c0SHong Zhang } else { 68*525d23c0SHong Zhang coloring->fset = PETSC_FALSE; 69*525d23c0SHong Zhang } 70*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 71*525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 72*525d23c0SHong Zhang t_setvals += t1 - t0; 73*525d23c0SHong Zhang #endif 74*525d23c0SHong Zhang 75*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 76*525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 77*525d23c0SHong Zhang #endif 78*525d23c0SHong Zhang if (!coloring->w3) { 79*525d23c0SHong Zhang ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 80*525d23c0SHong Zhang ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 81*525d23c0SHong Zhang } 82*525d23c0SHong Zhang w3 = coloring->w3; 83*525d23c0SHong Zhang 84*525d23c0SHong Zhang /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */ 85*525d23c0SHong Zhang ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 86*525d23c0SHong Zhang ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 87*525d23c0SHong Zhang ierr = VecGetSize(x1,&N);CHKERRQ(ierr); 88*525d23c0SHong Zhang for (col=0; col<N; col++) { 89*525d23c0SHong Zhang if (coloring->htype[0] == 'w') { 90*525d23c0SHong Zhang dx = 1.0 + unorm; 91*525d23c0SHong Zhang } else { 92*525d23c0SHong Zhang dx = xx[col]; 93*525d23c0SHong Zhang } 94*525d23c0SHong Zhang if (dx == (PetscScalar)0.0) dx = 1.0; 95*525d23c0SHong Zhang if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 96*525d23c0SHong Zhang else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 97*525d23c0SHong Zhang dx *= epsilon; 98*525d23c0SHong Zhang vscale_array[col] = (PetscScalar)dx; 99*525d23c0SHong Zhang } 100*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 101*525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 102*525d23c0SHong Zhang t_init += t1 - t0; 103*525d23c0SHong Zhang #endif 104*525d23c0SHong Zhang 105*525d23c0SHong Zhang nz = 0; 106*525d23c0SHong Zhang for (k=0; k<ncolors; k++) { /* loop over colors */ 107*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 108*525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 109*525d23c0SHong Zhang #endif 110*525d23c0SHong Zhang coloring->currentcolor = k; 111*525d23c0SHong Zhang 112*525d23c0SHong Zhang /* 113*525d23c0SHong Zhang Loop over each column associated with color 114*525d23c0SHong Zhang adding the perturbation to the vector w3 = x1 + dx. 115*525d23c0SHong Zhang */ 116*525d23c0SHong Zhang ierr = VecCopy(x1,w3);CHKERRQ(ierr); 117*525d23c0SHong Zhang ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 118*525d23c0SHong Zhang ncolumns_k = ncolumns[k]; 119*525d23c0SHong Zhang for (l=0; l<ncolumns_k; l++) { /* loop over columns */ 120*525d23c0SHong Zhang col = columns[k][l]; 121*525d23c0SHong Zhang w3_array[col] += vscale_array[col]; 122*525d23c0SHong Zhang } 123*525d23c0SHong Zhang ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 124*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 125*525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 126*525d23c0SHong Zhang t_dx += t1 - t0; 127*525d23c0SHong Zhang #endif 128*525d23c0SHong Zhang 129*525d23c0SHong Zhang /* 130*525d23c0SHong Zhang Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 131*525d23c0SHong Zhang w2 = F(x1 + dx) - F(x1) 132*525d23c0SHong Zhang */ 133*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 134*525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 135*525d23c0SHong Zhang #endif 136*525d23c0SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 137*525d23c0SHong Zhang ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 138*525d23c0SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 139*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 140*525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 141*525d23c0SHong Zhang t_func += t1 - t0; 142*525d23c0SHong Zhang #endif 143*525d23c0SHong Zhang 144*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 145*525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 146*525d23c0SHong Zhang #endif 147*525d23c0SHong Zhang ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 148*525d23c0SHong Zhang 149*525d23c0SHong Zhang /* 150*525d23c0SHong Zhang Loop over rows of vector, putting w2/dx into Jacobian matrix 151*525d23c0SHong Zhang */ 152*525d23c0SHong Zhang ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 153*525d23c0SHong Zhang nrows_k = nrows[k]; 154*525d23c0SHong Zhang for (l=0; l<nrows_k; l++) { /* loop over rows */ 155*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 156*525d23c0SHong Zhang ierr = PetscTime(&t00);CHKERRQ(ierr); 157*525d23c0SHong Zhang #endif 158*525d23c0SHong Zhang row = coloring->rowcolden2sp3[nz++]; 159*525d23c0SHong Zhang col = coloring->rowcolden2sp3[nz++]; 160*525d23c0SHong Zhang spidx = coloring->rowcolden2sp3[nz++]; 161*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 162*525d23c0SHong Zhang ierr = PetscTime(&t11);CHKERRQ(ierr); 163*525d23c0SHong Zhang t_kl += t11 - t00; 164*525d23c0SHong Zhang #endif 165*525d23c0SHong Zhang ca[spidx] = y[row]/vscale_array[col]; 166*525d23c0SHong Zhang } 167*525d23c0SHong Zhang ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 168*525d23c0SHong Zhang 169*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 170*525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 171*525d23c0SHong Zhang t_setvals += t1 - t0; 172*525d23c0SHong Zhang #endif 173*525d23c0SHong Zhang } /* endof for each color */ 174*525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 175*525d23c0SHong Zhang printf(" FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx); 176*525d23c0SHong Zhang printf(" FDColorApply time: kl %g\n",t_kl); 177*525d23c0SHong Zhang #endif 178*525d23c0SHong Zhang ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 179*525d23c0SHong Zhang ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 180*525d23c0SHong Zhang 181*525d23c0SHong Zhang coloring->currentcolor = -1; 182*525d23c0SHong Zhang PetscFunctionReturn(0); 183*525d23c0SHong Zhang } 184639f9d9dSBarry Smith 18579c1e64dSHong Zhang #undef __FUNCT__ 18679c1e64dSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 18779c1e64dSHong Zhang /* 18879c1e64dSHong Zhang This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 18979c1e64dSHong Zhang */ 19079c1e64dSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 19179c1e64dSHong Zhang { 19279c1e64dSHong Zhang PetscErrorCode ierr; 19379c1e64dSHong Zhang PetscInt i,n,nrows,N,j,k,m,ncols,col; 19479c1e64dSHong Zhang const PetscInt *is,*rows,*ci,*cj; 19552011a10SHong Zhang PetscInt nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz; 19679c1e64dSHong Zhang IS *isa; 197*525d23c0SHong Zhang PetscBool isBAIJ; 19879c1e64dSHong Zhang Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 19979c1e64dSHong Zhang 20079c1e64dSHong Zhang PetscFunctionBegin; 20179c1e64dSHong Zhang ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 20252011a10SHong Zhang 203476e0d0aSHong Zhang /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 204476e0d0aSHong Zhang SeqBAIJ calls this routine, thus check it */ 20579c1e64dSHong Zhang ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 206*525d23c0SHong Zhang ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 207*525d23c0SHong Zhang if (!isBAIJ) { 20852011a10SHong Zhang bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */ 20979c1e64dSHong Zhang } 21079c1e64dSHong Zhang N = mat->cmap->N/bs; 21179c1e64dSHong Zhang c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 21279c1e64dSHong Zhang c->N = mat->cmap->N/bs; 21379c1e64dSHong Zhang c->m = mat->rmap->N/bs; 21479c1e64dSHong Zhang c->rstart = 0; 21579c1e64dSHong Zhang 21679c1e64dSHong Zhang c->ncolors = nis; 21779c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 21879c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 21979c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 22085740eacSHong Zhang ierr = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr); 22179c1e64dSHong Zhang 222*525d23c0SHong Zhang if (isBAIJ) { 223*525d23c0SHong Zhang ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 224*525d23c0SHong Zhang } else { 2256378f32dSHong Zhang ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 226*525d23c0SHong Zhang } 22779c1e64dSHong Zhang 228476e0d0aSHong Zhang ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr); 22979c1e64dSHong Zhang ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 23079c1e64dSHong Zhang 23117a7dee1SHong Zhang nz = 0; 23279c1e64dSHong Zhang for (i=0; i<nis; i++) { /* loop over colors */ 23379c1e64dSHong Zhang ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 23479c1e64dSHong Zhang ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 23579c1e64dSHong Zhang 23679c1e64dSHong Zhang c->ncolumns[i] = n; 23779c1e64dSHong Zhang if (n) { 23879c1e64dSHong Zhang ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 23979c1e64dSHong Zhang ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 24079c1e64dSHong Zhang } else { 24179c1e64dSHong Zhang c->columns[i] = 0; 24279c1e64dSHong Zhang } 24379c1e64dSHong Zhang 24479c1e64dSHong Zhang /* fast, crude version requires O(N*N) work */ 245476e0d0aSHong Zhang nrows = 0; 24679c1e64dSHong Zhang for (j=0; j<n; j++) { /* loop over columns */ 24779c1e64dSHong Zhang col = is[j]; 24879c1e64dSHong Zhang rows = cj + ci[col]; 24979c1e64dSHong Zhang m = ci[col+1] - ci[col]; 25079c1e64dSHong Zhang for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 25179c1e64dSHong Zhang rowhit[*rows] = col + 1; 252476e0d0aSHong Zhang spidxhit[*rows++] = spidx[ci[col] + k]; 253476e0d0aSHong Zhang nrows++; 25479c1e64dSHong Zhang } 25579c1e64dSHong Zhang } 256476e0d0aSHong Zhang c->nrows[i] = nrows; /* total num of rows for this color */ 25779c1e64dSHong Zhang 25879c1e64dSHong Zhang nrows = 0; 25979c1e64dSHong Zhang for (j=0; j<N; j++) { /* loop over rows */ 26079c1e64dSHong Zhang if (rowhit[j]) { 26117a7dee1SHong Zhang c->rowcolden2sp3[nz++] = j; /* row index */ 26217a7dee1SHong Zhang c->rowcolden2sp3[nz++] = rowhit[j] - 1; /* column index */ 26317a7dee1SHong Zhang c->rowcolden2sp3[nz++] = spidxhit[j]; /* den2sp index */ 26479c1e64dSHong Zhang rowhit[j] = 0.0; /* zero rowhit for reuse */ 26579c1e64dSHong Zhang } 26679c1e64dSHong Zhang } 26779c1e64dSHong Zhang ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 26879c1e64dSHong Zhang } 26917a7dee1SHong Zhang if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz); 27085740eacSHong Zhang 271*525d23c0SHong Zhang if (isBAIJ) { 272*525d23c0SHong Zhang ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 273*525d23c0SHong Zhang } else { 2746378f32dSHong Zhang ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 275*525d23c0SHong Zhang } 276476e0d0aSHong Zhang ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr); 27779c1e64dSHong Zhang ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 278476e0d0aSHong Zhang 2794737c552SHong Zhang c->ctype = IS_COLORING_GHOSTED; 28079c1e64dSHong Zhang mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 28152011a10SHong Zhang ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 28279c1e64dSHong Zhang PetscFunctionReturn(0); 28379c1e64dSHong Zhang } 28479c1e64dSHong Zhang 2854a2ae208SSatish Balay #undef __FUNCT__ 2864a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 2873acb8795SBarry Smith /* 2883acb8795SBarry Smith This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 2893acb8795SBarry Smith This is why it has the ugly code with the MatGetBlockSize() 2903acb8795SBarry Smith */ 291dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 292639f9d9dSBarry Smith { 2936849ba73SBarry Smith PetscErrorCode ierr; 2941a83f524SJed Brown PetscInt i,n,nrows,N,j,k,m,ncols,col; 2951a83f524SJed Brown const PetscInt *is,*rows,*ci,*cj; 2963acb8795SBarry Smith PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 297b9617806SBarry Smith IS *isa; 298ace3abfcSBarry Smith PetscBool done,flg = PETSC_FALSE; 299476e0d0aSHong Zhang PetscBool flg1; 30079c1e64dSHong Zhang PetscBool new_impl=PETSC_FALSE; 301639f9d9dSBarry Smith 3023a40ed3dSBarry Smith PetscFunctionBegin; 30379c1e64dSHong Zhang ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 30479c1e64dSHong Zhang if (new_impl){ 30579c1e64dSHong Zhang ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 30679c1e64dSHong Zhang PetscFunctionReturn(0); 30779c1e64dSHong Zhang } 308e7e72b3dSBarry Smith if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 309522c5e43SBarry Smith 310b9617806SBarry Smith ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 311476e0d0aSHong Zhang /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 312476e0d0aSHong Zhang SeqBAIJ calls this routine, thus check it */ 313251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 314476e0d0aSHong Zhang if (flg1) { 3153acb8795SBarry Smith ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3163acb8795SBarry Smith } 3173acb8795SBarry Smith 3183acb8795SBarry Smith N = mat->cmap->N/bs; 3193acb8795SBarry Smith c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 3203acb8795SBarry Smith c->N = mat->cmap->N/bs; 3213acb8795SBarry Smith c->m = mat->rmap->N/bs; 322005c665bSBarry Smith c->rstart = 0; 323005c665bSBarry Smith 324639f9d9dSBarry Smith c->ncolors = nis; 32538baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 32638baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 32738baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 32838baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 32938baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 33043a90d84SBarry Smith 3313acb8795SBarry Smith ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 332ce94432eSBarry Smith if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 33370c3da92SBarry Smith 33470c3da92SBarry Smith /* 335639f9d9dSBarry Smith Temporary option to allow for debugging/testing 33670c3da92SBarry Smith */ 3370298fd71SBarry Smith ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 33870c3da92SBarry Smith 33938baddfdSBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 34038baddfdSBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 34170c3da92SBarry Smith 342639f9d9dSBarry Smith for (i=0; i<nis; i++) { 343b9b97703SBarry Smith ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 344639f9d9dSBarry Smith ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 3452205254eSKarl Rupp 346639f9d9dSBarry Smith c->ncolumns[i] = n; 347639f9d9dSBarry Smith if (n) { 34838baddfdSBarry Smith ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 34938baddfdSBarry Smith ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 35070c3da92SBarry Smith } else { 351639f9d9dSBarry Smith c->columns[i] = 0; 35270c3da92SBarry Smith } 35370c3da92SBarry Smith 3543a7fca6bSBarry Smith if (!flg) { /* ------------------------------------------------------------------------------*/ 3553a7fca6bSBarry Smith /* fast, crude version requires O(N*N) work */ 35638baddfdSBarry Smith ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 357639f9d9dSBarry Smith /* loop over columns*/ 358639f9d9dSBarry Smith for (j=0; j<n; j++) { 359639f9d9dSBarry Smith col = is[j]; 360639f9d9dSBarry Smith rows = cj + ci[col]; 361639f9d9dSBarry Smith m = ci[col+1] - ci[col]; 362639f9d9dSBarry Smith /* loop over columns marking them in rowhit */ 363639f9d9dSBarry Smith for (k=0; k<m; k++) { 364639f9d9dSBarry Smith rowhit[*rows++] = col + 1; 36570c3da92SBarry Smith } 36670c3da92SBarry Smith } 367639f9d9dSBarry Smith /* count the number of hits */ 368639f9d9dSBarry Smith nrows = 0; 36970c3da92SBarry Smith for (j=0; j<N; j++) { 370639f9d9dSBarry Smith if (rowhit[j]) nrows++; 371639f9d9dSBarry Smith } 372639f9d9dSBarry Smith c->nrows[i] = nrows; 37338baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 37438baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 375639f9d9dSBarry Smith nrows = 0; 376639f9d9dSBarry Smith for (j=0; j<N; j++) { 377639f9d9dSBarry Smith if (rowhit[j]) { 378639f9d9dSBarry Smith c->rows[i][nrows] = j; 379639f9d9dSBarry Smith c->columnsforrow[i][nrows] = rowhit[j] - 1; 380639f9d9dSBarry Smith nrows++; 38170c3da92SBarry Smith } 38270c3da92SBarry Smith } 383639f9d9dSBarry Smith } else { /*-------------------------------------------------------------------------------*/ 3843a7fca6bSBarry Smith /* slow version, using rowhit as a linked list */ 38538baddfdSBarry Smith PetscInt currentcol,fm,mfm; 386639f9d9dSBarry Smith rowhit[N] = N; 387639f9d9dSBarry Smith nrows = 0; 388639f9d9dSBarry Smith /* loop over columns */ 389639f9d9dSBarry Smith for (j=0; j<n; j++) { 390639f9d9dSBarry Smith col = is[j]; 391639f9d9dSBarry Smith rows = cj + ci[col]; 392639f9d9dSBarry Smith m = ci[col+1] - ci[col]; 393639f9d9dSBarry Smith /* loop over columns marking them in rowhit */ 394639f9d9dSBarry Smith fm = N; /* fm points to first entry in linked list */ 395639f9d9dSBarry Smith for (k=0; k<m; k++) { 396639f9d9dSBarry Smith currentcol = *rows++; 397639f9d9dSBarry Smith /* is it already in the list? */ 398639f9d9dSBarry Smith do { 399639f9d9dSBarry Smith mfm = fm; 400639f9d9dSBarry Smith fm = rowhit[fm]; 401639f9d9dSBarry Smith } while (fm < currentcol); 402639f9d9dSBarry Smith /* not in list so add it */ 403639f9d9dSBarry Smith if (fm != currentcol) { 404639f9d9dSBarry Smith nrows++; 405639f9d9dSBarry Smith columnsforrow[currentcol] = col; 406639f9d9dSBarry Smith /* next three lines insert new entry into linked list */ 407639f9d9dSBarry Smith rowhit[mfm] = currentcol; 408639f9d9dSBarry Smith rowhit[currentcol] = fm; 409639f9d9dSBarry Smith fm = currentcol; 410639f9d9dSBarry Smith /* fm points to present position in list since we know the columns are sorted */ 41171cd77b2SBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 412639f9d9dSBarry Smith } 413639f9d9dSBarry Smith } 414639f9d9dSBarry Smith c->nrows[i] = nrows; 41538baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 41638baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 417639f9d9dSBarry Smith /* now store the linked list of rows into c->rows[i] */ 418639f9d9dSBarry Smith nrows = 0; 419639f9d9dSBarry Smith fm = rowhit[N]; 420639f9d9dSBarry Smith do { 421639f9d9dSBarry Smith c->rows[i][nrows] = fm; 422639f9d9dSBarry Smith c->columnsforrow[i][nrows++] = columnsforrow[fm]; 423639f9d9dSBarry Smith fm = rowhit[fm]; 424639f9d9dSBarry Smith } while (fm < N); 425639f9d9dSBarry Smith } /* ---------------------------------------------------------------------------------------*/ 426639f9d9dSBarry Smith ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 427639f9d9dSBarry Smith } 4283acb8795SBarry Smith ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 429639f9d9dSBarry Smith 430606d414cSSatish Balay ierr = PetscFree(rowhit);CHKERRQ(ierr); 431606d414cSSatish Balay ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 432639f9d9dSBarry Smith 43330b34957SBarry Smith /* Optimize by adding the vscale, and scaleforrow[][] fields */ 43430b34957SBarry Smith /* 43530b34957SBarry Smith see the version for MPIAIJ 43630b34957SBarry Smith */ 437ce94432eSBarry Smith ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 43838baddfdSBarry Smith ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 43930b34957SBarry Smith for (k=0; k<c->ncolors; k++) { 44038baddfdSBarry Smith ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 44130b34957SBarry Smith for (l=0; l<c->nrows[k]; l++) { 44230b34957SBarry Smith col = c->columnsforrow[k][l]; 4432205254eSKarl Rupp 44430b34957SBarry Smith c->vscaleforrow[k][l] = col; 44530b34957SBarry Smith } 44630b34957SBarry Smith } 447b9617806SBarry Smith ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 4483a40ed3dSBarry Smith PetscFunctionReturn(0); 44970c3da92SBarry Smith } 45079c1e64dSHong Zhang 451