170c3da92SBarry Smith 2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 3525d23c0SHong Zhang #include <../src/mat/impls/baij/seq/baij.h> 4525d23c0SHong Zhang 5b582cc96SHong Zhang #undef __FUNCT__ 6b582cc96SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqBAIJ" 7b582cc96SHong Zhang PetscErrorCode MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 8b582cc96SHong Zhang { 9b582cc96SHong Zhang PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 10b582cc96SHong Zhang PetscErrorCode ierr; 11b7799381SHong Zhang PetscInt bs=J->rmap->bs,bs2=bs*bs,i,j,k,l,row,col,N=J->cmap->n,spidx,nz; 12b7799381SHong Zhang PetscScalar dx,*dy_i,*xx,*w3_array,*dy=coloring->dy; 13b582cc96SHong Zhang PetscScalar *vscale_array; 14b582cc96SHong Zhang PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 15b582cc96SHong Zhang Vec w1 = coloring->w1,w2=coloring->w2,w3; 16b582cc96SHong Zhang void *fctx = coloring->fctx; 17b582cc96SHong Zhang PetscBool flg = PETSC_FALSE; 18b582cc96SHong Zhang Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 19d6752224SHong Zhang PetscScalar *ca = csp->a,*ca_l; 20b582cc96SHong Zhang 21b582cc96SHong Zhang PetscFunctionBegin; 22b582cc96SHong Zhang ierr = MatSetUnfactored(J);CHKERRQ(ierr); 23b582cc96SHong Zhang ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 24b582cc96SHong Zhang if (flg) { 25b582cc96SHong Zhang ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 26b582cc96SHong Zhang } else { 27b582cc96SHong Zhang PetscBool assembled; 28b582cc96SHong Zhang ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 29b582cc96SHong Zhang if (assembled) { 30b582cc96SHong Zhang ierr = MatZeroEntries(J);CHKERRQ(ierr); 31b582cc96SHong Zhang } 32b582cc96SHong Zhang } 33b582cc96SHong Zhang if (!coloring->vscale) { 34b582cc96SHong Zhang ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 35b582cc96SHong Zhang } 36b582cc96SHong Zhang 37b582cc96SHong Zhang /* Set w1 = F(x1) */ 38b582cc96SHong Zhang if (!coloring->fset) { 39b582cc96SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 40b582cc96SHong Zhang ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 41b582cc96SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 42b582cc96SHong Zhang } else { 43b582cc96SHong Zhang coloring->fset = PETSC_FALSE; 44b582cc96SHong Zhang } 45b582cc96SHong Zhang 46b582cc96SHong Zhang if (!coloring->w3) { 47b582cc96SHong Zhang ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 48b582cc96SHong Zhang ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 49b582cc96SHong Zhang } 50b582cc96SHong Zhang w3 = coloring->w3; 51b582cc96SHong Zhang 52b7799381SHong Zhang /* Compute local scale factors: vscale = dx */ 53b582cc96SHong Zhang if (coloring->htype[0] == 'w') { 54b7799381SHong Zhang /* tacky test; need to make systematic if we add other approaches to computing h*/ 55b7799381SHong Zhang ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 56b7799381SHong Zhang dx = (1.0 + unorm)*epsilon; 57b7799381SHong Zhang ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr); 58b582cc96SHong Zhang } else { 59b7799381SHong Zhang ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 60b7799381SHong Zhang ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 61b7799381SHong Zhang for (col=0; col<N; col++) { 62b582cc96SHong Zhang dx = xx[col]; 63b7799381SHong Zhang if (dx == 0.0) dx = 1.0; 64b582cc96SHong Zhang if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 65b582cc96SHong Zhang else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 66b582cc96SHong Zhang dx *= epsilon; 67b7799381SHong Zhang vscale_array[col] = dx; 68b582cc96SHong Zhang } 69b7799381SHong Zhang ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 70b7799381SHong Zhang ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 7187d34202SHong Zhang } 7287d34202SHong Zhang 73b582cc96SHong Zhang nz = 0; 74b7799381SHong Zhang ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 75b582cc96SHong Zhang for (k=0; k<coloring->ncolors; k++) { /* Loop over each color */ 76b582cc96SHong Zhang coloring->currentcolor = k; 7787d34202SHong Zhang 7887d34202SHong Zhang /* Compute w3 = x1 + dx */ 79b582cc96SHong Zhang ierr = VecCopy(x1,w3);CHKERRQ(ierr); 80b7799381SHong Zhang dy_i = dy; 81b7799381SHong Zhang for (i=0; i<bs; i++) { /* Loop over a block of columns */ 82b582cc96SHong Zhang ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 83b582cc96SHong Zhang for (l=0; l<coloring->ncolumns[k]; l++) { 84b582cc96SHong Zhang col = i + bs*coloring->columns[k][l]; 85b582cc96SHong Zhang w3_array[col] += vscale_array[col]; 86b7799381SHong Zhang if (i) { 87b7799381SHong Zhang w3_array[col-1] -= vscale_array[col-1]; /* resume original w3[col-1] */ 88b7799381SHong Zhang } 89b582cc96SHong Zhang } 90b582cc96SHong Zhang ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 91b582cc96SHong Zhang 92b7799381SHong Zhang /* Evaluate function w2 = F(w3) - F(x1) */ 93b582cc96SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 94b7799381SHong Zhang ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */ 95b7799381SHong Zhang ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 96b582cc96SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 97b7799381SHong Zhang ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 98b7799381SHong Zhang ierr = VecResetArray(w2);CHKERRQ(ierr); 99b7799381SHong Zhang dy_i += N; /* points to dy+i*N */ 10087d34202SHong Zhang } 101b582cc96SHong Zhang 102b7799381SHong Zhang /* Loop over row blocks, putting dy/dx into Jacobian matrix */ 103b582cc96SHong Zhang for (l=0; l<coloring->nrows[k]; l++) { 104b7799381SHong Zhang row = bs*coloring->rowcolden2sp3[nz++]; 105b7799381SHong Zhang col = bs*coloring->rowcolden2sp3[nz++]; 106b7799381SHong Zhang ca_l = ca + bs2*coloring->rowcolden2sp3[nz++]; 10726cb46bfSHong Zhang spidx = 0; 108b7799381SHong Zhang dy_i = dy; 10926cb46bfSHong Zhang for (i=0; i<bs; i++) { /* column of the block */ 11026cb46bfSHong Zhang for (j=0; j<bs; j++) { /* row of the block */ 111b7799381SHong Zhang ca_l[spidx++] = dy_i[row+j]/vscale_array[col+i]; 112b582cc96SHong Zhang } 113b7799381SHong Zhang dy_i += N; /* points to dy+i*N */ 11426cb46bfSHong Zhang } 11526cb46bfSHong Zhang } 116b582cc96SHong Zhang } /* endof for each color */ 117b582cc96SHong Zhang ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 118b7799381SHong Zhang 119b582cc96SHong Zhang coloring->currentcolor = -1; 120b582cc96SHong Zhang PetscFunctionReturn(0); 121b582cc96SHong Zhang } 122b582cc96SHong Zhang 123525d23c0SHong Zhang /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */ 124525d23c0SHong Zhang #undef __FUNCT__ 125525d23c0SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqAIJ" 126525d23c0SHong Zhang PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 127525d23c0SHong Zhang { 128525d23c0SHong Zhang PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 129525d23c0SHong Zhang PetscErrorCode ierr; 130*0944192fSHong Zhang PetscInt k,l,row,col,N; 131525d23c0SHong Zhang PetscScalar dx,*y,*xx,*w3_array; 132525d23c0SHong Zhang PetscScalar *vscale_array; 133525d23c0SHong Zhang PetscReal epsilon=coloring->error_rel,umin = coloring->umin,unorm; 134525d23c0SHong Zhang Vec w1=coloring->w1,w2=coloring->w2,w3; 135525d23c0SHong Zhang void *fctx=coloring->fctx; 136*0944192fSHong Zhang PetscBool flg=PETSC_FALSE; 137*0944192fSHong Zhang Mat_SeqAIJ *csp=(Mat_SeqAIJ*)J->data; 138*0944192fSHong Zhang PetscScalar *ca=csp->a; 139525d23c0SHong Zhang const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 140525d23c0SHong Zhang PetscInt **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx; 141525d23c0SHong Zhang 142525d23c0SHong Zhang PetscFunctionBegin; 143525d23c0SHong Zhang ierr = MatSetUnfactored(J);CHKERRQ(ierr); 144525d23c0SHong Zhang ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 145525d23c0SHong Zhang if (flg) { 146525d23c0SHong Zhang ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 147525d23c0SHong Zhang } else { 148525d23c0SHong Zhang PetscBool assembled; 149525d23c0SHong Zhang ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 150525d23c0SHong Zhang if (assembled) { 151525d23c0SHong Zhang ierr = MatZeroEntries(J);CHKERRQ(ierr); 152525d23c0SHong Zhang } 153525d23c0SHong Zhang } 154525d23c0SHong Zhang if (!coloring->vscale) { 155525d23c0SHong Zhang ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 156525d23c0SHong Zhang } 157525d23c0SHong Zhang 158525d23c0SHong Zhang /* Set w1 = F(x1) */ 159525d23c0SHong Zhang if (!coloring->fset) { 160525d23c0SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 161525d23c0SHong Zhang ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 162525d23c0SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 163525d23c0SHong Zhang } else { 164525d23c0SHong Zhang coloring->fset = PETSC_FALSE; 165525d23c0SHong Zhang } 166525d23c0SHong Zhang 167525d23c0SHong Zhang if (!coloring->w3) { 168525d23c0SHong Zhang ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 169525d23c0SHong Zhang ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 170525d23c0SHong Zhang } 171525d23c0SHong Zhang w3 = coloring->w3; 172525d23c0SHong Zhang 173*0944192fSHong Zhang /* Compute scale factors: vscale = dx */ 174*0944192fSHong Zhang if (coloring->htype[0] == 'w') { 175*0944192fSHong Zhang /* tacky test; need to make systematic if we add other approaches to computing h*/ 176*0944192fSHong Zhang ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 177*0944192fSHong Zhang dx = (1.0 + unorm)*epsilon; 178*0944192fSHong Zhang ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr); 179*0944192fSHong Zhang } else { 180525d23c0SHong Zhang ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 181525d23c0SHong Zhang ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 182525d23c0SHong Zhang ierr = VecGetSize(x1,&N);CHKERRQ(ierr); 183525d23c0SHong Zhang for (col=0; col<N; col++) { 184525d23c0SHong Zhang dx = xx[col]; 185*0944192fSHong Zhang if (dx == 0.0) dx = 1.0; 186525d23c0SHong Zhang if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 187525d23c0SHong Zhang else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 188525d23c0SHong Zhang dx *= epsilon; 189*0944192fSHong Zhang vscale_array[col] = dx; 190525d23c0SHong Zhang } 191*0944192fSHong Zhang ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 192*0944192fSHong Zhang ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 193*0944192fSHong Zhang } 194525d23c0SHong Zhang 195525d23c0SHong Zhang nz = 0; 196*0944192fSHong Zhang ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 197525d23c0SHong Zhang for (k=0; k<ncolors; k++) { /* loop over colors */ 198525d23c0SHong Zhang coloring->currentcolor = k; 199525d23c0SHong Zhang 200525d23c0SHong Zhang /* 201525d23c0SHong Zhang Loop over each column associated with color 202525d23c0SHong Zhang adding the perturbation to the vector w3 = x1 + dx. 203525d23c0SHong Zhang */ 204525d23c0SHong Zhang ierr = VecCopy(x1,w3);CHKERRQ(ierr); 205525d23c0SHong Zhang ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 206525d23c0SHong Zhang ncolumns_k = ncolumns[k]; 207525d23c0SHong Zhang for (l=0; l<ncolumns_k; l++) { /* loop over columns */ 208525d23c0SHong Zhang col = columns[k][l]; 209525d23c0SHong Zhang w3_array[col] += vscale_array[col]; 210525d23c0SHong Zhang } 211525d23c0SHong Zhang ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 212525d23c0SHong Zhang 213525d23c0SHong Zhang /* 214525d23c0SHong Zhang Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 215525d23c0SHong Zhang w2 = F(x1 + dx) - F(x1) 216525d23c0SHong Zhang */ 217525d23c0SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 218525d23c0SHong Zhang ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 219525d23c0SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 220525d23c0SHong Zhang ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 221525d23c0SHong Zhang 222525d23c0SHong Zhang /* 223525d23c0SHong Zhang Loop over rows of vector, putting w2/dx into Jacobian matrix 224525d23c0SHong Zhang */ 225525d23c0SHong Zhang ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 226525d23c0SHong Zhang nrows_k = nrows[k]; 227525d23c0SHong Zhang for (l=0; l<nrows_k; l++) { /* loop over rows */ 228525d23c0SHong Zhang row = coloring->rowcolden2sp3[nz++]; 229525d23c0SHong Zhang col = coloring->rowcolden2sp3[nz++]; 230525d23c0SHong Zhang spidx = coloring->rowcolden2sp3[nz++]; 231*0944192fSHong Zhang ca[spidx] = y[row]/vscale_array[col]; 232525d23c0SHong Zhang } 233525d23c0SHong Zhang ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 234525d23c0SHong Zhang } /* endof for each color */ 235525d23c0SHong Zhang ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 236525d23c0SHong Zhang 237525d23c0SHong Zhang coloring->currentcolor = -1; 238525d23c0SHong Zhang PetscFunctionReturn(0); 239525d23c0SHong Zhang } 240639f9d9dSBarry Smith 24179c1e64dSHong Zhang #undef __FUNCT__ 24279c1e64dSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 24379c1e64dSHong Zhang /* 24479c1e64dSHong Zhang This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 24579c1e64dSHong Zhang */ 24679c1e64dSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 24779c1e64dSHong Zhang { 24879c1e64dSHong Zhang PetscErrorCode ierr; 24979c1e64dSHong Zhang PetscInt i,n,nrows,N,j,k,m,ncols,col; 25079c1e64dSHong Zhang const PetscInt *is,*rows,*ci,*cj; 25152011a10SHong Zhang PetscInt nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz; 25279c1e64dSHong Zhang IS *isa; 253525d23c0SHong Zhang PetscBool isBAIJ; 25479c1e64dSHong Zhang Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 25579c1e64dSHong Zhang 25679c1e64dSHong Zhang PetscFunctionBegin; 25779c1e64dSHong Zhang ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 25852011a10SHong Zhang 259476e0d0aSHong Zhang /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 260476e0d0aSHong Zhang SeqBAIJ calls this routine, thus check it */ 26179c1e64dSHong Zhang ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 262525d23c0SHong Zhang ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 263525d23c0SHong Zhang if (!isBAIJ) { 26452011a10SHong Zhang bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */ 26579c1e64dSHong Zhang } 26679c1e64dSHong Zhang N = mat->cmap->N/bs; 26779c1e64dSHong Zhang c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 26879c1e64dSHong Zhang c->N = mat->cmap->N/bs; 26979c1e64dSHong Zhang c->m = mat->rmap->N/bs; 27079c1e64dSHong Zhang c->rstart = 0; 27179c1e64dSHong Zhang 27279c1e64dSHong Zhang c->ncolors = nis; 27379c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 27479c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 27579c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 27685740eacSHong Zhang ierr = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr); 27779c1e64dSHong Zhang 278525d23c0SHong Zhang if (isBAIJ) { 279525d23c0SHong Zhang ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 280525d23c0SHong Zhang } else { 2816378f32dSHong Zhang ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 282525d23c0SHong Zhang } 28379c1e64dSHong Zhang 284476e0d0aSHong Zhang ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr); 28579c1e64dSHong Zhang ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 28679c1e64dSHong Zhang 28717a7dee1SHong Zhang nz = 0; 28879c1e64dSHong Zhang for (i=0; i<nis; i++) { /* loop over colors */ 28979c1e64dSHong Zhang ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 29079c1e64dSHong Zhang ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 29179c1e64dSHong Zhang 29279c1e64dSHong Zhang c->ncolumns[i] = n; 29379c1e64dSHong Zhang if (n) { 29479c1e64dSHong Zhang ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 29579c1e64dSHong Zhang ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 29679c1e64dSHong Zhang } else { 29779c1e64dSHong Zhang c->columns[i] = 0; 29879c1e64dSHong Zhang } 29979c1e64dSHong Zhang 30079c1e64dSHong Zhang /* fast, crude version requires O(N*N) work */ 301476e0d0aSHong Zhang nrows = 0; 30279c1e64dSHong Zhang for (j=0; j<n; j++) { /* loop over columns */ 30379c1e64dSHong Zhang col = is[j]; 30479c1e64dSHong Zhang rows = cj + ci[col]; 30579c1e64dSHong Zhang m = ci[col+1] - ci[col]; 30679c1e64dSHong Zhang for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 30779c1e64dSHong Zhang rowhit[*rows] = col + 1; 308476e0d0aSHong Zhang spidxhit[*rows++] = spidx[ci[col] + k]; 309476e0d0aSHong Zhang nrows++; 31079c1e64dSHong Zhang } 31179c1e64dSHong Zhang } 312476e0d0aSHong Zhang c->nrows[i] = nrows; /* total num of rows for this color */ 31379c1e64dSHong Zhang 31479c1e64dSHong Zhang nrows = 0; 31579c1e64dSHong Zhang for (j=0; j<N; j++) { /* loop over rows */ 31679c1e64dSHong Zhang if (rowhit[j]) { 31717a7dee1SHong Zhang c->rowcolden2sp3[nz++] = j; /* row index */ 31817a7dee1SHong Zhang c->rowcolden2sp3[nz++] = rowhit[j] - 1; /* column index */ 31917a7dee1SHong Zhang c->rowcolden2sp3[nz++] = spidxhit[j]; /* den2sp index */ 32079c1e64dSHong Zhang rowhit[j] = 0.0; /* zero rowhit for reuse */ 32179c1e64dSHong Zhang } 32279c1e64dSHong Zhang } 32379c1e64dSHong Zhang ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 32479c1e64dSHong Zhang } 32517a7dee1SHong Zhang if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz); 32685740eacSHong Zhang 327525d23c0SHong Zhang if (isBAIJ) { 328525d23c0SHong Zhang ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 329525d23c0SHong Zhang } else { 3306378f32dSHong Zhang ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 331525d23c0SHong Zhang } 332476e0d0aSHong Zhang ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr); 33379c1e64dSHong Zhang ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 334476e0d0aSHong Zhang 3354737c552SHong Zhang c->ctype = IS_COLORING_GHOSTED; 336b582cc96SHong Zhang if (isBAIJ) { 337b582cc96SHong Zhang mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ; 338b7799381SHong Zhang ierr = PetscMalloc(bs*mat->cmap->N*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr); 339b582cc96SHong Zhang } else { 34079c1e64dSHong Zhang mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 341b582cc96SHong Zhang } 34252011a10SHong Zhang ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 34379c1e64dSHong Zhang PetscFunctionReturn(0); 34479c1e64dSHong Zhang } 34579c1e64dSHong Zhang 3464a2ae208SSatish Balay #undef __FUNCT__ 3474a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 3483acb8795SBarry Smith /* 3493acb8795SBarry Smith This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 3503acb8795SBarry Smith This is why it has the ugly code with the MatGetBlockSize() 3513acb8795SBarry Smith */ 352dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 353639f9d9dSBarry Smith { 3546849ba73SBarry Smith PetscErrorCode ierr; 3551a83f524SJed Brown PetscInt i,n,nrows,N,j,k,m,ncols,col; 3561a83f524SJed Brown const PetscInt *is,*rows,*ci,*cj; 3573acb8795SBarry Smith PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 358b9617806SBarry Smith IS *isa; 359ace3abfcSBarry Smith PetscBool done,flg = PETSC_FALSE; 360476e0d0aSHong Zhang PetscBool flg1; 36179c1e64dSHong Zhang PetscBool new_impl=PETSC_FALSE; 362639f9d9dSBarry Smith 3633a40ed3dSBarry Smith PetscFunctionBegin; 36479c1e64dSHong Zhang ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 36579c1e64dSHong Zhang if (new_impl){ 36679c1e64dSHong Zhang ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 36779c1e64dSHong Zhang PetscFunctionReturn(0); 36879c1e64dSHong Zhang } 369e7e72b3dSBarry Smith if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 370522c5e43SBarry Smith 371b9617806SBarry Smith ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 372476e0d0aSHong Zhang /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 373476e0d0aSHong Zhang SeqBAIJ calls this routine, thus check it */ 374251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 375476e0d0aSHong Zhang if (flg1) { 3763acb8795SBarry Smith ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3773acb8795SBarry Smith } 3783acb8795SBarry Smith 3793acb8795SBarry Smith N = mat->cmap->N/bs; 3803acb8795SBarry Smith c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 3813acb8795SBarry Smith c->N = mat->cmap->N/bs; 3823acb8795SBarry Smith c->m = mat->rmap->N/bs; 383005c665bSBarry Smith c->rstart = 0; 384005c665bSBarry Smith 385639f9d9dSBarry Smith c->ncolors = nis; 38638baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 38738baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 38838baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 38938baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 39038baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 39143a90d84SBarry Smith 3923acb8795SBarry Smith ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 393ce94432eSBarry Smith if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 39470c3da92SBarry Smith 39570c3da92SBarry Smith /* 396639f9d9dSBarry Smith Temporary option to allow for debugging/testing 39770c3da92SBarry Smith */ 3980298fd71SBarry Smith ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 39970c3da92SBarry Smith 40038baddfdSBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 40138baddfdSBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 40270c3da92SBarry Smith 403639f9d9dSBarry Smith for (i=0; i<nis; i++) { 404b9b97703SBarry Smith ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 405639f9d9dSBarry Smith ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 4062205254eSKarl Rupp 407639f9d9dSBarry Smith c->ncolumns[i] = n; 408639f9d9dSBarry Smith if (n) { 40938baddfdSBarry Smith ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 41038baddfdSBarry Smith ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 41170c3da92SBarry Smith } else { 412639f9d9dSBarry Smith c->columns[i] = 0; 41370c3da92SBarry Smith } 41470c3da92SBarry Smith 4153a7fca6bSBarry Smith if (!flg) { /* ------------------------------------------------------------------------------*/ 4163a7fca6bSBarry Smith /* fast, crude version requires O(N*N) work */ 41738baddfdSBarry Smith ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 418639f9d9dSBarry Smith /* loop over columns*/ 419639f9d9dSBarry Smith for (j=0; j<n; j++) { 420639f9d9dSBarry Smith col = is[j]; 421639f9d9dSBarry Smith rows = cj + ci[col]; 422639f9d9dSBarry Smith m = ci[col+1] - ci[col]; 423639f9d9dSBarry Smith /* loop over columns marking them in rowhit */ 424639f9d9dSBarry Smith for (k=0; k<m; k++) { 425639f9d9dSBarry Smith rowhit[*rows++] = col + 1; 42670c3da92SBarry Smith } 42770c3da92SBarry Smith } 428639f9d9dSBarry Smith /* count the number of hits */ 429639f9d9dSBarry Smith nrows = 0; 43070c3da92SBarry Smith for (j=0; j<N; j++) { 431639f9d9dSBarry Smith if (rowhit[j]) nrows++; 432639f9d9dSBarry Smith } 433639f9d9dSBarry Smith c->nrows[i] = nrows; 43438baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 43538baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 436639f9d9dSBarry Smith nrows = 0; 437639f9d9dSBarry Smith for (j=0; j<N; j++) { 438639f9d9dSBarry Smith if (rowhit[j]) { 439639f9d9dSBarry Smith c->rows[i][nrows] = j; 440639f9d9dSBarry Smith c->columnsforrow[i][nrows] = rowhit[j] - 1; 441639f9d9dSBarry Smith nrows++; 44270c3da92SBarry Smith } 44370c3da92SBarry Smith } 444639f9d9dSBarry Smith } else { /*-------------------------------------------------------------------------------*/ 4453a7fca6bSBarry Smith /* slow version, using rowhit as a linked list */ 44638baddfdSBarry Smith PetscInt currentcol,fm,mfm; 447639f9d9dSBarry Smith rowhit[N] = N; 448639f9d9dSBarry Smith nrows = 0; 449639f9d9dSBarry Smith /* loop over columns */ 450639f9d9dSBarry Smith for (j=0; j<n; j++) { 451639f9d9dSBarry Smith col = is[j]; 452639f9d9dSBarry Smith rows = cj + ci[col]; 453639f9d9dSBarry Smith m = ci[col+1] - ci[col]; 454639f9d9dSBarry Smith /* loop over columns marking them in rowhit */ 455639f9d9dSBarry Smith fm = N; /* fm points to first entry in linked list */ 456639f9d9dSBarry Smith for (k=0; k<m; k++) { 457639f9d9dSBarry Smith currentcol = *rows++; 458639f9d9dSBarry Smith /* is it already in the list? */ 459639f9d9dSBarry Smith do { 460639f9d9dSBarry Smith mfm = fm; 461639f9d9dSBarry Smith fm = rowhit[fm]; 462639f9d9dSBarry Smith } while (fm < currentcol); 463639f9d9dSBarry Smith /* not in list so add it */ 464639f9d9dSBarry Smith if (fm != currentcol) { 465639f9d9dSBarry Smith nrows++; 466639f9d9dSBarry Smith columnsforrow[currentcol] = col; 467639f9d9dSBarry Smith /* next three lines insert new entry into linked list */ 468639f9d9dSBarry Smith rowhit[mfm] = currentcol; 469639f9d9dSBarry Smith rowhit[currentcol] = fm; 470639f9d9dSBarry Smith fm = currentcol; 471639f9d9dSBarry Smith /* fm points to present position in list since we know the columns are sorted */ 47271cd77b2SBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 473639f9d9dSBarry Smith } 474639f9d9dSBarry Smith } 475639f9d9dSBarry Smith c->nrows[i] = nrows; 47638baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 47738baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 478639f9d9dSBarry Smith /* now store the linked list of rows into c->rows[i] */ 479639f9d9dSBarry Smith nrows = 0; 480639f9d9dSBarry Smith fm = rowhit[N]; 481639f9d9dSBarry Smith do { 482639f9d9dSBarry Smith c->rows[i][nrows] = fm; 483639f9d9dSBarry Smith c->columnsforrow[i][nrows++] = columnsforrow[fm]; 484639f9d9dSBarry Smith fm = rowhit[fm]; 485639f9d9dSBarry Smith } while (fm < N); 486639f9d9dSBarry Smith } /* ---------------------------------------------------------------------------------------*/ 487639f9d9dSBarry Smith ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 488639f9d9dSBarry Smith } 4893acb8795SBarry Smith ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 490639f9d9dSBarry Smith 491606d414cSSatish Balay ierr = PetscFree(rowhit);CHKERRQ(ierr); 492606d414cSSatish Balay ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 493639f9d9dSBarry Smith 49430b34957SBarry Smith /* Optimize by adding the vscale, and scaleforrow[][] fields */ 49530b34957SBarry Smith /* 49630b34957SBarry Smith see the version for MPIAIJ 49730b34957SBarry Smith */ 498ce94432eSBarry Smith ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 49938baddfdSBarry Smith ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 50030b34957SBarry Smith for (k=0; k<c->ncolors; k++) { 50138baddfdSBarry Smith ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 50230b34957SBarry Smith for (l=0; l<c->nrows[k]; l++) { 50330b34957SBarry Smith col = c->columnsforrow[k][l]; 5042205254eSKarl Rupp 50530b34957SBarry Smith c->vscaleforrow[k][l] = col; 50630b34957SBarry Smith } 50730b34957SBarry Smith } 508b9617806SBarry Smith ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 5093a40ed3dSBarry Smith PetscFunctionReturn(0); 51070c3da92SBarry Smith } 51179c1e64dSHong Zhang 512