170c3da92SBarry Smith 2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 3525d23c0SHong Zhang #include <../src/mat/impls/baij/seq/baij.h> 4525d23c0SHong Zhang 5b582cc96SHong Zhang #undef __FUNCT__ 6b582cc96SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqBAIJ" 7b582cc96SHong Zhang PetscErrorCode MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 8b582cc96SHong Zhang { 9b582cc96SHong Zhang PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 10b582cc96SHong Zhang PetscErrorCode ierr; 119a09c712SHong Zhang PetscInt bs=J->rmap->bs,i,j,k,l,row,col,N=J->cmap->n,spidx,nz; 12b7799381SHong Zhang PetscScalar dx,*dy_i,*xx,*w3_array,*dy=coloring->dy; 13b582cc96SHong Zhang PetscScalar *vscale_array; 14b582cc96SHong Zhang PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 15b582cc96SHong Zhang Vec w1 = coloring->w1,w2=coloring->w2,w3; 16b582cc96SHong Zhang void *fctx = coloring->fctx; 17b582cc96SHong Zhang PetscBool flg = PETSC_FALSE; 189a09c712SHong Zhang PetscScalar *valaddr; 199a09c712SHong Zhang MatEntry *Jentry=coloring->matentry; 20b582cc96SHong Zhang 21b582cc96SHong Zhang PetscFunctionBegin; 22b582cc96SHong Zhang ierr = MatSetUnfactored(J);CHKERRQ(ierr); 23b582cc96SHong Zhang ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 24b582cc96SHong Zhang if (flg) { 25b582cc96SHong Zhang ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 26b582cc96SHong Zhang } else { 27b582cc96SHong Zhang PetscBool assembled; 28b582cc96SHong Zhang ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 29b582cc96SHong Zhang if (assembled) { 30b582cc96SHong Zhang ierr = MatZeroEntries(J);CHKERRQ(ierr); 31b582cc96SHong Zhang } 32b582cc96SHong Zhang } 33b582cc96SHong Zhang if (!coloring->vscale) { 34b582cc96SHong Zhang ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 35b582cc96SHong Zhang } 36b582cc96SHong Zhang 37b582cc96SHong Zhang /* Set w1 = F(x1) */ 38b582cc96SHong Zhang if (!coloring->fset) { 39b582cc96SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 40b582cc96SHong Zhang ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 41b582cc96SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 42b582cc96SHong Zhang } else { 43b582cc96SHong Zhang coloring->fset = PETSC_FALSE; 44b582cc96SHong Zhang } 45b582cc96SHong Zhang 46b582cc96SHong Zhang if (!coloring->w3) { 47b582cc96SHong Zhang ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 48b582cc96SHong Zhang ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 49b582cc96SHong Zhang } 50b582cc96SHong Zhang w3 = coloring->w3; 51b582cc96SHong Zhang 52b7799381SHong Zhang /* Compute local scale factors: vscale = dx */ 53b582cc96SHong Zhang if (coloring->htype[0] == 'w') { 54b7799381SHong Zhang /* tacky test; need to make systematic if we add other approaches to computing h*/ 55b7799381SHong Zhang ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 56b7799381SHong Zhang dx = (1.0 + unorm)*epsilon; 57b7799381SHong Zhang ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr); 58b582cc96SHong Zhang } else { 59b7799381SHong Zhang ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 60b7799381SHong Zhang ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 61b7799381SHong Zhang for (col=0; col<N; col++) { 62b582cc96SHong Zhang dx = xx[col]; 63b7799381SHong Zhang if (dx == 0.0) dx = 1.0; 64b582cc96SHong Zhang if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 65b582cc96SHong Zhang else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 66b582cc96SHong Zhang dx *= epsilon; 67b7799381SHong Zhang vscale_array[col] = dx; 68b582cc96SHong Zhang } 69b7799381SHong Zhang ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 70b7799381SHong Zhang ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 7187d34202SHong Zhang } 7287d34202SHong Zhang 73b582cc96SHong Zhang nz = 0; 74b7799381SHong Zhang ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 75b582cc96SHong Zhang for (k=0; k<coloring->ncolors; k++) { /* Loop over each color */ 76b582cc96SHong Zhang coloring->currentcolor = k; 7787d34202SHong Zhang 7887d34202SHong Zhang /* Compute w3 = x1 + dx */ 79b582cc96SHong Zhang ierr = VecCopy(x1,w3);CHKERRQ(ierr); 80b7799381SHong Zhang dy_i = dy; 81b7799381SHong Zhang for (i=0; i<bs; i++) { /* Loop over a block of columns */ 82b582cc96SHong Zhang ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 83b582cc96SHong Zhang for (l=0; l<coloring->ncolumns[k]; l++) { 84b582cc96SHong Zhang col = i + bs*coloring->columns[k][l]; 85b582cc96SHong Zhang w3_array[col] += vscale_array[col]; 86b7799381SHong Zhang if (i) { 87b7799381SHong Zhang w3_array[col-1] -= vscale_array[col-1]; /* resume original w3[col-1] */ 88b7799381SHong Zhang } 89b582cc96SHong Zhang } 90b582cc96SHong Zhang ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 91b582cc96SHong Zhang 92b7799381SHong Zhang /* Evaluate function w2 = F(w3) - F(x1) */ 93b582cc96SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 94b7799381SHong Zhang ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */ 95b7799381SHong Zhang ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 96b582cc96SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 97b7799381SHong Zhang ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 98b7799381SHong Zhang ierr = VecResetArray(w2);CHKERRQ(ierr); 99b7799381SHong Zhang dy_i += N; /* points to dy+i*N */ 10087d34202SHong Zhang } 101b582cc96SHong Zhang 102b7799381SHong Zhang /* Loop over row blocks, putting dy/dx into Jacobian matrix */ 103b582cc96SHong Zhang for (l=0; l<coloring->nrows[k]; l++) { 1049a09c712SHong Zhang row = bs*Jentry[nz].row; 1059a09c712SHong Zhang col = bs*Jentry[nz].col; 1069a09c712SHong Zhang valaddr = Jentry[nz].valaddr; 1079a09c712SHong Zhang nz++; 10826cb46bfSHong Zhang spidx = 0; 109b7799381SHong Zhang dy_i = dy; 11026cb46bfSHong Zhang for (i=0; i<bs; i++) { /* column of the block */ 11126cb46bfSHong Zhang for (j=0; j<bs; j++) { /* row of the block */ 1129a09c712SHong Zhang valaddr[spidx++] = dy_i[row+j]/vscale_array[col+i]; 113b582cc96SHong Zhang } 114b7799381SHong Zhang dy_i += N; /* points to dy+i*N */ 11526cb46bfSHong Zhang } 11626cb46bfSHong Zhang } 117b582cc96SHong Zhang } /* endof for each color */ 118b582cc96SHong Zhang ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 119b7799381SHong Zhang 120b582cc96SHong Zhang coloring->currentcolor = -1; 121b582cc96SHong Zhang PetscFunctionReturn(0); 122b582cc96SHong Zhang } 123b582cc96SHong Zhang 124525d23c0SHong Zhang /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */ 125525d23c0SHong Zhang #undef __FUNCT__ 126525d23c0SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqAIJ" 127525d23c0SHong Zhang PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 128525d23c0SHong Zhang { 129525d23c0SHong Zhang PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 130525d23c0SHong Zhang PetscErrorCode ierr; 1310944192fSHong Zhang PetscInt k,l,row,col,N; 132525d23c0SHong Zhang PetscScalar dx,*y,*xx,*w3_array; 133525d23c0SHong Zhang PetscScalar *vscale_array; 134525d23c0SHong Zhang PetscReal epsilon=coloring->error_rel,umin = coloring->umin,unorm; 135525d23c0SHong Zhang Vec w1=coloring->w1,w2=coloring->w2,w3; 136525d23c0SHong Zhang void *fctx=coloring->fctx; 1370944192fSHong Zhang PetscBool flg=PETSC_FALSE; 138525d23c0SHong Zhang const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 1399a09c712SHong Zhang PetscInt **columns=coloring->columns,ncolumns_k,nrows_k,nz; 1409a09c712SHong Zhang MatEntry *Jentry=coloring->matentry; 141525d23c0SHong Zhang 142525d23c0SHong Zhang PetscFunctionBegin; 143525d23c0SHong Zhang ierr = MatSetUnfactored(J);CHKERRQ(ierr); 144525d23c0SHong Zhang ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 145525d23c0SHong Zhang if (flg) { 146525d23c0SHong Zhang ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 147525d23c0SHong Zhang } else { 148525d23c0SHong Zhang PetscBool assembled; 149525d23c0SHong Zhang ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 150525d23c0SHong Zhang if (assembled) { 151525d23c0SHong Zhang ierr = MatZeroEntries(J);CHKERRQ(ierr); 152525d23c0SHong Zhang } 153525d23c0SHong Zhang } 154525d23c0SHong Zhang if (!coloring->vscale) { 155525d23c0SHong Zhang ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 156525d23c0SHong Zhang } 157525d23c0SHong Zhang 158525d23c0SHong Zhang /* Set w1 = F(x1) */ 159525d23c0SHong Zhang if (!coloring->fset) { 160525d23c0SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 161525d23c0SHong Zhang ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 162525d23c0SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 163525d23c0SHong Zhang } else { 164525d23c0SHong Zhang coloring->fset = PETSC_FALSE; 165525d23c0SHong Zhang } 166525d23c0SHong Zhang 167525d23c0SHong Zhang if (!coloring->w3) { 168525d23c0SHong Zhang ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 169525d23c0SHong Zhang ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 170525d23c0SHong Zhang } 171525d23c0SHong Zhang w3 = coloring->w3; 172525d23c0SHong Zhang 1730944192fSHong Zhang /* Compute scale factors: vscale = dx */ 1740944192fSHong Zhang if (coloring->htype[0] == 'w') { 1750944192fSHong Zhang /* tacky test; need to make systematic if we add other approaches to computing h*/ 1760944192fSHong Zhang ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 1770944192fSHong Zhang dx = (1.0 + unorm)*epsilon; 1780944192fSHong Zhang ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr); 1790944192fSHong Zhang } else { 180525d23c0SHong Zhang ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 181525d23c0SHong Zhang ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 182525d23c0SHong Zhang ierr = VecGetSize(x1,&N);CHKERRQ(ierr); 183525d23c0SHong Zhang for (col=0; col<N; col++) { 184525d23c0SHong Zhang dx = xx[col]; 1850944192fSHong Zhang if (dx == 0.0) dx = 1.0; 186525d23c0SHong Zhang if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 187525d23c0SHong Zhang else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 188525d23c0SHong Zhang dx *= epsilon; 1890944192fSHong Zhang vscale_array[col] = dx; 190525d23c0SHong Zhang } 1910944192fSHong Zhang ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 1920944192fSHong Zhang ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 1930944192fSHong Zhang } 194525d23c0SHong Zhang 195525d23c0SHong Zhang nz = 0; 1960944192fSHong Zhang ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 197525d23c0SHong Zhang for (k=0; k<ncolors; k++) { /* loop over colors */ 198525d23c0SHong Zhang coloring->currentcolor = k; 199525d23c0SHong Zhang 200525d23c0SHong Zhang /* 201525d23c0SHong Zhang Loop over each column associated with color 202525d23c0SHong Zhang adding the perturbation to the vector w3 = x1 + dx. 203525d23c0SHong Zhang */ 204525d23c0SHong Zhang ierr = VecCopy(x1,w3);CHKERRQ(ierr); 205525d23c0SHong Zhang ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 206525d23c0SHong Zhang ncolumns_k = ncolumns[k]; 207525d23c0SHong Zhang for (l=0; l<ncolumns_k; l++) { /* loop over columns */ 208525d23c0SHong Zhang col = columns[k][l]; 209525d23c0SHong Zhang w3_array[col] += vscale_array[col]; 210525d23c0SHong Zhang } 211525d23c0SHong Zhang ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 212525d23c0SHong Zhang 213525d23c0SHong Zhang /* 214525d23c0SHong Zhang Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 215525d23c0SHong Zhang w2 = F(x1 + dx) - F(x1) 216525d23c0SHong Zhang */ 217525d23c0SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 218525d23c0SHong Zhang ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 219525d23c0SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 220525d23c0SHong Zhang ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 221525d23c0SHong Zhang 222525d23c0SHong Zhang /* 223525d23c0SHong Zhang Loop over rows of vector, putting w2/dx into Jacobian matrix 224525d23c0SHong Zhang */ 225525d23c0SHong Zhang ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 226525d23c0SHong Zhang nrows_k = nrows[k]; 2279a09c712SHong Zhang nrows_k = nrows[k]; 228525d23c0SHong Zhang for (l=0; l<nrows_k; l++) { /* loop over rows */ 2299a09c712SHong Zhang row = Jentry[nz].row; 2309a09c712SHong Zhang col = Jentry[nz].col; 2319a09c712SHong Zhang *(Jentry[nz++].valaddr) = y[row]/vscale_array[col]; 232525d23c0SHong Zhang } 233525d23c0SHong Zhang ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 234525d23c0SHong Zhang } /* endof for each color */ 235525d23c0SHong Zhang ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 236525d23c0SHong Zhang 237525d23c0SHong Zhang coloring->currentcolor = -1; 238525d23c0SHong Zhang PetscFunctionReturn(0); 239525d23c0SHong Zhang } 240639f9d9dSBarry Smith 24179c1e64dSHong Zhang #undef __FUNCT__ 242*4b2e90caSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_new" 24379c1e64dSHong Zhang /* 24479c1e64dSHong Zhang This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 24579c1e64dSHong Zhang */ 246*4b2e90caSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_new(Mat mat,ISColoring iscoloring,MatFDColoring c) 24779c1e64dSHong Zhang { 24879c1e64dSHong Zhang PetscErrorCode ierr; 24979c1e64dSHong Zhang PetscInt i,n,nrows,N,j,k,m,ncols,col; 250*4b2e90caSHong Zhang const PetscInt *is,*row,*ci,*cj; 251*4b2e90caSHong Zhang PetscInt nis=iscoloring->n,*rowhit,bs,bs2,*spidx,nz; 25279c1e64dSHong Zhang IS *isa; 253525d23c0SHong Zhang PetscBool isBAIJ; 254*4b2e90caSHong Zhang Mat_SeqAIJ *spA = (Mat_SeqAIJ*)mat->data; 255*4b2e90caSHong Zhang PetscScalar *A_val=spA->a; 2569a09c712SHong Zhang PetscScalar **valaddrhit; 2579a09c712SHong Zhang MatEntry *Jentry; 25879c1e64dSHong Zhang 25979c1e64dSHong Zhang PetscFunctionBegin; 26079c1e64dSHong Zhang ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 26152011a10SHong Zhang 262476e0d0aSHong Zhang /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 263476e0d0aSHong Zhang SeqBAIJ calls this routine, thus check it */ 26479c1e64dSHong Zhang ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 265525d23c0SHong Zhang ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 266525d23c0SHong Zhang if (!isBAIJ) { 26752011a10SHong Zhang bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */ 26879c1e64dSHong Zhang } 26979c1e64dSHong Zhang N = mat->cmap->N/bs; 27079c1e64dSHong Zhang c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 27179c1e64dSHong Zhang c->N = mat->cmap->N/bs; 27279c1e64dSHong Zhang c->m = mat->rmap->N/bs; 27379c1e64dSHong Zhang c->rstart = 0; 27479c1e64dSHong Zhang 27579c1e64dSHong Zhang c->ncolors = nis; 27679c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 27779c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 27879c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 279*4b2e90caSHong Zhang ierr = PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));CHKERRQ(ierr); 2809a09c712SHong Zhang 281*4b2e90caSHong Zhang ierr = PetscMalloc(spA->nz*sizeof(MatEntry),&Jentry);CHKERRQ(ierr); 282*4b2e90caSHong Zhang ierr = PetscLogObjectMemory((PetscObject)c,spA->nz*sizeof(MatEntry));CHKERRQ(ierr); 2839a09c712SHong Zhang c->matentry = Jentry; 28479c1e64dSHong Zhang 285525d23c0SHong Zhang if (isBAIJ) { 286525d23c0SHong Zhang ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 287525d23c0SHong Zhang } else { 2886378f32dSHong Zhang ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 289525d23c0SHong Zhang } 29079c1e64dSHong Zhang 291*4b2e90caSHong Zhang ierr = PetscMalloc2(c->m,PetscInt,&rowhit,c->m,PetscScalar*,&valaddrhit);CHKERRQ(ierr); 29279c1e64dSHong Zhang ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 29379c1e64dSHong Zhang 29417a7dee1SHong Zhang nz = 0; 29579c1e64dSHong Zhang for (i=0; i<nis; i++) { /* loop over colors */ 29679c1e64dSHong Zhang ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 29779c1e64dSHong Zhang ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 29879c1e64dSHong Zhang 29979c1e64dSHong Zhang c->ncolumns[i] = n; 30079c1e64dSHong Zhang if (n) { 30179c1e64dSHong Zhang ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 302*4b2e90caSHong Zhang ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr); 30379c1e64dSHong Zhang ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 30479c1e64dSHong Zhang } else { 30579c1e64dSHong Zhang c->columns[i] = 0; 30679c1e64dSHong Zhang } 30779c1e64dSHong Zhang 30879c1e64dSHong Zhang /* fast, crude version requires O(N*N) work */ 3099a09c712SHong Zhang bs2 = bs*bs; 310476e0d0aSHong Zhang nrows = 0; 31179c1e64dSHong Zhang for (j=0; j<n; j++) { /* loop over columns */ 31279c1e64dSHong Zhang col = is[j]; 313*4b2e90caSHong Zhang row = cj + ci[col]; 31479c1e64dSHong Zhang m = ci[col+1] - ci[col]; 315*4b2e90caSHong Zhang nrows += m; 31679c1e64dSHong Zhang for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 317*4b2e90caSHong Zhang rowhit[*row] = col + 1; 318*4b2e90caSHong Zhang valaddrhit[*row++] = &A_val[bs2*spidx[ci[col] + k]]; 31979c1e64dSHong Zhang } 32079c1e64dSHong Zhang } 321476e0d0aSHong Zhang c->nrows[i] = nrows; /* total num of rows for this color */ 32279c1e64dSHong Zhang 32379c1e64dSHong Zhang for (j=0; j<N; j++) { /* loop over rows */ 32479c1e64dSHong Zhang if (rowhit[j]) { 3259a09c712SHong Zhang Jentry[nz].row = j; /* local row index */ 3269a09c712SHong Zhang Jentry[nz].col = rowhit[j] - 1; /* local column index */ 3279a09c712SHong Zhang Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 3289a09c712SHong Zhang nz++; 32979c1e64dSHong Zhang rowhit[j] = 0.0; /* zero rowhit for reuse */ 33079c1e64dSHong Zhang } 33179c1e64dSHong Zhang } 33279c1e64dSHong Zhang ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 33379c1e64dSHong Zhang } 334*4b2e90caSHong Zhang if (nz != spA->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz,spA->nz); 33585740eacSHong Zhang 336525d23c0SHong Zhang if (isBAIJ) { 337525d23c0SHong Zhang ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 338525d23c0SHong Zhang } else { 3396378f32dSHong Zhang ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 340525d23c0SHong Zhang } 341*4b2e90caSHong Zhang ierr = PetscFree2(rowhit,valaddrhit);CHKERRQ(ierr); 34279c1e64dSHong Zhang ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 343476e0d0aSHong Zhang 3444737c552SHong Zhang c->ctype = IS_COLORING_GHOSTED; 345b582cc96SHong Zhang if (isBAIJ) { 346b582cc96SHong Zhang mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ; 347b7799381SHong Zhang ierr = PetscMalloc(bs*mat->cmap->N*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr); 348b582cc96SHong Zhang } else { 34979c1e64dSHong Zhang mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 350b582cc96SHong Zhang } 35152011a10SHong Zhang ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 35279c1e64dSHong Zhang PetscFunctionReturn(0); 35379c1e64dSHong Zhang } 35479c1e64dSHong Zhang 3554a2ae208SSatish Balay #undef __FUNCT__ 3564a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 3573acb8795SBarry Smith /* 3583acb8795SBarry Smith This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 3593acb8795SBarry Smith This is why it has the ugly code with the MatGetBlockSize() 3603acb8795SBarry Smith */ 361dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 362639f9d9dSBarry Smith { 3636849ba73SBarry Smith PetscErrorCode ierr; 3641a83f524SJed Brown PetscInt i,n,nrows,N,j,k,m,ncols,col; 3651a83f524SJed Brown const PetscInt *is,*rows,*ci,*cj; 3663acb8795SBarry Smith PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 367b9617806SBarry Smith IS *isa; 368ace3abfcSBarry Smith PetscBool done,flg = PETSC_FALSE; 369476e0d0aSHong Zhang PetscBool flg1; 37079c1e64dSHong Zhang PetscBool new_impl=PETSC_FALSE; 371639f9d9dSBarry Smith 3723a40ed3dSBarry Smith PetscFunctionBegin; 37379c1e64dSHong Zhang ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 37479c1e64dSHong Zhang if (new_impl){ 375*4b2e90caSHong Zhang ierr = MatFDColoringCreate_SeqAIJ_new(mat,iscoloring,c);CHKERRQ(ierr); 37679c1e64dSHong Zhang PetscFunctionReturn(0); 37779c1e64dSHong Zhang } 378e7e72b3dSBarry Smith if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 379522c5e43SBarry Smith 380b9617806SBarry Smith ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 381476e0d0aSHong Zhang /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 382476e0d0aSHong Zhang SeqBAIJ calls this routine, thus check it */ 383251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 384476e0d0aSHong Zhang if (flg1) { 3853acb8795SBarry Smith ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3863acb8795SBarry Smith } 3873acb8795SBarry Smith 3883acb8795SBarry Smith N = mat->cmap->N/bs; 3893acb8795SBarry Smith c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 3903acb8795SBarry Smith c->N = mat->cmap->N/bs; 3913acb8795SBarry Smith c->m = mat->rmap->N/bs; 392005c665bSBarry Smith c->rstart = 0; 393005c665bSBarry Smith 394639f9d9dSBarry Smith c->ncolors = nis; 39538baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 39638baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 39738baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 39838baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 39938baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 40043a90d84SBarry Smith 4013acb8795SBarry Smith ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 402ce94432eSBarry Smith if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 40370c3da92SBarry Smith 40470c3da92SBarry Smith /* 405639f9d9dSBarry Smith Temporary option to allow for debugging/testing 40670c3da92SBarry Smith */ 4070298fd71SBarry Smith ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 40870c3da92SBarry Smith 40938baddfdSBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 41038baddfdSBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 41170c3da92SBarry Smith 412639f9d9dSBarry Smith for (i=0; i<nis; i++) { 413b9b97703SBarry Smith ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 414639f9d9dSBarry Smith ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 4152205254eSKarl Rupp 416639f9d9dSBarry Smith c->ncolumns[i] = n; 417639f9d9dSBarry Smith if (n) { 41838baddfdSBarry Smith ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 41938baddfdSBarry Smith ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 42070c3da92SBarry Smith } else { 421639f9d9dSBarry Smith c->columns[i] = 0; 42270c3da92SBarry Smith } 42370c3da92SBarry Smith 4243a7fca6bSBarry Smith if (!flg) { /* ------------------------------------------------------------------------------*/ 4253a7fca6bSBarry Smith /* fast, crude version requires O(N*N) work */ 42638baddfdSBarry Smith ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 427639f9d9dSBarry Smith /* loop over columns*/ 428639f9d9dSBarry Smith for (j=0; j<n; j++) { 429639f9d9dSBarry Smith col = is[j]; 430639f9d9dSBarry Smith rows = cj + ci[col]; 431639f9d9dSBarry Smith m = ci[col+1] - ci[col]; 432639f9d9dSBarry Smith /* loop over columns marking them in rowhit */ 433639f9d9dSBarry Smith for (k=0; k<m; k++) { 434639f9d9dSBarry Smith rowhit[*rows++] = col + 1; 43570c3da92SBarry Smith } 43670c3da92SBarry Smith } 437639f9d9dSBarry Smith /* count the number of hits */ 438639f9d9dSBarry Smith nrows = 0; 43970c3da92SBarry Smith for (j=0; j<N; j++) { 440639f9d9dSBarry Smith if (rowhit[j]) nrows++; 441639f9d9dSBarry Smith } 442639f9d9dSBarry Smith c->nrows[i] = nrows; 44338baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 44438baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 445639f9d9dSBarry Smith nrows = 0; 446639f9d9dSBarry Smith for (j=0; j<N; j++) { 447639f9d9dSBarry Smith if (rowhit[j]) { 448639f9d9dSBarry Smith c->rows[i][nrows] = j; 449639f9d9dSBarry Smith c->columnsforrow[i][nrows] = rowhit[j] - 1; 450639f9d9dSBarry Smith nrows++; 45170c3da92SBarry Smith } 45270c3da92SBarry Smith } 453639f9d9dSBarry Smith } else { /*-------------------------------------------------------------------------------*/ 4543a7fca6bSBarry Smith /* slow version, using rowhit as a linked list */ 45538baddfdSBarry Smith PetscInt currentcol,fm,mfm; 456639f9d9dSBarry Smith rowhit[N] = N; 457639f9d9dSBarry Smith nrows = 0; 458639f9d9dSBarry Smith /* loop over columns */ 459639f9d9dSBarry Smith for (j=0; j<n; j++) { 460639f9d9dSBarry Smith col = is[j]; 461639f9d9dSBarry Smith rows = cj + ci[col]; 462639f9d9dSBarry Smith m = ci[col+1] - ci[col]; 463639f9d9dSBarry Smith /* loop over columns marking them in rowhit */ 464639f9d9dSBarry Smith fm = N; /* fm points to first entry in linked list */ 465639f9d9dSBarry Smith for (k=0; k<m; k++) { 466639f9d9dSBarry Smith currentcol = *rows++; 467639f9d9dSBarry Smith /* is it already in the list? */ 468639f9d9dSBarry Smith do { 469639f9d9dSBarry Smith mfm = fm; 470639f9d9dSBarry Smith fm = rowhit[fm]; 471639f9d9dSBarry Smith } while (fm < currentcol); 472639f9d9dSBarry Smith /* not in list so add it */ 473639f9d9dSBarry Smith if (fm != currentcol) { 474639f9d9dSBarry Smith nrows++; 475639f9d9dSBarry Smith columnsforrow[currentcol] = col; 476639f9d9dSBarry Smith /* next three lines insert new entry into linked list */ 477639f9d9dSBarry Smith rowhit[mfm] = currentcol; 478639f9d9dSBarry Smith rowhit[currentcol] = fm; 479639f9d9dSBarry Smith fm = currentcol; 480639f9d9dSBarry Smith /* fm points to present position in list since we know the columns are sorted */ 48171cd77b2SBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 482639f9d9dSBarry Smith } 483639f9d9dSBarry Smith } 484639f9d9dSBarry Smith c->nrows[i] = nrows; 48538baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 48638baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 487639f9d9dSBarry Smith /* now store the linked list of rows into c->rows[i] */ 488639f9d9dSBarry Smith nrows = 0; 489639f9d9dSBarry Smith fm = rowhit[N]; 490639f9d9dSBarry Smith do { 491639f9d9dSBarry Smith c->rows[i][nrows] = fm; 492639f9d9dSBarry Smith c->columnsforrow[i][nrows++] = columnsforrow[fm]; 493639f9d9dSBarry Smith fm = rowhit[fm]; 494639f9d9dSBarry Smith } while (fm < N); 495639f9d9dSBarry Smith } /* ---------------------------------------------------------------------------------------*/ 496639f9d9dSBarry Smith ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 497639f9d9dSBarry Smith } 4983acb8795SBarry Smith ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 499639f9d9dSBarry Smith 500606d414cSSatish Balay ierr = PetscFree(rowhit);CHKERRQ(ierr); 501606d414cSSatish Balay ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 502639f9d9dSBarry Smith 50330b34957SBarry Smith /* Optimize by adding the vscale, and scaleforrow[][] fields */ 50430b34957SBarry Smith /* 50530b34957SBarry Smith see the version for MPIAIJ 50630b34957SBarry Smith */ 507ce94432eSBarry Smith ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 50838baddfdSBarry Smith ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 50930b34957SBarry Smith for (k=0; k<c->ncolors; k++) { 51038baddfdSBarry Smith ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 51130b34957SBarry Smith for (l=0; l<c->nrows[k]; l++) { 51230b34957SBarry Smith col = c->columnsforrow[k][l]; 5132205254eSKarl Rupp 51430b34957SBarry Smith c->vscaleforrow[k][l] = col; 51530b34957SBarry Smith } 51630b34957SBarry Smith } 517b9617806SBarry Smith ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 5183a40ed3dSBarry Smith PetscFunctionReturn(0); 51970c3da92SBarry Smith } 52079c1e64dSHong Zhang 521