170c3da92SBarry Smith 2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 3525d23c0SHong Zhang #include <../src/mat/impls/baij/seq/baij.h> 4525d23c0SHong Zhang 5b582cc96SHong Zhang #undef __FUNCT__ 6b582cc96SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqBAIJ" 7b582cc96SHong Zhang PetscErrorCode MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 8b582cc96SHong Zhang { 9b582cc96SHong Zhang PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 10b582cc96SHong Zhang PetscErrorCode ierr; 11*b7799381SHong Zhang PetscInt bs=J->rmap->bs,bs2=bs*bs,i,j,k,l,row,col,N=J->cmap->n,spidx,nz; 12*b7799381SHong Zhang PetscScalar dx,*dy_i,*xx,*w3_array,*dy=coloring->dy; 13b582cc96SHong Zhang PetscScalar *vscale_array; 14b582cc96SHong Zhang PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 15b582cc96SHong Zhang Vec w1 = coloring->w1,w2=coloring->w2,w3; 16b582cc96SHong Zhang void *fctx = coloring->fctx; 17b582cc96SHong Zhang PetscBool flg = PETSC_FALSE; 18b582cc96SHong Zhang Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 19d6752224SHong Zhang PetscScalar *ca = csp->a,*ca_l; 20b582cc96SHong Zhang 21b582cc96SHong Zhang PetscFunctionBegin; 22b582cc96SHong Zhang ierr = MatSetUnfactored(J);CHKERRQ(ierr); 23b582cc96SHong Zhang ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 24b582cc96SHong Zhang if (flg) { 25b582cc96SHong Zhang ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 26b582cc96SHong Zhang } else { 27b582cc96SHong Zhang PetscBool assembled; 28b582cc96SHong Zhang ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 29b582cc96SHong Zhang if (assembled) { 30b582cc96SHong Zhang ierr = MatZeroEntries(J);CHKERRQ(ierr); 31b582cc96SHong Zhang } 32b582cc96SHong Zhang } 33b582cc96SHong Zhang if (!coloring->vscale) { 34b582cc96SHong Zhang ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 35b582cc96SHong Zhang } 36b582cc96SHong Zhang 37b582cc96SHong Zhang /* Set w1 = F(x1) */ 38b582cc96SHong Zhang if (!coloring->fset) { 39b582cc96SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 40b582cc96SHong Zhang ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 41b582cc96SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 42b582cc96SHong Zhang } else { 43b582cc96SHong Zhang coloring->fset = PETSC_FALSE; 44b582cc96SHong Zhang } 45b582cc96SHong Zhang 46b582cc96SHong Zhang if (!coloring->w3) { 47b582cc96SHong Zhang ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 48b582cc96SHong Zhang ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 49b582cc96SHong Zhang } 50b582cc96SHong Zhang w3 = coloring->w3; 51b582cc96SHong Zhang 52*b7799381SHong Zhang /* Compute local scale factors: vscale = dx */ 53b582cc96SHong Zhang if (coloring->htype[0] == 'w') { 54*b7799381SHong Zhang /* tacky test; need to make systematic if we add other approaches to computing h*/ 55*b7799381SHong Zhang ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 56*b7799381SHong Zhang dx = (1.0 + unorm)*epsilon; 57*b7799381SHong Zhang ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr); 58b582cc96SHong Zhang } else { 59*b7799381SHong Zhang ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 60*b7799381SHong Zhang ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 61*b7799381SHong Zhang for (col=0; col<N; col++) { 62b582cc96SHong Zhang dx = xx[col]; 63*b7799381SHong Zhang if (dx == 0.0) dx = 1.0; 64b582cc96SHong Zhang if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 65b582cc96SHong Zhang else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 66b582cc96SHong Zhang dx *= epsilon; 67*b7799381SHong Zhang vscale_array[col] = dx; 68b582cc96SHong Zhang } 69*b7799381SHong Zhang ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 70*b7799381SHong Zhang ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 7187d34202SHong Zhang } 7287d34202SHong Zhang 73b582cc96SHong Zhang nz = 0; 74*b7799381SHong Zhang ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 75b582cc96SHong Zhang for (k=0; k<coloring->ncolors; k++) { /* Loop over each color */ 76b582cc96SHong Zhang coloring->currentcolor = k; 7787d34202SHong Zhang 7887d34202SHong Zhang /* Compute w3 = x1 + dx */ 79b582cc96SHong Zhang ierr = VecCopy(x1,w3);CHKERRQ(ierr); 80*b7799381SHong Zhang dy_i = dy; 81*b7799381SHong Zhang for (i=0; i<bs; i++) { /* Loop over a block of columns */ 82b582cc96SHong Zhang ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 83b582cc96SHong Zhang for (l=0; l<coloring->ncolumns[k]; l++) { 84b582cc96SHong Zhang col = i + bs*coloring->columns[k][l]; 85b582cc96SHong Zhang w3_array[col] += vscale_array[col]; 86*b7799381SHong Zhang if (i) { 87*b7799381SHong Zhang w3_array[col-1] -= vscale_array[col-1]; /* resume original w3[col-1] */ 88*b7799381SHong Zhang } 89b582cc96SHong Zhang } 90b582cc96SHong Zhang ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 91b582cc96SHong Zhang 92*b7799381SHong Zhang /* Evaluate function w2 = F(w3) - F(x1) */ 93b582cc96SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 94*b7799381SHong Zhang ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */ 95*b7799381SHong Zhang ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 96b582cc96SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 97*b7799381SHong Zhang ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 98*b7799381SHong Zhang ierr = VecResetArray(w2);CHKERRQ(ierr); 99*b7799381SHong Zhang dy_i += N; /* points to dy+i*N */ 10087d34202SHong Zhang } 101b582cc96SHong Zhang 102*b7799381SHong Zhang /* Loop over row blocks, putting dy/dx into Jacobian matrix */ 103b582cc96SHong Zhang for (l=0; l<coloring->nrows[k]; l++) { 104*b7799381SHong Zhang row = bs*coloring->rowcolden2sp3[nz++]; 105*b7799381SHong Zhang col = bs*coloring->rowcolden2sp3[nz++]; 106*b7799381SHong Zhang ca_l = ca + bs2*coloring->rowcolden2sp3[nz++]; 10726cb46bfSHong Zhang spidx = 0; 108*b7799381SHong Zhang dy_i = dy; 10926cb46bfSHong Zhang for (i=0; i<bs; i++) { /* column of the block */ 11026cb46bfSHong Zhang for (j=0; j<bs; j++) { /* row of the block */ 111*b7799381SHong Zhang ca_l[spidx++] = dy_i[row+j]/vscale_array[col+i]; 112b582cc96SHong Zhang } 113*b7799381SHong Zhang dy_i += N; /* points to dy+i*N */ 11426cb46bfSHong Zhang } 11526cb46bfSHong Zhang } 116b582cc96SHong Zhang } /* endof for each color */ 117b582cc96SHong Zhang ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 118*b7799381SHong Zhang 119b582cc96SHong Zhang coloring->currentcolor = -1; 120b582cc96SHong Zhang PetscFunctionReturn(0); 121b582cc96SHong Zhang } 122b582cc96SHong Zhang 123525d23c0SHong Zhang /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */ 124525d23c0SHong Zhang /* #define JACOBIANCOLOROPT */ 125525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 126525d23c0SHong Zhang #include <petsctime.h> 127525d23c0SHong Zhang #endif 128525d23c0SHong Zhang #undef __FUNCT__ 129525d23c0SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqAIJ" 130525d23c0SHong Zhang PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 131525d23c0SHong Zhang { 132525d23c0SHong Zhang PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 133525d23c0SHong Zhang PetscErrorCode ierr; 134b582cc96SHong Zhang PetscInt k,l,row,col,N,bs=J->rmap->bs,bs2=bs*bs; 135525d23c0SHong Zhang PetscScalar dx,*y,*xx,*w3_array; 136525d23c0SHong Zhang PetscScalar *vscale_array; 137525d23c0SHong Zhang PetscReal epsilon=coloring->error_rel,umin = coloring->umin,unorm; 138525d23c0SHong Zhang Vec w1=coloring->w1,w2=coloring->w2,w3; 139525d23c0SHong Zhang void *fctx=coloring->fctx; 140b582cc96SHong Zhang PetscBool flg=PETSC_FALSE,isBAIJ=PETSC_FALSE; 141b582cc96SHong Zhang PetscScalar *ca; 142525d23c0SHong Zhang const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 143525d23c0SHong Zhang PetscInt **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx; 144525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 145525d23c0SHong Zhang PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11; 146525d23c0SHong Zhang #endif 147525d23c0SHong Zhang 148525d23c0SHong Zhang PetscFunctionBegin; 149525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 150525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 151525d23c0SHong Zhang #endif 152b582cc96SHong Zhang ierr = PetscObjectTypeCompare((PetscObject)J,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 153b582cc96SHong Zhang if (isBAIJ) { 154b582cc96SHong Zhang Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 155b582cc96SHong Zhang ca = csp->a; 156b582cc96SHong Zhang } else { 157b582cc96SHong Zhang Mat_SeqAIJ *csp=(Mat_SeqAIJ*)J->data; 158b582cc96SHong Zhang ca = csp->a; 159b582cc96SHong Zhang bs = 1; bs2 = 1; 160b582cc96SHong Zhang } 161b582cc96SHong Zhang 162525d23c0SHong Zhang ierr = MatSetUnfactored(J);CHKERRQ(ierr); 163525d23c0SHong Zhang ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 164525d23c0SHong Zhang if (flg) { 165525d23c0SHong Zhang ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 166525d23c0SHong Zhang } else { 167525d23c0SHong Zhang PetscBool assembled; 168525d23c0SHong Zhang ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 169525d23c0SHong Zhang if (assembled) { 170525d23c0SHong Zhang ierr = MatZeroEntries(J);CHKERRQ(ierr); 171525d23c0SHong Zhang } 172525d23c0SHong Zhang } 173525d23c0SHong Zhang 174525d23c0SHong Zhang if (!coloring->vscale) { 175525d23c0SHong Zhang ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 176525d23c0SHong Zhang } 177525d23c0SHong Zhang 178525d23c0SHong Zhang if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 179525d23c0SHong Zhang ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 180525d23c0SHong Zhang } 181525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 182525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 183525d23c0SHong Zhang t_init += t1 - t0; 184525d23c0SHong Zhang #endif 185525d23c0SHong Zhang 186525d23c0SHong Zhang /* Set w1 = F(x1) */ 187525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 188525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 189525d23c0SHong Zhang #endif 190525d23c0SHong Zhang if (!coloring->fset) { 191525d23c0SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 192525d23c0SHong Zhang ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 193525d23c0SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 194525d23c0SHong Zhang } else { 195525d23c0SHong Zhang coloring->fset = PETSC_FALSE; 196525d23c0SHong Zhang } 197525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 198525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 199525d23c0SHong Zhang t_setvals += t1 - t0; 200525d23c0SHong Zhang #endif 201525d23c0SHong Zhang 202525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 203525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 204525d23c0SHong Zhang #endif 205525d23c0SHong Zhang if (!coloring->w3) { 206525d23c0SHong Zhang ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 207525d23c0SHong Zhang ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 208525d23c0SHong Zhang } 209525d23c0SHong Zhang w3 = coloring->w3; 210525d23c0SHong Zhang 211525d23c0SHong Zhang /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */ 212525d23c0SHong Zhang ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 213525d23c0SHong Zhang ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 214525d23c0SHong Zhang ierr = VecGetSize(x1,&N);CHKERRQ(ierr); 215525d23c0SHong Zhang for (col=0; col<N; col++) { 216525d23c0SHong Zhang if (coloring->htype[0] == 'w') { 217525d23c0SHong Zhang dx = 1.0 + unorm; 218525d23c0SHong Zhang } else { 219525d23c0SHong Zhang dx = xx[col]; 220525d23c0SHong Zhang } 221525d23c0SHong Zhang if (dx == (PetscScalar)0.0) dx = 1.0; 222525d23c0SHong Zhang if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 223525d23c0SHong Zhang else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 224525d23c0SHong Zhang dx *= epsilon; 225525d23c0SHong Zhang vscale_array[col] = (PetscScalar)dx; 226525d23c0SHong Zhang } 227525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 228525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 229525d23c0SHong Zhang t_init += t1 - t0; 230525d23c0SHong Zhang #endif 231525d23c0SHong Zhang 232525d23c0SHong Zhang nz = 0; 233525d23c0SHong Zhang for (k=0; k<ncolors; k++) { /* loop over colors */ 234525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 235525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 236525d23c0SHong Zhang #endif 237525d23c0SHong Zhang coloring->currentcolor = k; 238525d23c0SHong Zhang 239525d23c0SHong Zhang /* 240525d23c0SHong Zhang Loop over each column associated with color 241525d23c0SHong Zhang adding the perturbation to the vector w3 = x1 + dx. 242525d23c0SHong Zhang */ 243525d23c0SHong Zhang ierr = VecCopy(x1,w3);CHKERRQ(ierr); 244525d23c0SHong Zhang ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 245525d23c0SHong Zhang ncolumns_k = ncolumns[k]; 246525d23c0SHong Zhang for (l=0; l<ncolumns_k; l++) { /* loop over columns */ 247525d23c0SHong Zhang col = columns[k][l]; 248525d23c0SHong Zhang w3_array[col] += vscale_array[col]; 249525d23c0SHong Zhang } 250525d23c0SHong Zhang ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 251525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 252525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 253525d23c0SHong Zhang t_dx += t1 - t0; 254525d23c0SHong Zhang #endif 255525d23c0SHong Zhang 256525d23c0SHong Zhang /* 257525d23c0SHong Zhang Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 258525d23c0SHong Zhang w2 = F(x1 + dx) - F(x1) 259525d23c0SHong Zhang */ 260525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 261525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 262525d23c0SHong Zhang #endif 263525d23c0SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 264525d23c0SHong Zhang ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 265525d23c0SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 266525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 267525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 268525d23c0SHong Zhang t_func += t1 - t0; 269525d23c0SHong Zhang #endif 270525d23c0SHong Zhang 271525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 272525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 273525d23c0SHong Zhang #endif 274525d23c0SHong Zhang ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 275525d23c0SHong Zhang 276525d23c0SHong Zhang /* 277525d23c0SHong Zhang Loop over rows of vector, putting w2/dx into Jacobian matrix 278525d23c0SHong Zhang */ 279525d23c0SHong Zhang ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 280525d23c0SHong Zhang nrows_k = nrows[k]; 281525d23c0SHong Zhang for (l=0; l<nrows_k; l++) { /* loop over rows */ 282525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 283525d23c0SHong Zhang ierr = PetscTime(&t00);CHKERRQ(ierr); 284525d23c0SHong Zhang #endif 285525d23c0SHong Zhang row = coloring->rowcolden2sp3[nz++]; 286525d23c0SHong Zhang col = coloring->rowcolden2sp3[nz++]; 287525d23c0SHong Zhang spidx = coloring->rowcolden2sp3[nz++]; 288525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 289525d23c0SHong Zhang ierr = PetscTime(&t11);CHKERRQ(ierr); 290525d23c0SHong Zhang t_kl += t11 - t00; 291525d23c0SHong Zhang #endif 292b582cc96SHong Zhang //printf(" row col val 293b582cc96SHong Zhang ca[spidx*bs2] = y[row*bs]/vscale_array[col*bs]; 294b582cc96SHong Zhang //printf(" (%d, %d, %g)\n", row*bs,col*bs,ca[spidx*bs2]); 295525d23c0SHong Zhang } 296525d23c0SHong Zhang ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 297525d23c0SHong Zhang 298525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 299525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 300525d23c0SHong Zhang t_setvals += t1 - t0; 301525d23c0SHong Zhang #endif 302525d23c0SHong Zhang } /* endof for each color */ 303525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 304525d23c0SHong Zhang printf(" FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx); 305525d23c0SHong Zhang printf(" FDColorApply time: kl %g\n",t_kl); 306525d23c0SHong Zhang #endif 307525d23c0SHong Zhang ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 308525d23c0SHong Zhang ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 309525d23c0SHong Zhang 310525d23c0SHong Zhang coloring->currentcolor = -1; 311525d23c0SHong Zhang PetscFunctionReturn(0); 312525d23c0SHong Zhang } 313639f9d9dSBarry Smith 31479c1e64dSHong Zhang #undef __FUNCT__ 31579c1e64dSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 31679c1e64dSHong Zhang /* 31779c1e64dSHong Zhang This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 31879c1e64dSHong Zhang */ 31979c1e64dSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 32079c1e64dSHong Zhang { 32179c1e64dSHong Zhang PetscErrorCode ierr; 32279c1e64dSHong Zhang PetscInt i,n,nrows,N,j,k,m,ncols,col; 32379c1e64dSHong Zhang const PetscInt *is,*rows,*ci,*cj; 32452011a10SHong Zhang PetscInt nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz; 32579c1e64dSHong Zhang IS *isa; 326525d23c0SHong Zhang PetscBool isBAIJ; 32779c1e64dSHong Zhang Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 32879c1e64dSHong Zhang 32979c1e64dSHong Zhang PetscFunctionBegin; 33079c1e64dSHong Zhang ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 33152011a10SHong Zhang 332476e0d0aSHong Zhang /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 333476e0d0aSHong Zhang SeqBAIJ calls this routine, thus check it */ 33479c1e64dSHong Zhang ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 335525d23c0SHong Zhang ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 336525d23c0SHong Zhang if (!isBAIJ) { 33752011a10SHong Zhang bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */ 33879c1e64dSHong Zhang } 33979c1e64dSHong Zhang N = mat->cmap->N/bs; 34079c1e64dSHong Zhang c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 34179c1e64dSHong Zhang c->N = mat->cmap->N/bs; 34279c1e64dSHong Zhang c->m = mat->rmap->N/bs; 34379c1e64dSHong Zhang c->rstart = 0; 34479c1e64dSHong Zhang 34579c1e64dSHong Zhang c->ncolors = nis; 34679c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 34779c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 34879c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 34985740eacSHong Zhang ierr = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr); 35079c1e64dSHong Zhang 351525d23c0SHong Zhang if (isBAIJ) { 352525d23c0SHong Zhang ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 353525d23c0SHong Zhang } else { 3546378f32dSHong Zhang ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 355525d23c0SHong Zhang } 35679c1e64dSHong Zhang 357476e0d0aSHong Zhang ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr); 35879c1e64dSHong Zhang ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 35979c1e64dSHong Zhang 36017a7dee1SHong Zhang nz = 0; 36179c1e64dSHong Zhang for (i=0; i<nis; i++) { /* loop over colors */ 36279c1e64dSHong Zhang ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 36379c1e64dSHong Zhang ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 36479c1e64dSHong Zhang 36579c1e64dSHong Zhang c->ncolumns[i] = n; 36679c1e64dSHong Zhang if (n) { 36779c1e64dSHong Zhang ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 36879c1e64dSHong Zhang ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 36979c1e64dSHong Zhang } else { 37079c1e64dSHong Zhang c->columns[i] = 0; 37179c1e64dSHong Zhang } 37279c1e64dSHong Zhang 37379c1e64dSHong Zhang /* fast, crude version requires O(N*N) work */ 374476e0d0aSHong Zhang nrows = 0; 37579c1e64dSHong Zhang for (j=0; j<n; j++) { /* loop over columns */ 37679c1e64dSHong Zhang col = is[j]; 37779c1e64dSHong Zhang rows = cj + ci[col]; 37879c1e64dSHong Zhang m = ci[col+1] - ci[col]; 37979c1e64dSHong Zhang for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 38079c1e64dSHong Zhang rowhit[*rows] = col + 1; 381476e0d0aSHong Zhang spidxhit[*rows++] = spidx[ci[col] + k]; 382476e0d0aSHong Zhang nrows++; 38379c1e64dSHong Zhang } 38479c1e64dSHong Zhang } 385476e0d0aSHong Zhang c->nrows[i] = nrows; /* total num of rows for this color */ 38679c1e64dSHong Zhang 38779c1e64dSHong Zhang nrows = 0; 38879c1e64dSHong Zhang for (j=0; j<N; j++) { /* loop over rows */ 38979c1e64dSHong Zhang if (rowhit[j]) { 39017a7dee1SHong Zhang c->rowcolden2sp3[nz++] = j; /* row index */ 39117a7dee1SHong Zhang c->rowcolden2sp3[nz++] = rowhit[j] - 1; /* column index */ 39217a7dee1SHong Zhang c->rowcolden2sp3[nz++] = spidxhit[j]; /* den2sp index */ 39379c1e64dSHong Zhang rowhit[j] = 0.0; /* zero rowhit for reuse */ 39479c1e64dSHong Zhang } 39579c1e64dSHong Zhang } 39679c1e64dSHong Zhang ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 39779c1e64dSHong Zhang } 39817a7dee1SHong Zhang if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz); 39985740eacSHong Zhang 400525d23c0SHong Zhang if (isBAIJ) { 401525d23c0SHong Zhang ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 402525d23c0SHong Zhang } else { 4036378f32dSHong Zhang ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 404525d23c0SHong Zhang } 405476e0d0aSHong Zhang ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr); 40679c1e64dSHong Zhang ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 407476e0d0aSHong Zhang 4084737c552SHong Zhang c->ctype = IS_COLORING_GHOSTED; 409b582cc96SHong Zhang if (isBAIJ) { 410b582cc96SHong Zhang mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ; 411*b7799381SHong Zhang ierr = PetscMalloc(bs*mat->cmap->N*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr); 412b582cc96SHong Zhang } else { 41379c1e64dSHong Zhang mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 414b582cc96SHong Zhang } 41552011a10SHong Zhang ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 41679c1e64dSHong Zhang PetscFunctionReturn(0); 41779c1e64dSHong Zhang } 41879c1e64dSHong Zhang 4194a2ae208SSatish Balay #undef __FUNCT__ 4204a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 4213acb8795SBarry Smith /* 4223acb8795SBarry Smith This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 4233acb8795SBarry Smith This is why it has the ugly code with the MatGetBlockSize() 4243acb8795SBarry Smith */ 425dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 426639f9d9dSBarry Smith { 4276849ba73SBarry Smith PetscErrorCode ierr; 4281a83f524SJed Brown PetscInt i,n,nrows,N,j,k,m,ncols,col; 4291a83f524SJed Brown const PetscInt *is,*rows,*ci,*cj; 4303acb8795SBarry Smith PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 431b9617806SBarry Smith IS *isa; 432ace3abfcSBarry Smith PetscBool done,flg = PETSC_FALSE; 433476e0d0aSHong Zhang PetscBool flg1; 43479c1e64dSHong Zhang PetscBool new_impl=PETSC_FALSE; 435639f9d9dSBarry Smith 4363a40ed3dSBarry Smith PetscFunctionBegin; 43779c1e64dSHong Zhang ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 43879c1e64dSHong Zhang if (new_impl){ 43979c1e64dSHong Zhang ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 44079c1e64dSHong Zhang PetscFunctionReturn(0); 44179c1e64dSHong Zhang } 442e7e72b3dSBarry Smith if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 443522c5e43SBarry Smith 444b9617806SBarry Smith ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 445476e0d0aSHong Zhang /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 446476e0d0aSHong Zhang SeqBAIJ calls this routine, thus check it */ 447251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 448476e0d0aSHong Zhang if (flg1) { 4493acb8795SBarry Smith ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 4503acb8795SBarry Smith } 4513acb8795SBarry Smith 4523acb8795SBarry Smith N = mat->cmap->N/bs; 4533acb8795SBarry Smith c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 4543acb8795SBarry Smith c->N = mat->cmap->N/bs; 4553acb8795SBarry Smith c->m = mat->rmap->N/bs; 456005c665bSBarry Smith c->rstart = 0; 457005c665bSBarry Smith 458639f9d9dSBarry Smith c->ncolors = nis; 45938baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 46038baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 46138baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 46238baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 46338baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 46443a90d84SBarry Smith 4653acb8795SBarry Smith ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 466ce94432eSBarry Smith if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 46770c3da92SBarry Smith 46870c3da92SBarry Smith /* 469639f9d9dSBarry Smith Temporary option to allow for debugging/testing 47070c3da92SBarry Smith */ 4710298fd71SBarry Smith ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 47270c3da92SBarry Smith 47338baddfdSBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 47438baddfdSBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 47570c3da92SBarry Smith 476639f9d9dSBarry Smith for (i=0; i<nis; i++) { 477b9b97703SBarry Smith ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 478639f9d9dSBarry Smith ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 4792205254eSKarl Rupp 480639f9d9dSBarry Smith c->ncolumns[i] = n; 481639f9d9dSBarry Smith if (n) { 48238baddfdSBarry Smith ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 48338baddfdSBarry Smith ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 48470c3da92SBarry Smith } else { 485639f9d9dSBarry Smith c->columns[i] = 0; 48670c3da92SBarry Smith } 48770c3da92SBarry Smith 4883a7fca6bSBarry Smith if (!flg) { /* ------------------------------------------------------------------------------*/ 4893a7fca6bSBarry Smith /* fast, crude version requires O(N*N) work */ 49038baddfdSBarry Smith ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 491639f9d9dSBarry Smith /* loop over columns*/ 492639f9d9dSBarry Smith for (j=0; j<n; j++) { 493639f9d9dSBarry Smith col = is[j]; 494639f9d9dSBarry Smith rows = cj + ci[col]; 495639f9d9dSBarry Smith m = ci[col+1] - ci[col]; 496639f9d9dSBarry Smith /* loop over columns marking them in rowhit */ 497639f9d9dSBarry Smith for (k=0; k<m; k++) { 498639f9d9dSBarry Smith rowhit[*rows++] = col + 1; 49970c3da92SBarry Smith } 50070c3da92SBarry Smith } 501639f9d9dSBarry Smith /* count the number of hits */ 502639f9d9dSBarry Smith nrows = 0; 50370c3da92SBarry Smith for (j=0; j<N; j++) { 504639f9d9dSBarry Smith if (rowhit[j]) nrows++; 505639f9d9dSBarry Smith } 506639f9d9dSBarry Smith c->nrows[i] = nrows; 50738baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 50838baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 509639f9d9dSBarry Smith nrows = 0; 510639f9d9dSBarry Smith for (j=0; j<N; j++) { 511639f9d9dSBarry Smith if (rowhit[j]) { 512639f9d9dSBarry Smith c->rows[i][nrows] = j; 513639f9d9dSBarry Smith c->columnsforrow[i][nrows] = rowhit[j] - 1; 514639f9d9dSBarry Smith nrows++; 51570c3da92SBarry Smith } 51670c3da92SBarry Smith } 517639f9d9dSBarry Smith } else { /*-------------------------------------------------------------------------------*/ 5183a7fca6bSBarry Smith /* slow version, using rowhit as a linked list */ 51938baddfdSBarry Smith PetscInt currentcol,fm,mfm; 520639f9d9dSBarry Smith rowhit[N] = N; 521639f9d9dSBarry Smith nrows = 0; 522639f9d9dSBarry Smith /* loop over columns */ 523639f9d9dSBarry Smith for (j=0; j<n; j++) { 524639f9d9dSBarry Smith col = is[j]; 525639f9d9dSBarry Smith rows = cj + ci[col]; 526639f9d9dSBarry Smith m = ci[col+1] - ci[col]; 527639f9d9dSBarry Smith /* loop over columns marking them in rowhit */ 528639f9d9dSBarry Smith fm = N; /* fm points to first entry in linked list */ 529639f9d9dSBarry Smith for (k=0; k<m; k++) { 530639f9d9dSBarry Smith currentcol = *rows++; 531639f9d9dSBarry Smith /* is it already in the list? */ 532639f9d9dSBarry Smith do { 533639f9d9dSBarry Smith mfm = fm; 534639f9d9dSBarry Smith fm = rowhit[fm]; 535639f9d9dSBarry Smith } while (fm < currentcol); 536639f9d9dSBarry Smith /* not in list so add it */ 537639f9d9dSBarry Smith if (fm != currentcol) { 538639f9d9dSBarry Smith nrows++; 539639f9d9dSBarry Smith columnsforrow[currentcol] = col; 540639f9d9dSBarry Smith /* next three lines insert new entry into linked list */ 541639f9d9dSBarry Smith rowhit[mfm] = currentcol; 542639f9d9dSBarry Smith rowhit[currentcol] = fm; 543639f9d9dSBarry Smith fm = currentcol; 544639f9d9dSBarry Smith /* fm points to present position in list since we know the columns are sorted */ 54571cd77b2SBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 546639f9d9dSBarry Smith } 547639f9d9dSBarry Smith } 548639f9d9dSBarry Smith c->nrows[i] = nrows; 54938baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 55038baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 551639f9d9dSBarry Smith /* now store the linked list of rows into c->rows[i] */ 552639f9d9dSBarry Smith nrows = 0; 553639f9d9dSBarry Smith fm = rowhit[N]; 554639f9d9dSBarry Smith do { 555639f9d9dSBarry Smith c->rows[i][nrows] = fm; 556639f9d9dSBarry Smith c->columnsforrow[i][nrows++] = columnsforrow[fm]; 557639f9d9dSBarry Smith fm = rowhit[fm]; 558639f9d9dSBarry Smith } while (fm < N); 559639f9d9dSBarry Smith } /* ---------------------------------------------------------------------------------------*/ 560639f9d9dSBarry Smith ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 561639f9d9dSBarry Smith } 5623acb8795SBarry Smith ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 563639f9d9dSBarry Smith 564606d414cSSatish Balay ierr = PetscFree(rowhit);CHKERRQ(ierr); 565606d414cSSatish Balay ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 566639f9d9dSBarry Smith 56730b34957SBarry Smith /* Optimize by adding the vscale, and scaleforrow[][] fields */ 56830b34957SBarry Smith /* 56930b34957SBarry Smith see the version for MPIAIJ 57030b34957SBarry Smith */ 571ce94432eSBarry Smith ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 57238baddfdSBarry Smith ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 57330b34957SBarry Smith for (k=0; k<c->ncolors; k++) { 57438baddfdSBarry Smith ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 57530b34957SBarry Smith for (l=0; l<c->nrows[k]; l++) { 57630b34957SBarry Smith col = c->columnsforrow[k][l]; 5772205254eSKarl Rupp 57830b34957SBarry Smith c->vscaleforrow[k][l] = col; 57930b34957SBarry Smith } 58030b34957SBarry Smith } 581b9617806SBarry Smith ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 5823a40ed3dSBarry Smith PetscFunctionReturn(0); 58370c3da92SBarry Smith } 58479c1e64dSHong Zhang 585