170c3da92SBarry Smith 2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 3525d23c0SHong Zhang #include <../src/mat/impls/baij/seq/baij.h> 4525d23c0SHong Zhang 5b582cc96SHong Zhang #undef __FUNCT__ 6b582cc96SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqBAIJ" 7b582cc96SHong Zhang PetscErrorCode MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 8b582cc96SHong Zhang { 9b582cc96SHong Zhang PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 10b582cc96SHong Zhang PetscErrorCode ierr; 11d6752224SHong Zhang PetscInt bs=J->rmap->bs,bs2=bs*bs,i,j,k,l,row,col,N; 12b582cc96SHong Zhang PetscScalar dx,*y,*xx,*w3_array; 13b582cc96SHong Zhang PetscScalar *vscale_array; 14b582cc96SHong Zhang PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 15b582cc96SHong Zhang Vec w1 = coloring->w1,w2=coloring->w2,w3; 16b582cc96SHong Zhang void *fctx = coloring->fctx; 17b582cc96SHong Zhang PetscBool flg = PETSC_FALSE; 18b582cc96SHong Zhang Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 19d6752224SHong Zhang PetscScalar *ca = csp->a,*ca_l; 20*26cb46bfSHong Zhang PetscInt brow,bcol,bspidx,spidx,nz; 21b582cc96SHong Zhang 22b582cc96SHong Zhang PetscFunctionBegin; 23b582cc96SHong Zhang ierr = MatSetUnfactored(J);CHKERRQ(ierr); 24b582cc96SHong Zhang ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 25b582cc96SHong Zhang if (flg) { 26b582cc96SHong Zhang ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 27b582cc96SHong Zhang } else { 28b582cc96SHong Zhang PetscBool assembled; 29b582cc96SHong Zhang ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 30b582cc96SHong Zhang if (assembled) { 31b582cc96SHong Zhang ierr = MatZeroEntries(J);CHKERRQ(ierr); 32b582cc96SHong Zhang } 33b582cc96SHong Zhang } 34b582cc96SHong Zhang 35b582cc96SHong Zhang if (!coloring->vscale) { 36b582cc96SHong Zhang ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 37b582cc96SHong Zhang } 38b582cc96SHong Zhang 39b582cc96SHong Zhang if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 40b582cc96SHong Zhang ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 41b582cc96SHong Zhang } 42b582cc96SHong Zhang 43b582cc96SHong Zhang /* Set w1 = F(x1) */ 44b582cc96SHong Zhang if (!coloring->fset) { 45b582cc96SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 46b582cc96SHong Zhang ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 47b582cc96SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 48b582cc96SHong Zhang } else { 49b582cc96SHong Zhang coloring->fset = PETSC_FALSE; 50b582cc96SHong Zhang } 51b582cc96SHong Zhang 52b582cc96SHong Zhang if (!coloring->w3) { 53b582cc96SHong Zhang ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 54b582cc96SHong Zhang ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 55b582cc96SHong Zhang } 56b582cc96SHong Zhang w3 = coloring->w3; 57b582cc96SHong Zhang 58b582cc96SHong Zhang /* Compute all the local scale factors: vscale = 1./dx = 1./(epsilon*xx) */ 59b582cc96SHong Zhang ierr = VecGetLocalSize(x1,&N);CHKERRQ(ierr); 60b582cc96SHong Zhang ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 61b582cc96SHong Zhang ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 62b582cc96SHong Zhang 63b582cc96SHong Zhang /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 64b582cc96SHong Zhang for (col=0; col<N; col++) { 65b582cc96SHong Zhang if (coloring->htype[0] == 'w') { 66b582cc96SHong Zhang dx = 1.0 + unorm; 67b582cc96SHong Zhang } else { 68b582cc96SHong Zhang dx = xx[col]; 69b582cc96SHong Zhang } 70b582cc96SHong Zhang if (dx == (PetscScalar)0.0) dx = 1.0; 71b582cc96SHong Zhang if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 72b582cc96SHong Zhang else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 73b582cc96SHong Zhang dx *= epsilon; 74b582cc96SHong Zhang vscale_array[col] = (PetscScalar)dx; 75b582cc96SHong Zhang } 76b582cc96SHong Zhang 7787d34202SHong Zhang /* Create bs vectors w2s and w3s */ 7887d34202SHong Zhang Vec w2s[bs]; 7987d34202SHong Zhang for (i=0; i<bs; i++) { 8087d34202SHong Zhang ierr = VecDuplicate(w2,&w2s[i]);CHKERRQ(ierr); 8187d34202SHong Zhang } 8287d34202SHong Zhang 83b582cc96SHong Zhang nz = 0; 84b582cc96SHong Zhang for (k=0; k<coloring->ncolors; k++) { /* Loop over each color */ 85b582cc96SHong Zhang coloring->currentcolor = k; 8687d34202SHong Zhang 8787d34202SHong Zhang /* Compute w3 = x1 + dx */ 88b582cc96SHong Zhang for (i=0; i<bs; i++) { 89b582cc96SHong Zhang ierr = VecCopy(x1,w3);CHKERRQ(ierr); 90b582cc96SHong Zhang ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 91b582cc96SHong Zhang for (l=0; l<coloring->ncolumns[k]; l++) { 92b582cc96SHong Zhang col = i + bs*coloring->columns[k][l]; 93b582cc96SHong Zhang w3_array[col] += vscale_array[col]; 94b582cc96SHong Zhang } 95b582cc96SHong Zhang ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 96b582cc96SHong Zhang 9787d34202SHong Zhang /* Evaluate function w2s = F(w3) - F(x1) */ 98b582cc96SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 9987d34202SHong Zhang ierr = (*f)(sctx,w3,w2s[i],fctx);CHKERRQ(ierr); 100b582cc96SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 10187d34202SHong Zhang ierr = VecAXPY(w2s[i],-1.0,w1);CHKERRQ(ierr); 10287d34202SHong Zhang } 103b582cc96SHong Zhang 104*26cb46bfSHong Zhang /* Loop over rows of vector, putting results into Jacobian matrix */ 105b582cc96SHong Zhang for (l=0; l<coloring->nrows[k]; l++) { 106b582cc96SHong Zhang brow = coloring->rowcolden2sp3[nz++]; 107b582cc96SHong Zhang bcol = coloring->rowcolden2sp3[nz++]; 108b582cc96SHong Zhang bspidx = coloring->rowcolden2sp3[nz++]; 109*26cb46bfSHong Zhang ca_l = ca + bs2*bspidx; 110*26cb46bfSHong Zhang spidx = 0; 111*26cb46bfSHong Zhang for (i=0; i<bs; i++) { /* column of the block */ 112*26cb46bfSHong Zhang ierr = VecGetArray(w2s[i],&y);CHKERRQ(ierr); 113b582cc96SHong Zhang col = bs*bcol + i; 114*26cb46bfSHong Zhang for (j=0; j<bs; j++) { /* row of the block */ 115*26cb46bfSHong Zhang row = bs*brow + j; 116*26cb46bfSHong Zhang ca_l[spidx++] = y[row]/vscale_array[col]; 117b582cc96SHong Zhang } 11887d34202SHong Zhang ierr = VecRestoreArray(w2s[i],&y);CHKERRQ(ierr); 119*26cb46bfSHong Zhang } 120*26cb46bfSHong Zhang } 121b582cc96SHong Zhang } /* endof for each color */ 122b582cc96SHong Zhang ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 123b582cc96SHong Zhang ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 12487d34202SHong Zhang for (i=0; i<bs; i++) { 12587d34202SHong Zhang ierr = VecDestroy(&w2s[i]);CHKERRQ(ierr); 12687d34202SHong Zhang } 127b582cc96SHong Zhang coloring->currentcolor = -1; 128b582cc96SHong Zhang PetscFunctionReturn(0); 129b582cc96SHong Zhang } 130b582cc96SHong Zhang 131525d23c0SHong Zhang /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */ 132525d23c0SHong Zhang /* #define JACOBIANCOLOROPT */ 133525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 134525d23c0SHong Zhang #include <petsctime.h> 135525d23c0SHong Zhang #endif 136525d23c0SHong Zhang #undef __FUNCT__ 137525d23c0SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqAIJ" 138525d23c0SHong Zhang PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 139525d23c0SHong Zhang { 140525d23c0SHong Zhang PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 141525d23c0SHong Zhang PetscErrorCode ierr; 142b582cc96SHong Zhang PetscInt k,l,row,col,N,bs=J->rmap->bs,bs2=bs*bs; 143525d23c0SHong Zhang PetscScalar dx,*y,*xx,*w3_array; 144525d23c0SHong Zhang PetscScalar *vscale_array; 145525d23c0SHong Zhang PetscReal epsilon=coloring->error_rel,umin = coloring->umin,unorm; 146525d23c0SHong Zhang Vec w1=coloring->w1,w2=coloring->w2,w3; 147525d23c0SHong Zhang void *fctx=coloring->fctx; 148b582cc96SHong Zhang PetscBool flg=PETSC_FALSE,isBAIJ=PETSC_FALSE; 149b582cc96SHong Zhang PetscScalar *ca; 150525d23c0SHong Zhang const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 151525d23c0SHong Zhang PetscInt **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx; 152525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 153525d23c0SHong Zhang PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11; 154525d23c0SHong Zhang #endif 155525d23c0SHong Zhang 156525d23c0SHong Zhang PetscFunctionBegin; 157525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 158525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 159525d23c0SHong Zhang #endif 160b582cc96SHong Zhang ierr = PetscObjectTypeCompare((PetscObject)J,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 161b582cc96SHong Zhang if (isBAIJ) { 162b582cc96SHong Zhang Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 163b582cc96SHong Zhang ca = csp->a; 164b582cc96SHong Zhang } else { 165b582cc96SHong Zhang Mat_SeqAIJ *csp=(Mat_SeqAIJ*)J->data; 166b582cc96SHong Zhang ca = csp->a; 167b582cc96SHong Zhang bs = 1; bs2 = 1; 168b582cc96SHong Zhang } 169b582cc96SHong Zhang 170525d23c0SHong Zhang ierr = MatSetUnfactored(J);CHKERRQ(ierr); 171525d23c0SHong Zhang ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 172525d23c0SHong Zhang if (flg) { 173525d23c0SHong Zhang ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 174525d23c0SHong Zhang } else { 175525d23c0SHong Zhang PetscBool assembled; 176525d23c0SHong Zhang ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 177525d23c0SHong Zhang if (assembled) { 178525d23c0SHong Zhang ierr = MatZeroEntries(J);CHKERRQ(ierr); 179525d23c0SHong Zhang } 180525d23c0SHong Zhang } 181525d23c0SHong Zhang 182525d23c0SHong Zhang if (!coloring->vscale) { 183525d23c0SHong Zhang ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 184525d23c0SHong Zhang } 185525d23c0SHong Zhang 186525d23c0SHong Zhang if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 187525d23c0SHong Zhang ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 188525d23c0SHong Zhang } 189525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 190525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 191525d23c0SHong Zhang t_init += t1 - t0; 192525d23c0SHong Zhang #endif 193525d23c0SHong Zhang 194525d23c0SHong Zhang /* Set w1 = F(x1) */ 195525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 196525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 197525d23c0SHong Zhang #endif 198525d23c0SHong Zhang if (!coloring->fset) { 199525d23c0SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 200525d23c0SHong Zhang ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 201525d23c0SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 202525d23c0SHong Zhang } else { 203525d23c0SHong Zhang coloring->fset = PETSC_FALSE; 204525d23c0SHong Zhang } 205525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 206525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 207525d23c0SHong Zhang t_setvals += t1 - t0; 208525d23c0SHong Zhang #endif 209525d23c0SHong Zhang 210525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 211525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 212525d23c0SHong Zhang #endif 213525d23c0SHong Zhang if (!coloring->w3) { 214525d23c0SHong Zhang ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 215525d23c0SHong Zhang ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 216525d23c0SHong Zhang } 217525d23c0SHong Zhang w3 = coloring->w3; 218525d23c0SHong Zhang 219525d23c0SHong Zhang /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */ 220525d23c0SHong Zhang ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 221525d23c0SHong Zhang ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 222525d23c0SHong Zhang ierr = VecGetSize(x1,&N);CHKERRQ(ierr); 223525d23c0SHong Zhang for (col=0; col<N; col++) { 224525d23c0SHong Zhang if (coloring->htype[0] == 'w') { 225525d23c0SHong Zhang dx = 1.0 + unorm; 226525d23c0SHong Zhang } else { 227525d23c0SHong Zhang dx = xx[col]; 228525d23c0SHong Zhang } 229525d23c0SHong Zhang if (dx == (PetscScalar)0.0) dx = 1.0; 230525d23c0SHong Zhang if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 231525d23c0SHong Zhang else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 232525d23c0SHong Zhang dx *= epsilon; 233525d23c0SHong Zhang vscale_array[col] = (PetscScalar)dx; 234525d23c0SHong Zhang } 235525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 236525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 237525d23c0SHong Zhang t_init += t1 - t0; 238525d23c0SHong Zhang #endif 239525d23c0SHong Zhang 240525d23c0SHong Zhang nz = 0; 241525d23c0SHong Zhang for (k=0; k<ncolors; k++) { /* loop over colors */ 242525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 243525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 244525d23c0SHong Zhang #endif 245525d23c0SHong Zhang coloring->currentcolor = k; 246525d23c0SHong Zhang 247525d23c0SHong Zhang /* 248525d23c0SHong Zhang Loop over each column associated with color 249525d23c0SHong Zhang adding the perturbation to the vector w3 = x1 + dx. 250525d23c0SHong Zhang */ 251525d23c0SHong Zhang ierr = VecCopy(x1,w3);CHKERRQ(ierr); 252525d23c0SHong Zhang ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 253525d23c0SHong Zhang ncolumns_k = ncolumns[k]; 254525d23c0SHong Zhang for (l=0; l<ncolumns_k; l++) { /* loop over columns */ 255525d23c0SHong Zhang col = columns[k][l]; 256525d23c0SHong Zhang w3_array[col] += vscale_array[col]; 257525d23c0SHong Zhang } 258525d23c0SHong Zhang ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 259525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 260525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 261525d23c0SHong Zhang t_dx += t1 - t0; 262525d23c0SHong Zhang #endif 263525d23c0SHong Zhang 264525d23c0SHong Zhang /* 265525d23c0SHong Zhang Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 266525d23c0SHong Zhang w2 = F(x1 + dx) - F(x1) 267525d23c0SHong Zhang */ 268525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 269525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 270525d23c0SHong Zhang #endif 271525d23c0SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 272525d23c0SHong Zhang ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 273525d23c0SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 274525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 275525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 276525d23c0SHong Zhang t_func += t1 - t0; 277525d23c0SHong Zhang #endif 278525d23c0SHong Zhang 279525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 280525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 281525d23c0SHong Zhang #endif 282525d23c0SHong Zhang ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 283525d23c0SHong Zhang 284525d23c0SHong Zhang /* 285525d23c0SHong Zhang Loop over rows of vector, putting w2/dx into Jacobian matrix 286525d23c0SHong Zhang */ 287525d23c0SHong Zhang ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 288525d23c0SHong Zhang nrows_k = nrows[k]; 289525d23c0SHong Zhang for (l=0; l<nrows_k; l++) { /* loop over rows */ 290525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 291525d23c0SHong Zhang ierr = PetscTime(&t00);CHKERRQ(ierr); 292525d23c0SHong Zhang #endif 293525d23c0SHong Zhang row = coloring->rowcolden2sp3[nz++]; 294525d23c0SHong Zhang col = coloring->rowcolden2sp3[nz++]; 295525d23c0SHong Zhang spidx = coloring->rowcolden2sp3[nz++]; 296525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 297525d23c0SHong Zhang ierr = PetscTime(&t11);CHKERRQ(ierr); 298525d23c0SHong Zhang t_kl += t11 - t00; 299525d23c0SHong Zhang #endif 300b582cc96SHong Zhang //printf(" row col val 301b582cc96SHong Zhang ca[spidx*bs2] = y[row*bs]/vscale_array[col*bs]; 302b582cc96SHong Zhang //printf(" (%d, %d, %g)\n", row*bs,col*bs,ca[spidx*bs2]); 303525d23c0SHong Zhang } 304525d23c0SHong Zhang ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 305525d23c0SHong Zhang 306525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 307525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 308525d23c0SHong Zhang t_setvals += t1 - t0; 309525d23c0SHong Zhang #endif 310525d23c0SHong Zhang } /* endof for each color */ 311525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 312525d23c0SHong Zhang printf(" FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx); 313525d23c0SHong Zhang printf(" FDColorApply time: kl %g\n",t_kl); 314525d23c0SHong Zhang #endif 315525d23c0SHong Zhang ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 316525d23c0SHong Zhang ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 317525d23c0SHong Zhang 318525d23c0SHong Zhang coloring->currentcolor = -1; 319525d23c0SHong Zhang PetscFunctionReturn(0); 320525d23c0SHong Zhang } 321639f9d9dSBarry Smith 32279c1e64dSHong Zhang #undef __FUNCT__ 32379c1e64dSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 32479c1e64dSHong Zhang /* 32579c1e64dSHong Zhang This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 32679c1e64dSHong Zhang */ 32779c1e64dSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 32879c1e64dSHong Zhang { 32979c1e64dSHong Zhang PetscErrorCode ierr; 33079c1e64dSHong Zhang PetscInt i,n,nrows,N,j,k,m,ncols,col; 33179c1e64dSHong Zhang const PetscInt *is,*rows,*ci,*cj; 33252011a10SHong Zhang PetscInt nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz; 33379c1e64dSHong Zhang IS *isa; 334525d23c0SHong Zhang PetscBool isBAIJ; 33579c1e64dSHong Zhang Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 33679c1e64dSHong Zhang 33779c1e64dSHong Zhang PetscFunctionBegin; 33879c1e64dSHong Zhang ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 33952011a10SHong Zhang 340476e0d0aSHong Zhang /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 341476e0d0aSHong Zhang SeqBAIJ calls this routine, thus check it */ 34279c1e64dSHong Zhang ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 343525d23c0SHong Zhang ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 344525d23c0SHong Zhang if (!isBAIJ) { 34552011a10SHong Zhang bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */ 34679c1e64dSHong Zhang } 34779c1e64dSHong Zhang N = mat->cmap->N/bs; 34879c1e64dSHong Zhang c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 34979c1e64dSHong Zhang c->N = mat->cmap->N/bs; 35079c1e64dSHong Zhang c->m = mat->rmap->N/bs; 35179c1e64dSHong Zhang c->rstart = 0; 35279c1e64dSHong Zhang 35379c1e64dSHong Zhang c->ncolors = nis; 35479c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 35579c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 35679c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 35785740eacSHong Zhang ierr = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr); 35879c1e64dSHong Zhang 359525d23c0SHong Zhang if (isBAIJ) { 360525d23c0SHong Zhang ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 361525d23c0SHong Zhang } else { 3626378f32dSHong Zhang ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 363525d23c0SHong Zhang } 36479c1e64dSHong Zhang 365476e0d0aSHong Zhang ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr); 36679c1e64dSHong Zhang ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 36779c1e64dSHong Zhang 36817a7dee1SHong Zhang nz = 0; 36979c1e64dSHong Zhang for (i=0; i<nis; i++) { /* loop over colors */ 37079c1e64dSHong Zhang ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 37179c1e64dSHong Zhang ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 37279c1e64dSHong Zhang 37379c1e64dSHong Zhang c->ncolumns[i] = n; 37479c1e64dSHong Zhang if (n) { 37579c1e64dSHong Zhang ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 37679c1e64dSHong Zhang ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 37779c1e64dSHong Zhang } else { 37879c1e64dSHong Zhang c->columns[i] = 0; 37979c1e64dSHong Zhang } 38079c1e64dSHong Zhang 38179c1e64dSHong Zhang /* fast, crude version requires O(N*N) work */ 382476e0d0aSHong Zhang nrows = 0; 38379c1e64dSHong Zhang for (j=0; j<n; j++) { /* loop over columns */ 38479c1e64dSHong Zhang col = is[j]; 38579c1e64dSHong Zhang rows = cj + ci[col]; 38679c1e64dSHong Zhang m = ci[col+1] - ci[col]; 38779c1e64dSHong Zhang for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 38879c1e64dSHong Zhang rowhit[*rows] = col + 1; 389476e0d0aSHong Zhang spidxhit[*rows++] = spidx[ci[col] + k]; 390476e0d0aSHong Zhang nrows++; 39179c1e64dSHong Zhang } 39279c1e64dSHong Zhang } 393476e0d0aSHong Zhang c->nrows[i] = nrows; /* total num of rows for this color */ 39479c1e64dSHong Zhang 39579c1e64dSHong Zhang nrows = 0; 39679c1e64dSHong Zhang for (j=0; j<N; j++) { /* loop over rows */ 39779c1e64dSHong Zhang if (rowhit[j]) { 39817a7dee1SHong Zhang c->rowcolden2sp3[nz++] = j; /* row index */ 39917a7dee1SHong Zhang c->rowcolden2sp3[nz++] = rowhit[j] - 1; /* column index */ 40017a7dee1SHong Zhang c->rowcolden2sp3[nz++] = spidxhit[j]; /* den2sp index */ 40179c1e64dSHong Zhang rowhit[j] = 0.0; /* zero rowhit for reuse */ 40279c1e64dSHong Zhang } 40379c1e64dSHong Zhang } 40479c1e64dSHong Zhang ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 40579c1e64dSHong Zhang } 40617a7dee1SHong Zhang if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz); 40785740eacSHong Zhang 408525d23c0SHong Zhang if (isBAIJ) { 409525d23c0SHong Zhang ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 410525d23c0SHong Zhang } else { 4116378f32dSHong Zhang ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 412525d23c0SHong Zhang } 413476e0d0aSHong Zhang ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr); 41479c1e64dSHong Zhang ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 415476e0d0aSHong Zhang 4164737c552SHong Zhang c->ctype = IS_COLORING_GHOSTED; 417b582cc96SHong Zhang if (isBAIJ) { 418b582cc96SHong Zhang mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ; 419b582cc96SHong Zhang } else { 42079c1e64dSHong Zhang mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 421b582cc96SHong Zhang } 42252011a10SHong Zhang ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 42379c1e64dSHong Zhang PetscFunctionReturn(0); 42479c1e64dSHong Zhang } 42579c1e64dSHong Zhang 4264a2ae208SSatish Balay #undef __FUNCT__ 4274a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 4283acb8795SBarry Smith /* 4293acb8795SBarry Smith This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 4303acb8795SBarry Smith This is why it has the ugly code with the MatGetBlockSize() 4313acb8795SBarry Smith */ 432dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 433639f9d9dSBarry Smith { 4346849ba73SBarry Smith PetscErrorCode ierr; 4351a83f524SJed Brown PetscInt i,n,nrows,N,j,k,m,ncols,col; 4361a83f524SJed Brown const PetscInt *is,*rows,*ci,*cj; 4373acb8795SBarry Smith PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 438b9617806SBarry Smith IS *isa; 439ace3abfcSBarry Smith PetscBool done,flg = PETSC_FALSE; 440476e0d0aSHong Zhang PetscBool flg1; 44179c1e64dSHong Zhang PetscBool new_impl=PETSC_FALSE; 442639f9d9dSBarry Smith 4433a40ed3dSBarry Smith PetscFunctionBegin; 44479c1e64dSHong Zhang ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 44579c1e64dSHong Zhang if (new_impl){ 44679c1e64dSHong Zhang ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 44779c1e64dSHong Zhang PetscFunctionReturn(0); 44879c1e64dSHong Zhang } 449e7e72b3dSBarry Smith if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 450522c5e43SBarry Smith 451b9617806SBarry Smith ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 452476e0d0aSHong Zhang /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 453476e0d0aSHong Zhang SeqBAIJ calls this routine, thus check it */ 454251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 455476e0d0aSHong Zhang if (flg1) { 4563acb8795SBarry Smith ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 4573acb8795SBarry Smith } 4583acb8795SBarry Smith 4593acb8795SBarry Smith N = mat->cmap->N/bs; 4603acb8795SBarry Smith c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 4613acb8795SBarry Smith c->N = mat->cmap->N/bs; 4623acb8795SBarry Smith c->m = mat->rmap->N/bs; 463005c665bSBarry Smith c->rstart = 0; 464005c665bSBarry Smith 465639f9d9dSBarry Smith c->ncolors = nis; 46638baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 46738baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 46838baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 46938baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 47038baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 47143a90d84SBarry Smith 4723acb8795SBarry Smith ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 473ce94432eSBarry Smith if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 47470c3da92SBarry Smith 47570c3da92SBarry Smith /* 476639f9d9dSBarry Smith Temporary option to allow for debugging/testing 47770c3da92SBarry Smith */ 4780298fd71SBarry Smith ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 47970c3da92SBarry Smith 48038baddfdSBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 48138baddfdSBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 48270c3da92SBarry Smith 483639f9d9dSBarry Smith for (i=0; i<nis; i++) { 484b9b97703SBarry Smith ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 485639f9d9dSBarry Smith ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 4862205254eSKarl Rupp 487639f9d9dSBarry Smith c->ncolumns[i] = n; 488639f9d9dSBarry Smith if (n) { 48938baddfdSBarry Smith ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 49038baddfdSBarry Smith ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 49170c3da92SBarry Smith } else { 492639f9d9dSBarry Smith c->columns[i] = 0; 49370c3da92SBarry Smith } 49470c3da92SBarry Smith 4953a7fca6bSBarry Smith if (!flg) { /* ------------------------------------------------------------------------------*/ 4963a7fca6bSBarry Smith /* fast, crude version requires O(N*N) work */ 49738baddfdSBarry Smith ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 498639f9d9dSBarry Smith /* loop over columns*/ 499639f9d9dSBarry Smith for (j=0; j<n; j++) { 500639f9d9dSBarry Smith col = is[j]; 501639f9d9dSBarry Smith rows = cj + ci[col]; 502639f9d9dSBarry Smith m = ci[col+1] - ci[col]; 503639f9d9dSBarry Smith /* loop over columns marking them in rowhit */ 504639f9d9dSBarry Smith for (k=0; k<m; k++) { 505639f9d9dSBarry Smith rowhit[*rows++] = col + 1; 50670c3da92SBarry Smith } 50770c3da92SBarry Smith } 508639f9d9dSBarry Smith /* count the number of hits */ 509639f9d9dSBarry Smith nrows = 0; 51070c3da92SBarry Smith for (j=0; j<N; j++) { 511639f9d9dSBarry Smith if (rowhit[j]) nrows++; 512639f9d9dSBarry Smith } 513639f9d9dSBarry Smith c->nrows[i] = nrows; 51438baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 51538baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 516639f9d9dSBarry Smith nrows = 0; 517639f9d9dSBarry Smith for (j=0; j<N; j++) { 518639f9d9dSBarry Smith if (rowhit[j]) { 519639f9d9dSBarry Smith c->rows[i][nrows] = j; 520639f9d9dSBarry Smith c->columnsforrow[i][nrows] = rowhit[j] - 1; 521639f9d9dSBarry Smith nrows++; 52270c3da92SBarry Smith } 52370c3da92SBarry Smith } 524639f9d9dSBarry Smith } else { /*-------------------------------------------------------------------------------*/ 5253a7fca6bSBarry Smith /* slow version, using rowhit as a linked list */ 52638baddfdSBarry Smith PetscInt currentcol,fm,mfm; 527639f9d9dSBarry Smith rowhit[N] = N; 528639f9d9dSBarry Smith nrows = 0; 529639f9d9dSBarry Smith /* loop over columns */ 530639f9d9dSBarry Smith for (j=0; j<n; j++) { 531639f9d9dSBarry Smith col = is[j]; 532639f9d9dSBarry Smith rows = cj + ci[col]; 533639f9d9dSBarry Smith m = ci[col+1] - ci[col]; 534639f9d9dSBarry Smith /* loop over columns marking them in rowhit */ 535639f9d9dSBarry Smith fm = N; /* fm points to first entry in linked list */ 536639f9d9dSBarry Smith for (k=0; k<m; k++) { 537639f9d9dSBarry Smith currentcol = *rows++; 538639f9d9dSBarry Smith /* is it already in the list? */ 539639f9d9dSBarry Smith do { 540639f9d9dSBarry Smith mfm = fm; 541639f9d9dSBarry Smith fm = rowhit[fm]; 542639f9d9dSBarry Smith } while (fm < currentcol); 543639f9d9dSBarry Smith /* not in list so add it */ 544639f9d9dSBarry Smith if (fm != currentcol) { 545639f9d9dSBarry Smith nrows++; 546639f9d9dSBarry Smith columnsforrow[currentcol] = col; 547639f9d9dSBarry Smith /* next three lines insert new entry into linked list */ 548639f9d9dSBarry Smith rowhit[mfm] = currentcol; 549639f9d9dSBarry Smith rowhit[currentcol] = fm; 550639f9d9dSBarry Smith fm = currentcol; 551639f9d9dSBarry Smith /* fm points to present position in list since we know the columns are sorted */ 55271cd77b2SBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 553639f9d9dSBarry Smith } 554639f9d9dSBarry Smith } 555639f9d9dSBarry Smith c->nrows[i] = nrows; 55638baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 55738baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 558639f9d9dSBarry Smith /* now store the linked list of rows into c->rows[i] */ 559639f9d9dSBarry Smith nrows = 0; 560639f9d9dSBarry Smith fm = rowhit[N]; 561639f9d9dSBarry Smith do { 562639f9d9dSBarry Smith c->rows[i][nrows] = fm; 563639f9d9dSBarry Smith c->columnsforrow[i][nrows++] = columnsforrow[fm]; 564639f9d9dSBarry Smith fm = rowhit[fm]; 565639f9d9dSBarry Smith } while (fm < N); 566639f9d9dSBarry Smith } /* ---------------------------------------------------------------------------------------*/ 567639f9d9dSBarry Smith ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 568639f9d9dSBarry Smith } 5693acb8795SBarry Smith ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 570639f9d9dSBarry Smith 571606d414cSSatish Balay ierr = PetscFree(rowhit);CHKERRQ(ierr); 572606d414cSSatish Balay ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 573639f9d9dSBarry Smith 57430b34957SBarry Smith /* Optimize by adding the vscale, and scaleforrow[][] fields */ 57530b34957SBarry Smith /* 57630b34957SBarry Smith see the version for MPIAIJ 57730b34957SBarry Smith */ 578ce94432eSBarry Smith ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 57938baddfdSBarry Smith ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 58030b34957SBarry Smith for (k=0; k<c->ncolors; k++) { 58138baddfdSBarry Smith ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 58230b34957SBarry Smith for (l=0; l<c->nrows[k]; l++) { 58330b34957SBarry Smith col = c->columnsforrow[k][l]; 5842205254eSKarl Rupp 58530b34957SBarry Smith c->vscaleforrow[k][l] = col; 58630b34957SBarry Smith } 58730b34957SBarry Smith } 588b9617806SBarry Smith ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 5893a40ed3dSBarry Smith PetscFunctionReturn(0); 59070c3da92SBarry Smith } 59179c1e64dSHong Zhang 592