170c3da92SBarry Smith 2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 3525d23c0SHong Zhang #include <../src/mat/impls/baij/seq/baij.h> 4525d23c0SHong Zhang 5b582cc96SHong Zhang #undef __FUNCT__ 6b582cc96SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqBAIJ" 7b582cc96SHong Zhang PetscErrorCode MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 8b582cc96SHong Zhang { 9b582cc96SHong Zhang PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 10b582cc96SHong Zhang PetscErrorCode ierr; 11d6752224SHong Zhang PetscInt bs=J->rmap->bs,bs2=bs*bs,i,j,k,l,row,col,N; 12b582cc96SHong Zhang PetscScalar dx,*y,*xx,*w3_array; 13b582cc96SHong Zhang PetscScalar *vscale_array; 14b582cc96SHong Zhang PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 15b582cc96SHong Zhang Vec w1 = coloring->w1,w2=coloring->w2,w3; 16b582cc96SHong Zhang void *fctx = coloring->fctx; 17b582cc96SHong Zhang PetscBool flg = PETSC_FALSE; 18b582cc96SHong Zhang Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 19d6752224SHong Zhang PetscScalar *ca = csp->a,*ca_l; 20*87d34202SHong Zhang PetscInt brow,bcol,bspidx,nz,nz_i; 21b582cc96SHong Zhang 22b582cc96SHong Zhang PetscFunctionBegin; 23b582cc96SHong Zhang ierr = MatSetUnfactored(J);CHKERRQ(ierr); 24b582cc96SHong Zhang ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 25b582cc96SHong Zhang if (flg) { 26b582cc96SHong Zhang ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 27b582cc96SHong Zhang } else { 28b582cc96SHong Zhang PetscBool assembled; 29b582cc96SHong Zhang ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 30b582cc96SHong Zhang if (assembled) { 31b582cc96SHong Zhang ierr = MatZeroEntries(J);CHKERRQ(ierr); 32b582cc96SHong Zhang } 33b582cc96SHong Zhang } 34b582cc96SHong Zhang 35b582cc96SHong Zhang if (!coloring->vscale) { 36b582cc96SHong Zhang ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 37b582cc96SHong Zhang } 38b582cc96SHong Zhang 39b582cc96SHong Zhang if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 40b582cc96SHong Zhang ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 41b582cc96SHong Zhang } 42b582cc96SHong Zhang 43b582cc96SHong Zhang /* Set w1 = F(x1) */ 44b582cc96SHong Zhang if (!coloring->fset) { 45b582cc96SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 46b582cc96SHong Zhang ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 47b582cc96SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 48b582cc96SHong Zhang } else { 49b582cc96SHong Zhang coloring->fset = PETSC_FALSE; 50b582cc96SHong Zhang } 51b582cc96SHong Zhang 52b582cc96SHong Zhang if (!coloring->w3) { 53b582cc96SHong Zhang ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 54b582cc96SHong Zhang ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 55b582cc96SHong Zhang } 56b582cc96SHong Zhang w3 = coloring->w3; 57b582cc96SHong Zhang 58b582cc96SHong Zhang /* Compute all the local scale factors: vscale = 1./dx = 1./(epsilon*xx) */ 59b582cc96SHong Zhang ierr = VecGetLocalSize(x1,&N);CHKERRQ(ierr); 60b582cc96SHong Zhang ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 61b582cc96SHong Zhang ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 62b582cc96SHong Zhang 63b582cc96SHong Zhang /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 64b582cc96SHong Zhang for (col=0; col<N; col++) { 65b582cc96SHong Zhang if (coloring->htype[0] == 'w') { 66b582cc96SHong Zhang dx = 1.0 + unorm; 67b582cc96SHong Zhang } else { 68b582cc96SHong Zhang dx = xx[col]; 69b582cc96SHong Zhang } 70b582cc96SHong Zhang if (dx == (PetscScalar)0.0) dx = 1.0; 71b582cc96SHong Zhang if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 72b582cc96SHong Zhang else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 73b582cc96SHong Zhang dx *= epsilon; 74b582cc96SHong Zhang vscale_array[col] = (PetscScalar)dx; 75b582cc96SHong Zhang } 76b582cc96SHong Zhang 77*87d34202SHong Zhang /* Create bs vectors w2s and w3s */ 78*87d34202SHong Zhang Vec w2s[bs]; 79*87d34202SHong Zhang for (i=0; i<bs; i++) { 80*87d34202SHong Zhang ierr = VecDuplicate(w2,&w2s[i]);CHKERRQ(ierr); 81*87d34202SHong Zhang } 82*87d34202SHong Zhang 83b582cc96SHong Zhang nz = 0; 84b582cc96SHong Zhang for (k=0; k<coloring->ncolors; k++) { /* Loop over each color */ 85b582cc96SHong Zhang coloring->currentcolor = k; 86*87d34202SHong Zhang 87*87d34202SHong Zhang /*----------- new -------------*/ 88*87d34202SHong Zhang /* Compute w3 = x1 + dx */ 89b582cc96SHong Zhang for (i=0; i<bs; i++) { 90b582cc96SHong Zhang ierr = VecCopy(x1,w3);CHKERRQ(ierr); 91b582cc96SHong Zhang ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 92b582cc96SHong Zhang for (l=0; l<coloring->ncolumns[k]; l++) { 93b582cc96SHong Zhang col = i + bs*coloring->columns[k][l]; 94b582cc96SHong Zhang w3_array[col] += vscale_array[col]; 95b582cc96SHong Zhang } 96b582cc96SHong Zhang ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 97b582cc96SHong Zhang 98*87d34202SHong Zhang /* Evaluate function w2s = F(w3) - F(x1) */ 99b582cc96SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 100*87d34202SHong Zhang ierr = (*f)(sctx,w3,w2s[i],fctx);CHKERRQ(ierr); 101b582cc96SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 102*87d34202SHong Zhang ierr = VecAXPY(w2s[i],-1.0,w1);CHKERRQ(ierr); 103*87d34202SHong Zhang } 104*87d34202SHong Zhang /*----------- endof new -------------*/ 105b582cc96SHong Zhang 106*87d34202SHong Zhang nz_i = nz; 107*87d34202SHong Zhang for (i=0; i<bs; i++) { 108*87d34202SHong Zhang nz = nz_i; 109b582cc96SHong Zhang /* 110b582cc96SHong Zhang Loop over rows of vector, putting results into Jacobian matrix 111b582cc96SHong Zhang */ 112*87d34202SHong Zhang ierr = VecGetArray(w2s[i],&y);CHKERRQ(ierr); 113b582cc96SHong Zhang for (l=0; l<coloring->nrows[k]; l++) { 114b582cc96SHong Zhang brow = coloring->rowcolden2sp3[nz++]; 115b582cc96SHong Zhang bcol = coloring->rowcolden2sp3[nz++]; 116b582cc96SHong Zhang bspidx = coloring->rowcolden2sp3[nz++]; 117b582cc96SHong Zhang row = bs*brow; 118b582cc96SHong Zhang col = bs*bcol + i; 119642b4fb1SHong Zhang 120d6752224SHong Zhang ca_l = ca + bs2*bspidx + i*bs; 121b582cc96SHong Zhang for (j=0; j<bs; j++) { 122d6752224SHong Zhang ca_l[j] = y[row+j]/vscale_array[col]; 123b582cc96SHong Zhang } 124b582cc96SHong Zhang } 125*87d34202SHong Zhang ierr = VecRestoreArray(w2s[i],&y);CHKERRQ(ierr); 126d6752224SHong Zhang } /* endof for (i=0; i<bs; i++) */ 127b582cc96SHong Zhang } /* endof for each color */ 128b582cc96SHong Zhang ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 129b582cc96SHong Zhang ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 130*87d34202SHong Zhang for (i=0; i<bs; i++) { 131*87d34202SHong Zhang ierr = VecDestroy(&w2s[i]);CHKERRQ(ierr); 132*87d34202SHong Zhang } 133b582cc96SHong Zhang coloring->currentcolor = -1; 134b582cc96SHong Zhang PetscFunctionReturn(0); 135b582cc96SHong Zhang } 136b582cc96SHong Zhang 137525d23c0SHong Zhang /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */ 138525d23c0SHong Zhang /* #define JACOBIANCOLOROPT */ 139525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 140525d23c0SHong Zhang #include <petsctime.h> 141525d23c0SHong Zhang #endif 142525d23c0SHong Zhang #undef __FUNCT__ 143525d23c0SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqAIJ" 144525d23c0SHong Zhang PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 145525d23c0SHong Zhang { 146525d23c0SHong Zhang PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 147525d23c0SHong Zhang PetscErrorCode ierr; 148b582cc96SHong Zhang PetscInt k,l,row,col,N,bs=J->rmap->bs,bs2=bs*bs; 149525d23c0SHong Zhang PetscScalar dx,*y,*xx,*w3_array; 150525d23c0SHong Zhang PetscScalar *vscale_array; 151525d23c0SHong Zhang PetscReal epsilon=coloring->error_rel,umin = coloring->umin,unorm; 152525d23c0SHong Zhang Vec w1=coloring->w1,w2=coloring->w2,w3; 153525d23c0SHong Zhang void *fctx=coloring->fctx; 154b582cc96SHong Zhang PetscBool flg=PETSC_FALSE,isBAIJ=PETSC_FALSE; 155b582cc96SHong Zhang PetscScalar *ca; 156525d23c0SHong Zhang const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 157525d23c0SHong Zhang PetscInt **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx; 158525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 159525d23c0SHong Zhang PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11; 160525d23c0SHong Zhang #endif 161525d23c0SHong Zhang 162525d23c0SHong Zhang PetscFunctionBegin; 163525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 164525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 165525d23c0SHong Zhang #endif 166b582cc96SHong Zhang ierr = PetscObjectTypeCompare((PetscObject)J,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 167b582cc96SHong Zhang if (isBAIJ) { 168b582cc96SHong Zhang Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 169b582cc96SHong Zhang ca = csp->a; 170b582cc96SHong Zhang } else { 171b582cc96SHong Zhang Mat_SeqAIJ *csp=(Mat_SeqAIJ*)J->data; 172b582cc96SHong Zhang ca = csp->a; 173b582cc96SHong Zhang bs = 1; bs2 = 1; 174b582cc96SHong Zhang } 175b582cc96SHong Zhang 176525d23c0SHong Zhang ierr = MatSetUnfactored(J);CHKERRQ(ierr); 177525d23c0SHong Zhang ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 178525d23c0SHong Zhang if (flg) { 179525d23c0SHong Zhang ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 180525d23c0SHong Zhang } else { 181525d23c0SHong Zhang PetscBool assembled; 182525d23c0SHong Zhang ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 183525d23c0SHong Zhang if (assembled) { 184525d23c0SHong Zhang ierr = MatZeroEntries(J);CHKERRQ(ierr); 185525d23c0SHong Zhang } 186525d23c0SHong Zhang } 187525d23c0SHong Zhang 188525d23c0SHong Zhang if (!coloring->vscale) { 189525d23c0SHong Zhang ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 190525d23c0SHong Zhang } 191525d23c0SHong Zhang 192525d23c0SHong Zhang if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 193525d23c0SHong Zhang ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 194525d23c0SHong Zhang } 195525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 196525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 197525d23c0SHong Zhang t_init += t1 - t0; 198525d23c0SHong Zhang #endif 199525d23c0SHong Zhang 200525d23c0SHong Zhang /* Set w1 = F(x1) */ 201525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 202525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 203525d23c0SHong Zhang #endif 204525d23c0SHong Zhang if (!coloring->fset) { 205525d23c0SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 206525d23c0SHong Zhang ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 207525d23c0SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 208525d23c0SHong Zhang } else { 209525d23c0SHong Zhang coloring->fset = PETSC_FALSE; 210525d23c0SHong Zhang } 211525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 212525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 213525d23c0SHong Zhang t_setvals += t1 - t0; 214525d23c0SHong Zhang #endif 215525d23c0SHong Zhang 216525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 217525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 218525d23c0SHong Zhang #endif 219525d23c0SHong Zhang if (!coloring->w3) { 220525d23c0SHong Zhang ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 221525d23c0SHong Zhang ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 222525d23c0SHong Zhang } 223525d23c0SHong Zhang w3 = coloring->w3; 224525d23c0SHong Zhang 225525d23c0SHong Zhang /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */ 226525d23c0SHong Zhang ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 227525d23c0SHong Zhang ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 228525d23c0SHong Zhang ierr = VecGetSize(x1,&N);CHKERRQ(ierr); 229525d23c0SHong Zhang for (col=0; col<N; col++) { 230525d23c0SHong Zhang if (coloring->htype[0] == 'w') { 231525d23c0SHong Zhang dx = 1.0 + unorm; 232525d23c0SHong Zhang } else { 233525d23c0SHong Zhang dx = xx[col]; 234525d23c0SHong Zhang } 235525d23c0SHong Zhang if (dx == (PetscScalar)0.0) dx = 1.0; 236525d23c0SHong Zhang if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 237525d23c0SHong Zhang else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 238525d23c0SHong Zhang dx *= epsilon; 239525d23c0SHong Zhang vscale_array[col] = (PetscScalar)dx; 240525d23c0SHong Zhang } 241525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 242525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 243525d23c0SHong Zhang t_init += t1 - t0; 244525d23c0SHong Zhang #endif 245525d23c0SHong Zhang 246525d23c0SHong Zhang nz = 0; 247525d23c0SHong Zhang for (k=0; k<ncolors; k++) { /* loop over colors */ 248525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 249525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 250525d23c0SHong Zhang #endif 251525d23c0SHong Zhang coloring->currentcolor = k; 252525d23c0SHong Zhang 253525d23c0SHong Zhang /* 254525d23c0SHong Zhang Loop over each column associated with color 255525d23c0SHong Zhang adding the perturbation to the vector w3 = x1 + dx. 256525d23c0SHong Zhang */ 257525d23c0SHong Zhang ierr = VecCopy(x1,w3);CHKERRQ(ierr); 258525d23c0SHong Zhang ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 259525d23c0SHong Zhang ncolumns_k = ncolumns[k]; 260525d23c0SHong Zhang for (l=0; l<ncolumns_k; l++) { /* loop over columns */ 261525d23c0SHong Zhang col = columns[k][l]; 262525d23c0SHong Zhang w3_array[col] += vscale_array[col]; 263525d23c0SHong Zhang } 264525d23c0SHong Zhang ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 265525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 266525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 267525d23c0SHong Zhang t_dx += t1 - t0; 268525d23c0SHong Zhang #endif 269525d23c0SHong Zhang 270525d23c0SHong Zhang /* 271525d23c0SHong Zhang Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 272525d23c0SHong Zhang w2 = F(x1 + dx) - F(x1) 273525d23c0SHong Zhang */ 274525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 275525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 276525d23c0SHong Zhang #endif 277525d23c0SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 278525d23c0SHong Zhang ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 279525d23c0SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 280525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 281525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 282525d23c0SHong Zhang t_func += t1 - t0; 283525d23c0SHong Zhang #endif 284525d23c0SHong Zhang 285525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 286525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 287525d23c0SHong Zhang #endif 288525d23c0SHong Zhang ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 289525d23c0SHong Zhang 290525d23c0SHong Zhang /* 291525d23c0SHong Zhang Loop over rows of vector, putting w2/dx into Jacobian matrix 292525d23c0SHong Zhang */ 293525d23c0SHong Zhang ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 294525d23c0SHong Zhang nrows_k = nrows[k]; 295525d23c0SHong Zhang for (l=0; l<nrows_k; l++) { /* loop over rows */ 296525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 297525d23c0SHong Zhang ierr = PetscTime(&t00);CHKERRQ(ierr); 298525d23c0SHong Zhang #endif 299525d23c0SHong Zhang row = coloring->rowcolden2sp3[nz++]; 300525d23c0SHong Zhang col = coloring->rowcolden2sp3[nz++]; 301525d23c0SHong Zhang spidx = coloring->rowcolden2sp3[nz++]; 302525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 303525d23c0SHong Zhang ierr = PetscTime(&t11);CHKERRQ(ierr); 304525d23c0SHong Zhang t_kl += t11 - t00; 305525d23c0SHong Zhang #endif 306b582cc96SHong Zhang //printf(" row col val 307b582cc96SHong Zhang ca[spidx*bs2] = y[row*bs]/vscale_array[col*bs]; 308b582cc96SHong Zhang //printf(" (%d, %d, %g)\n", row*bs,col*bs,ca[spidx*bs2]); 309525d23c0SHong Zhang } 310525d23c0SHong Zhang ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 311525d23c0SHong Zhang 312525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 313525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 314525d23c0SHong Zhang t_setvals += t1 - t0; 315525d23c0SHong Zhang #endif 316525d23c0SHong Zhang } /* endof for each color */ 317525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 318525d23c0SHong Zhang printf(" FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx); 319525d23c0SHong Zhang printf(" FDColorApply time: kl %g\n",t_kl); 320525d23c0SHong Zhang #endif 321525d23c0SHong Zhang ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 322525d23c0SHong Zhang ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 323525d23c0SHong Zhang 324525d23c0SHong Zhang coloring->currentcolor = -1; 325525d23c0SHong Zhang PetscFunctionReturn(0); 326525d23c0SHong Zhang } 327639f9d9dSBarry Smith 32879c1e64dSHong Zhang #undef __FUNCT__ 32979c1e64dSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 33079c1e64dSHong Zhang /* 33179c1e64dSHong Zhang This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 33279c1e64dSHong Zhang */ 33379c1e64dSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 33479c1e64dSHong Zhang { 33579c1e64dSHong Zhang PetscErrorCode ierr; 33679c1e64dSHong Zhang PetscInt i,n,nrows,N,j,k,m,ncols,col; 33779c1e64dSHong Zhang const PetscInt *is,*rows,*ci,*cj; 33852011a10SHong Zhang PetscInt nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz; 33979c1e64dSHong Zhang IS *isa; 340525d23c0SHong Zhang PetscBool isBAIJ; 34179c1e64dSHong Zhang Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 34279c1e64dSHong Zhang 34379c1e64dSHong Zhang PetscFunctionBegin; 34479c1e64dSHong Zhang ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 34552011a10SHong Zhang 346476e0d0aSHong Zhang /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 347476e0d0aSHong Zhang SeqBAIJ calls this routine, thus check it */ 34879c1e64dSHong Zhang ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 349525d23c0SHong Zhang ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 350525d23c0SHong Zhang if (!isBAIJ) { 35152011a10SHong Zhang bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */ 35279c1e64dSHong Zhang } 35379c1e64dSHong Zhang N = mat->cmap->N/bs; 35479c1e64dSHong Zhang c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 35579c1e64dSHong Zhang c->N = mat->cmap->N/bs; 35679c1e64dSHong Zhang c->m = mat->rmap->N/bs; 35779c1e64dSHong Zhang c->rstart = 0; 35879c1e64dSHong Zhang 35979c1e64dSHong Zhang c->ncolors = nis; 36079c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 36179c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 36279c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 36385740eacSHong Zhang ierr = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr); 36479c1e64dSHong Zhang 365525d23c0SHong Zhang if (isBAIJ) { 366525d23c0SHong Zhang ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 367525d23c0SHong Zhang } else { 3686378f32dSHong Zhang ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 369525d23c0SHong Zhang } 37079c1e64dSHong Zhang 371476e0d0aSHong Zhang ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr); 37279c1e64dSHong Zhang ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 37379c1e64dSHong Zhang 37417a7dee1SHong Zhang nz = 0; 37579c1e64dSHong Zhang for (i=0; i<nis; i++) { /* loop over colors */ 37679c1e64dSHong Zhang ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 37779c1e64dSHong Zhang ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 37879c1e64dSHong Zhang 37979c1e64dSHong Zhang c->ncolumns[i] = n; 38079c1e64dSHong Zhang if (n) { 38179c1e64dSHong Zhang ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 38279c1e64dSHong Zhang ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 38379c1e64dSHong Zhang } else { 38479c1e64dSHong Zhang c->columns[i] = 0; 38579c1e64dSHong Zhang } 38679c1e64dSHong Zhang 38779c1e64dSHong Zhang /* fast, crude version requires O(N*N) work */ 388476e0d0aSHong Zhang nrows = 0; 38979c1e64dSHong Zhang for (j=0; j<n; j++) { /* loop over columns */ 39079c1e64dSHong Zhang col = is[j]; 39179c1e64dSHong Zhang rows = cj + ci[col]; 39279c1e64dSHong Zhang m = ci[col+1] - ci[col]; 39379c1e64dSHong Zhang for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 39479c1e64dSHong Zhang rowhit[*rows] = col + 1; 395476e0d0aSHong Zhang spidxhit[*rows++] = spidx[ci[col] + k]; 396476e0d0aSHong Zhang nrows++; 39779c1e64dSHong Zhang } 39879c1e64dSHong Zhang } 399476e0d0aSHong Zhang c->nrows[i] = nrows; /* total num of rows for this color */ 40079c1e64dSHong Zhang 40179c1e64dSHong Zhang nrows = 0; 40279c1e64dSHong Zhang for (j=0; j<N; j++) { /* loop over rows */ 40379c1e64dSHong Zhang if (rowhit[j]) { 40417a7dee1SHong Zhang c->rowcolden2sp3[nz++] = j; /* row index */ 40517a7dee1SHong Zhang c->rowcolden2sp3[nz++] = rowhit[j] - 1; /* column index */ 40617a7dee1SHong Zhang c->rowcolden2sp3[nz++] = spidxhit[j]; /* den2sp index */ 40779c1e64dSHong Zhang rowhit[j] = 0.0; /* zero rowhit for reuse */ 40879c1e64dSHong Zhang } 40979c1e64dSHong Zhang } 41079c1e64dSHong Zhang ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 41179c1e64dSHong Zhang } 41217a7dee1SHong Zhang if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz); 41385740eacSHong Zhang 414525d23c0SHong Zhang if (isBAIJ) { 415525d23c0SHong Zhang ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 416525d23c0SHong Zhang } else { 4176378f32dSHong Zhang ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 418525d23c0SHong Zhang } 419476e0d0aSHong Zhang ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr); 42079c1e64dSHong Zhang ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 421476e0d0aSHong Zhang 4224737c552SHong Zhang c->ctype = IS_COLORING_GHOSTED; 423b582cc96SHong Zhang if (isBAIJ) { 424b582cc96SHong Zhang mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ; 425b582cc96SHong Zhang } else { 42679c1e64dSHong Zhang mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 427b582cc96SHong Zhang } 42852011a10SHong Zhang ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 42979c1e64dSHong Zhang PetscFunctionReturn(0); 43079c1e64dSHong Zhang } 43179c1e64dSHong Zhang 4324a2ae208SSatish Balay #undef __FUNCT__ 4334a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 4343acb8795SBarry Smith /* 4353acb8795SBarry Smith This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 4363acb8795SBarry Smith This is why it has the ugly code with the MatGetBlockSize() 4373acb8795SBarry Smith */ 438dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 439639f9d9dSBarry Smith { 4406849ba73SBarry Smith PetscErrorCode ierr; 4411a83f524SJed Brown PetscInt i,n,nrows,N,j,k,m,ncols,col; 4421a83f524SJed Brown const PetscInt *is,*rows,*ci,*cj; 4433acb8795SBarry Smith PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 444b9617806SBarry Smith IS *isa; 445ace3abfcSBarry Smith PetscBool done,flg = PETSC_FALSE; 446476e0d0aSHong Zhang PetscBool flg1; 44779c1e64dSHong Zhang PetscBool new_impl=PETSC_FALSE; 448639f9d9dSBarry Smith 4493a40ed3dSBarry Smith PetscFunctionBegin; 45079c1e64dSHong Zhang ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 45179c1e64dSHong Zhang if (new_impl){ 45279c1e64dSHong Zhang ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 45379c1e64dSHong Zhang PetscFunctionReturn(0); 45479c1e64dSHong Zhang } 455e7e72b3dSBarry Smith if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 456522c5e43SBarry Smith 457b9617806SBarry Smith ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 458476e0d0aSHong Zhang /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 459476e0d0aSHong Zhang SeqBAIJ calls this routine, thus check it */ 460251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 461476e0d0aSHong Zhang if (flg1) { 4623acb8795SBarry Smith ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 4633acb8795SBarry Smith } 4643acb8795SBarry Smith 4653acb8795SBarry Smith N = mat->cmap->N/bs; 4663acb8795SBarry Smith c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 4673acb8795SBarry Smith c->N = mat->cmap->N/bs; 4683acb8795SBarry Smith c->m = mat->rmap->N/bs; 469005c665bSBarry Smith c->rstart = 0; 470005c665bSBarry Smith 471639f9d9dSBarry Smith c->ncolors = nis; 47238baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 47338baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 47438baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 47538baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 47638baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 47743a90d84SBarry Smith 4783acb8795SBarry Smith ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 479ce94432eSBarry Smith if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 48070c3da92SBarry Smith 48170c3da92SBarry Smith /* 482639f9d9dSBarry Smith Temporary option to allow for debugging/testing 48370c3da92SBarry Smith */ 4840298fd71SBarry Smith ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 48570c3da92SBarry Smith 48638baddfdSBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 48738baddfdSBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 48870c3da92SBarry Smith 489639f9d9dSBarry Smith for (i=0; i<nis; i++) { 490b9b97703SBarry Smith ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 491639f9d9dSBarry Smith ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 4922205254eSKarl Rupp 493639f9d9dSBarry Smith c->ncolumns[i] = n; 494639f9d9dSBarry Smith if (n) { 49538baddfdSBarry Smith ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 49638baddfdSBarry Smith ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 49770c3da92SBarry Smith } else { 498639f9d9dSBarry Smith c->columns[i] = 0; 49970c3da92SBarry Smith } 50070c3da92SBarry Smith 5013a7fca6bSBarry Smith if (!flg) { /* ------------------------------------------------------------------------------*/ 5023a7fca6bSBarry Smith /* fast, crude version requires O(N*N) work */ 50338baddfdSBarry Smith ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 504639f9d9dSBarry Smith /* loop over columns*/ 505639f9d9dSBarry Smith for (j=0; j<n; j++) { 506639f9d9dSBarry Smith col = is[j]; 507639f9d9dSBarry Smith rows = cj + ci[col]; 508639f9d9dSBarry Smith m = ci[col+1] - ci[col]; 509639f9d9dSBarry Smith /* loop over columns marking them in rowhit */ 510639f9d9dSBarry Smith for (k=0; k<m; k++) { 511639f9d9dSBarry Smith rowhit[*rows++] = col + 1; 51270c3da92SBarry Smith } 51370c3da92SBarry Smith } 514639f9d9dSBarry Smith /* count the number of hits */ 515639f9d9dSBarry Smith nrows = 0; 51670c3da92SBarry Smith for (j=0; j<N; j++) { 517639f9d9dSBarry Smith if (rowhit[j]) nrows++; 518639f9d9dSBarry Smith } 519639f9d9dSBarry Smith c->nrows[i] = nrows; 52038baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 52138baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 522639f9d9dSBarry Smith nrows = 0; 523639f9d9dSBarry Smith for (j=0; j<N; j++) { 524639f9d9dSBarry Smith if (rowhit[j]) { 525639f9d9dSBarry Smith c->rows[i][nrows] = j; 526639f9d9dSBarry Smith c->columnsforrow[i][nrows] = rowhit[j] - 1; 527639f9d9dSBarry Smith nrows++; 52870c3da92SBarry Smith } 52970c3da92SBarry Smith } 530639f9d9dSBarry Smith } else { /*-------------------------------------------------------------------------------*/ 5313a7fca6bSBarry Smith /* slow version, using rowhit as a linked list */ 53238baddfdSBarry Smith PetscInt currentcol,fm,mfm; 533639f9d9dSBarry Smith rowhit[N] = N; 534639f9d9dSBarry Smith nrows = 0; 535639f9d9dSBarry Smith /* loop over columns */ 536639f9d9dSBarry Smith for (j=0; j<n; j++) { 537639f9d9dSBarry Smith col = is[j]; 538639f9d9dSBarry Smith rows = cj + ci[col]; 539639f9d9dSBarry Smith m = ci[col+1] - ci[col]; 540639f9d9dSBarry Smith /* loop over columns marking them in rowhit */ 541639f9d9dSBarry Smith fm = N; /* fm points to first entry in linked list */ 542639f9d9dSBarry Smith for (k=0; k<m; k++) { 543639f9d9dSBarry Smith currentcol = *rows++; 544639f9d9dSBarry Smith /* is it already in the list? */ 545639f9d9dSBarry Smith do { 546639f9d9dSBarry Smith mfm = fm; 547639f9d9dSBarry Smith fm = rowhit[fm]; 548639f9d9dSBarry Smith } while (fm < currentcol); 549639f9d9dSBarry Smith /* not in list so add it */ 550639f9d9dSBarry Smith if (fm != currentcol) { 551639f9d9dSBarry Smith nrows++; 552639f9d9dSBarry Smith columnsforrow[currentcol] = col; 553639f9d9dSBarry Smith /* next three lines insert new entry into linked list */ 554639f9d9dSBarry Smith rowhit[mfm] = currentcol; 555639f9d9dSBarry Smith rowhit[currentcol] = fm; 556639f9d9dSBarry Smith fm = currentcol; 557639f9d9dSBarry Smith /* fm points to present position in list since we know the columns are sorted */ 55871cd77b2SBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 559639f9d9dSBarry Smith } 560639f9d9dSBarry Smith } 561639f9d9dSBarry Smith c->nrows[i] = nrows; 56238baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 56338baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 564639f9d9dSBarry Smith /* now store the linked list of rows into c->rows[i] */ 565639f9d9dSBarry Smith nrows = 0; 566639f9d9dSBarry Smith fm = rowhit[N]; 567639f9d9dSBarry Smith do { 568639f9d9dSBarry Smith c->rows[i][nrows] = fm; 569639f9d9dSBarry Smith c->columnsforrow[i][nrows++] = columnsforrow[fm]; 570639f9d9dSBarry Smith fm = rowhit[fm]; 571639f9d9dSBarry Smith } while (fm < N); 572639f9d9dSBarry Smith } /* ---------------------------------------------------------------------------------------*/ 573639f9d9dSBarry Smith ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 574639f9d9dSBarry Smith } 5753acb8795SBarry Smith ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 576639f9d9dSBarry Smith 577606d414cSSatish Balay ierr = PetscFree(rowhit);CHKERRQ(ierr); 578606d414cSSatish Balay ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 579639f9d9dSBarry Smith 58030b34957SBarry Smith /* Optimize by adding the vscale, and scaleforrow[][] fields */ 58130b34957SBarry Smith /* 58230b34957SBarry Smith see the version for MPIAIJ 58330b34957SBarry Smith */ 584ce94432eSBarry Smith ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 58538baddfdSBarry Smith ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 58630b34957SBarry Smith for (k=0; k<c->ncolors; k++) { 58738baddfdSBarry Smith ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 58830b34957SBarry Smith for (l=0; l<c->nrows[k]; l++) { 58930b34957SBarry Smith col = c->columnsforrow[k][l]; 5902205254eSKarl Rupp 59130b34957SBarry Smith c->vscaleforrow[k][l] = col; 59230b34957SBarry Smith } 59330b34957SBarry Smith } 594b9617806SBarry Smith ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 5953a40ed3dSBarry Smith PetscFunctionReturn(0); 59670c3da92SBarry Smith } 59779c1e64dSHong Zhang 598