170c3da92SBarry Smith 2c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> 3525d23c0SHong Zhang #include <../src/mat/impls/baij/seq/baij.h> 4525d23c0SHong Zhang 5*b582cc96SHong Zhang #undef __FUNCT__ 6*b582cc96SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqBAIJ" 7*b582cc96SHong Zhang PetscErrorCode MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 8*b582cc96SHong Zhang { 9*b582cc96SHong Zhang PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 10*b582cc96SHong Zhang PetscErrorCode ierr; 11*b582cc96SHong Zhang PetscInt bs=J->rmap->bs,bs2=bs*bs,i,j,k,start,end,l,row,col,N; 12*b582cc96SHong Zhang PetscScalar dx,*y,*xx,*w3_array; 13*b582cc96SHong Zhang PetscScalar *vscale_array; 14*b582cc96SHong Zhang PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 15*b582cc96SHong Zhang Vec w1 = coloring->w1,w2=coloring->w2,w3; 16*b582cc96SHong Zhang void *fctx = coloring->fctx; 17*b582cc96SHong Zhang PetscBool flg = PETSC_FALSE; 18*b582cc96SHong Zhang Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 19*b582cc96SHong Zhang PetscScalar *ca = csp->a; 20*b582cc96SHong Zhang PetscInt brow,bcol,bspidx,spidx,nz,nz_i; 21*b582cc96SHong Zhang 22*b582cc96SHong Zhang PetscFunctionBegin; 23*b582cc96SHong Zhang ierr = MatSetUnfactored(J);CHKERRQ(ierr); 24*b582cc96SHong Zhang ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 25*b582cc96SHong Zhang if (flg) { 26*b582cc96SHong Zhang ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 27*b582cc96SHong Zhang } else { 28*b582cc96SHong Zhang PetscBool assembled; 29*b582cc96SHong Zhang ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 30*b582cc96SHong Zhang if (assembled) { 31*b582cc96SHong Zhang ierr = MatZeroEntries(J);CHKERRQ(ierr); 32*b582cc96SHong Zhang } 33*b582cc96SHong Zhang } 34*b582cc96SHong Zhang 35*b582cc96SHong Zhang if (!coloring->vscale) { 36*b582cc96SHong Zhang ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 37*b582cc96SHong Zhang } 38*b582cc96SHong Zhang 39*b582cc96SHong Zhang if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 40*b582cc96SHong Zhang ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 41*b582cc96SHong Zhang } 42*b582cc96SHong Zhang ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 43*b582cc96SHong Zhang 44*b582cc96SHong Zhang /* Set w1 = F(x1) */ 45*b582cc96SHong Zhang if (!coloring->fset) { 46*b582cc96SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 47*b582cc96SHong Zhang ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 48*b582cc96SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 49*b582cc96SHong Zhang } else { 50*b582cc96SHong Zhang coloring->fset = PETSC_FALSE; 51*b582cc96SHong Zhang } 52*b582cc96SHong Zhang 53*b582cc96SHong Zhang if (!coloring->w3) { 54*b582cc96SHong Zhang ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 55*b582cc96SHong Zhang ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 56*b582cc96SHong Zhang } 57*b582cc96SHong Zhang w3 = coloring->w3; 58*b582cc96SHong Zhang 59*b582cc96SHong Zhang /* Compute all the local scale factors: vscale = 1./dx = 1./(epsilon*xx) */ 60*b582cc96SHong Zhang ierr = VecGetLocalSize(x1,&N);CHKERRQ(ierr); 61*b582cc96SHong Zhang ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 62*b582cc96SHong Zhang ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 63*b582cc96SHong Zhang 64*b582cc96SHong Zhang /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 65*b582cc96SHong Zhang for (col=0; col<N; col++) { 66*b582cc96SHong Zhang if (coloring->htype[0] == 'w') { 67*b582cc96SHong Zhang dx = 1.0 + unorm; 68*b582cc96SHong Zhang } else { 69*b582cc96SHong Zhang dx = xx[col]; 70*b582cc96SHong Zhang } 71*b582cc96SHong Zhang if (dx == (PetscScalar)0.0) dx = 1.0; 72*b582cc96SHong Zhang if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 73*b582cc96SHong Zhang else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 74*b582cc96SHong Zhang dx *= epsilon; 75*b582cc96SHong Zhang vscale_array[col] = (PetscScalar)dx; 76*b582cc96SHong Zhang } 77*b582cc96SHong Zhang 78*b582cc96SHong Zhang nz = 0; 79*b582cc96SHong Zhang for (k=0; k<coloring->ncolors; k++) { /* Loop over each color */ 80*b582cc96SHong Zhang coloring->currentcolor = k; 81*b582cc96SHong Zhang nz_i = nz; 82*b582cc96SHong Zhang for (i=0; i<bs; i++) { 83*b582cc96SHong Zhang //printf(" color= %d, i= %d, nz_i= %d ----------------\n",k,i,nz_i); 84*b582cc96SHong Zhang nz = nz_i; 85*b582cc96SHong Zhang ierr = VecCopy(x1,w3);CHKERRQ(ierr); 86*b582cc96SHong Zhang ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 87*b582cc96SHong Zhang 88*b582cc96SHong Zhang /* 89*b582cc96SHong Zhang Loop over each column associated with color 90*b582cc96SHong Zhang adding the perturbation to the vector w3. 91*b582cc96SHong Zhang */ 92*b582cc96SHong Zhang for (l=0; l<coloring->ncolumns[k]; l++) { 93*b582cc96SHong Zhang col = i + bs*coloring->columns[k][l]; 94*b582cc96SHong Zhang w3_array[col] += vscale_array[col]; 95*b582cc96SHong Zhang } 96*b582cc96SHong Zhang ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 97*b582cc96SHong Zhang 98*b582cc96SHong Zhang /* 99*b582cc96SHong Zhang Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 100*b582cc96SHong Zhang w2 = F(x1 + dx) - F(x1) 101*b582cc96SHong Zhang */ 102*b582cc96SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 103*b582cc96SHong Zhang ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 104*b582cc96SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 105*b582cc96SHong Zhang ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 106*b582cc96SHong Zhang 107*b582cc96SHong Zhang /* 108*b582cc96SHong Zhang Loop over rows of vector, putting results into Jacobian matrix 109*b582cc96SHong Zhang */ 110*b582cc96SHong Zhang ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 111*b582cc96SHong Zhang for (l=0; l<coloring->nrows[k]; l++) { 112*b582cc96SHong Zhang brow = coloring->rowcolden2sp3[nz++]; 113*b582cc96SHong Zhang bcol = coloring->rowcolden2sp3[nz++]; 114*b582cc96SHong Zhang bspidx = coloring->rowcolden2sp3[nz++]; 115*b582cc96SHong Zhang 116*b582cc96SHong Zhang row = bs*brow; 117*b582cc96SHong Zhang col = bs*bcol+i; 118*b582cc96SHong Zhang spidx = bs2*bspidx+i*bs; 119*b582cc96SHong Zhang 120*b582cc96SHong Zhang for (j=0; j<bs; j++) { 121*b582cc96SHong Zhang ca[spidx+j] = y[row+j]/vscale_array[col]; 122*b582cc96SHong Zhang //printf("(%d %d %g) nz= %d\n",row+j,col,ca[spidx],nz-3); 123*b582cc96SHong Zhang } 124*b582cc96SHong Zhang } 125*b582cc96SHong Zhang ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 126*b582cc96SHong Zhang } 127*b582cc96SHong Zhang } /* endof for each color */ 128*b582cc96SHong Zhang ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 129*b582cc96SHong Zhang ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 130*b582cc96SHong Zhang 131*b582cc96SHong Zhang coloring->currentcolor = -1; 132*b582cc96SHong Zhang PetscFunctionReturn(0); 133*b582cc96SHong Zhang } 134*b582cc96SHong Zhang 135525d23c0SHong Zhang /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */ 136525d23c0SHong Zhang /* #define JACOBIANCOLOROPT */ 137525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 138525d23c0SHong Zhang #include <petsctime.h> 139525d23c0SHong Zhang #endif 140525d23c0SHong Zhang #undef __FUNCT__ 141525d23c0SHong Zhang #define __FUNCT__ "MatFDColoringApply_SeqAIJ" 142525d23c0SHong Zhang PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 143525d23c0SHong Zhang { 144525d23c0SHong Zhang PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 145525d23c0SHong Zhang PetscErrorCode ierr; 146*b582cc96SHong Zhang PetscInt k,l,row,col,N,bs=J->rmap->bs,bs2=bs*bs; 147525d23c0SHong Zhang PetscScalar dx,*y,*xx,*w3_array; 148525d23c0SHong Zhang PetscScalar *vscale_array; 149525d23c0SHong Zhang PetscReal epsilon=coloring->error_rel,umin = coloring->umin,unorm; 150525d23c0SHong Zhang Vec w1=coloring->w1,w2=coloring->w2,w3; 151525d23c0SHong Zhang void *fctx=coloring->fctx; 152*b582cc96SHong Zhang PetscBool flg=PETSC_FALSE,isBAIJ=PETSC_FALSE; 153*b582cc96SHong Zhang PetscScalar *ca; 154525d23c0SHong Zhang const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 155525d23c0SHong Zhang PetscInt **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx; 156525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 157525d23c0SHong Zhang PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11; 158525d23c0SHong Zhang #endif 159525d23c0SHong Zhang 160525d23c0SHong Zhang PetscFunctionBegin; 161525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 162525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 163525d23c0SHong Zhang #endif 164*b582cc96SHong Zhang ierr = PetscObjectTypeCompare((PetscObject)J,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 165*b582cc96SHong Zhang if (isBAIJ) { 166*b582cc96SHong Zhang Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 167*b582cc96SHong Zhang ca = csp->a; 168*b582cc96SHong Zhang } else { 169*b582cc96SHong Zhang Mat_SeqAIJ *csp=(Mat_SeqAIJ*)J->data; 170*b582cc96SHong Zhang ca = csp->a; 171*b582cc96SHong Zhang bs = 1; bs2 = 1; 172*b582cc96SHong Zhang } 173*b582cc96SHong Zhang 174525d23c0SHong Zhang ierr = MatSetUnfactored(J);CHKERRQ(ierr); 175525d23c0SHong Zhang ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 176525d23c0SHong Zhang if (flg) { 177525d23c0SHong Zhang ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 178525d23c0SHong Zhang } else { 179525d23c0SHong Zhang PetscBool assembled; 180525d23c0SHong Zhang ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 181525d23c0SHong Zhang if (assembled) { 182525d23c0SHong Zhang ierr = MatZeroEntries(J);CHKERRQ(ierr); 183525d23c0SHong Zhang } 184525d23c0SHong Zhang } 185525d23c0SHong Zhang 186525d23c0SHong Zhang if (!coloring->vscale) { 187525d23c0SHong Zhang ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 188525d23c0SHong Zhang } 189525d23c0SHong Zhang 190525d23c0SHong Zhang if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 191525d23c0SHong Zhang ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 192525d23c0SHong Zhang } 193525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 194525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 195525d23c0SHong Zhang t_init += t1 - t0; 196525d23c0SHong Zhang #endif 197525d23c0SHong Zhang 198525d23c0SHong Zhang /* Set w1 = F(x1) */ 199525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 200525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 201525d23c0SHong Zhang #endif 202525d23c0SHong Zhang if (!coloring->fset) { 203525d23c0SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 204525d23c0SHong Zhang ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 205525d23c0SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 206525d23c0SHong Zhang } else { 207525d23c0SHong Zhang coloring->fset = PETSC_FALSE; 208525d23c0SHong Zhang } 209525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 210525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 211525d23c0SHong Zhang t_setvals += t1 - t0; 212525d23c0SHong Zhang #endif 213525d23c0SHong Zhang 214525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 215525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 216525d23c0SHong Zhang #endif 217525d23c0SHong Zhang if (!coloring->w3) { 218525d23c0SHong Zhang ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 219525d23c0SHong Zhang ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 220525d23c0SHong Zhang } 221525d23c0SHong Zhang w3 = coloring->w3; 222525d23c0SHong Zhang 223525d23c0SHong Zhang /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */ 224525d23c0SHong Zhang ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 225525d23c0SHong Zhang ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 226525d23c0SHong Zhang ierr = VecGetSize(x1,&N);CHKERRQ(ierr); 227525d23c0SHong Zhang for (col=0; col<N; col++) { 228525d23c0SHong Zhang if (coloring->htype[0] == 'w') { 229525d23c0SHong Zhang dx = 1.0 + unorm; 230525d23c0SHong Zhang } else { 231525d23c0SHong Zhang dx = xx[col]; 232525d23c0SHong Zhang } 233525d23c0SHong Zhang if (dx == (PetscScalar)0.0) dx = 1.0; 234525d23c0SHong Zhang if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 235525d23c0SHong Zhang else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 236525d23c0SHong Zhang dx *= epsilon; 237525d23c0SHong Zhang vscale_array[col] = (PetscScalar)dx; 238525d23c0SHong Zhang } 239525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 240525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 241525d23c0SHong Zhang t_init += t1 - t0; 242525d23c0SHong Zhang #endif 243525d23c0SHong Zhang 244525d23c0SHong Zhang nz = 0; 245525d23c0SHong Zhang for (k=0; k<ncolors; k++) { /* loop over colors */ 246525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 247525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 248525d23c0SHong Zhang #endif 249525d23c0SHong Zhang coloring->currentcolor = k; 250525d23c0SHong Zhang 251525d23c0SHong Zhang /* 252525d23c0SHong Zhang Loop over each column associated with color 253525d23c0SHong Zhang adding the perturbation to the vector w3 = x1 + dx. 254525d23c0SHong Zhang */ 255525d23c0SHong Zhang ierr = VecCopy(x1,w3);CHKERRQ(ierr); 256525d23c0SHong Zhang ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 257525d23c0SHong Zhang ncolumns_k = ncolumns[k]; 258525d23c0SHong Zhang for (l=0; l<ncolumns_k; l++) { /* loop over columns */ 259525d23c0SHong Zhang col = columns[k][l]; 260525d23c0SHong Zhang w3_array[col] += vscale_array[col]; 261525d23c0SHong Zhang } 262525d23c0SHong Zhang ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 263525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 264525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 265525d23c0SHong Zhang t_dx += t1 - t0; 266525d23c0SHong Zhang #endif 267525d23c0SHong Zhang 268525d23c0SHong Zhang /* 269525d23c0SHong Zhang Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 270525d23c0SHong Zhang w2 = F(x1 + dx) - F(x1) 271525d23c0SHong Zhang */ 272525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 273525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 274525d23c0SHong Zhang #endif 275525d23c0SHong Zhang ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 276525d23c0SHong Zhang ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 277525d23c0SHong Zhang ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 278525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 279525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 280525d23c0SHong Zhang t_func += t1 - t0; 281525d23c0SHong Zhang #endif 282525d23c0SHong Zhang 283525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 284525d23c0SHong Zhang ierr = PetscTime(&t0);CHKERRQ(ierr); 285525d23c0SHong Zhang #endif 286525d23c0SHong Zhang ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 287525d23c0SHong Zhang 288525d23c0SHong Zhang /* 289525d23c0SHong Zhang Loop over rows of vector, putting w2/dx into Jacobian matrix 290525d23c0SHong Zhang */ 291525d23c0SHong Zhang ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 292525d23c0SHong Zhang nrows_k = nrows[k]; 293525d23c0SHong Zhang for (l=0; l<nrows_k; l++) { /* loop over rows */ 294525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 295525d23c0SHong Zhang ierr = PetscTime(&t00);CHKERRQ(ierr); 296525d23c0SHong Zhang #endif 297525d23c0SHong Zhang row = coloring->rowcolden2sp3[nz++]; 298525d23c0SHong Zhang col = coloring->rowcolden2sp3[nz++]; 299525d23c0SHong Zhang spidx = coloring->rowcolden2sp3[nz++]; 300525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 301525d23c0SHong Zhang ierr = PetscTime(&t11);CHKERRQ(ierr); 302525d23c0SHong Zhang t_kl += t11 - t00; 303525d23c0SHong Zhang #endif 304*b582cc96SHong Zhang //printf(" row col val 305*b582cc96SHong Zhang ca[spidx*bs2] = y[row*bs]/vscale_array[col*bs]; 306*b582cc96SHong Zhang //printf(" (%d, %d, %g)\n", row*bs,col*bs,ca[spidx*bs2]); 307525d23c0SHong Zhang } 308525d23c0SHong Zhang ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 309525d23c0SHong Zhang 310525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 311525d23c0SHong Zhang ierr = PetscTime(&t1);CHKERRQ(ierr); 312525d23c0SHong Zhang t_setvals += t1 - t0; 313525d23c0SHong Zhang #endif 314525d23c0SHong Zhang } /* endof for each color */ 315525d23c0SHong Zhang #if defined(JACOBIANCOLOROPT) 316525d23c0SHong Zhang printf(" FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx); 317525d23c0SHong Zhang printf(" FDColorApply time: kl %g\n",t_kl); 318525d23c0SHong Zhang #endif 319525d23c0SHong Zhang ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 320525d23c0SHong Zhang ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 321525d23c0SHong Zhang 322525d23c0SHong Zhang coloring->currentcolor = -1; 323525d23c0SHong Zhang PetscFunctionReturn(0); 324525d23c0SHong Zhang } 325639f9d9dSBarry Smith 32679c1e64dSHong Zhang #undef __FUNCT__ 32779c1e64dSHong Zhang #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 32879c1e64dSHong Zhang /* 32979c1e64dSHong Zhang This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 33079c1e64dSHong Zhang */ 33179c1e64dSHong Zhang PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 33279c1e64dSHong Zhang { 33379c1e64dSHong Zhang PetscErrorCode ierr; 33479c1e64dSHong Zhang PetscInt i,n,nrows,N,j,k,m,ncols,col; 33579c1e64dSHong Zhang const PetscInt *is,*rows,*ci,*cj; 33652011a10SHong Zhang PetscInt nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz; 33779c1e64dSHong Zhang IS *isa; 338525d23c0SHong Zhang PetscBool isBAIJ; 33979c1e64dSHong Zhang Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 34079c1e64dSHong Zhang 34179c1e64dSHong Zhang PetscFunctionBegin; 34279c1e64dSHong Zhang ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 34352011a10SHong Zhang 344476e0d0aSHong Zhang /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 345476e0d0aSHong Zhang SeqBAIJ calls this routine, thus check it */ 34679c1e64dSHong Zhang ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 347525d23c0SHong Zhang ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 348525d23c0SHong Zhang if (!isBAIJ) { 34952011a10SHong Zhang bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */ 35079c1e64dSHong Zhang } 35179c1e64dSHong Zhang N = mat->cmap->N/bs; 35279c1e64dSHong Zhang c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 35379c1e64dSHong Zhang c->N = mat->cmap->N/bs; 35479c1e64dSHong Zhang c->m = mat->rmap->N/bs; 35579c1e64dSHong Zhang c->rstart = 0; 35679c1e64dSHong Zhang 35779c1e64dSHong Zhang c->ncolors = nis; 35879c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 35979c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 36079c1e64dSHong Zhang ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 36185740eacSHong Zhang ierr = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr); 36279c1e64dSHong Zhang 363525d23c0SHong Zhang if (isBAIJ) { 364525d23c0SHong Zhang ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 365525d23c0SHong Zhang } else { 3666378f32dSHong Zhang ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 367525d23c0SHong Zhang } 36879c1e64dSHong Zhang 369476e0d0aSHong Zhang ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr); 37079c1e64dSHong Zhang ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 37179c1e64dSHong Zhang 37217a7dee1SHong Zhang nz = 0; 37379c1e64dSHong Zhang for (i=0; i<nis; i++) { /* loop over colors */ 37479c1e64dSHong Zhang ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 37579c1e64dSHong Zhang ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 37679c1e64dSHong Zhang 37779c1e64dSHong Zhang c->ncolumns[i] = n; 37879c1e64dSHong Zhang if (n) { 37979c1e64dSHong Zhang ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 38079c1e64dSHong Zhang ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 38179c1e64dSHong Zhang } else { 38279c1e64dSHong Zhang c->columns[i] = 0; 38379c1e64dSHong Zhang } 38479c1e64dSHong Zhang 38579c1e64dSHong Zhang /* fast, crude version requires O(N*N) work */ 386476e0d0aSHong Zhang nrows = 0; 38779c1e64dSHong Zhang for (j=0; j<n; j++) { /* loop over columns */ 38879c1e64dSHong Zhang col = is[j]; 38979c1e64dSHong Zhang rows = cj + ci[col]; 39079c1e64dSHong Zhang m = ci[col+1] - ci[col]; 39179c1e64dSHong Zhang for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 39279c1e64dSHong Zhang rowhit[*rows] = col + 1; 393476e0d0aSHong Zhang spidxhit[*rows++] = spidx[ci[col] + k]; 394476e0d0aSHong Zhang nrows++; 39579c1e64dSHong Zhang } 39679c1e64dSHong Zhang } 397476e0d0aSHong Zhang c->nrows[i] = nrows; /* total num of rows for this color */ 39879c1e64dSHong Zhang 39979c1e64dSHong Zhang nrows = 0; 40079c1e64dSHong Zhang for (j=0; j<N; j++) { /* loop over rows */ 40179c1e64dSHong Zhang if (rowhit[j]) { 40217a7dee1SHong Zhang c->rowcolden2sp3[nz++] = j; /* row index */ 40317a7dee1SHong Zhang c->rowcolden2sp3[nz++] = rowhit[j] - 1; /* column index */ 40417a7dee1SHong Zhang c->rowcolden2sp3[nz++] = spidxhit[j]; /* den2sp index */ 40579c1e64dSHong Zhang rowhit[j] = 0.0; /* zero rowhit for reuse */ 40679c1e64dSHong Zhang } 40779c1e64dSHong Zhang } 40879c1e64dSHong Zhang ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 40979c1e64dSHong Zhang } 41017a7dee1SHong Zhang if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz); 41185740eacSHong Zhang 412525d23c0SHong Zhang if (isBAIJ) { 413525d23c0SHong Zhang ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 414525d23c0SHong Zhang } else { 4156378f32dSHong Zhang ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 416525d23c0SHong Zhang } 417476e0d0aSHong Zhang ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr); 41879c1e64dSHong Zhang ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 419476e0d0aSHong Zhang 4204737c552SHong Zhang c->ctype = IS_COLORING_GHOSTED; 421*b582cc96SHong Zhang if (isBAIJ) { 422*b582cc96SHong Zhang mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ; 423*b582cc96SHong Zhang } else { 42479c1e64dSHong Zhang mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 425*b582cc96SHong Zhang } 42652011a10SHong Zhang ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 42779c1e64dSHong Zhang PetscFunctionReturn(0); 42879c1e64dSHong Zhang } 42979c1e64dSHong Zhang 4304a2ae208SSatish Balay #undef __FUNCT__ 4314a2ae208SSatish Balay #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 4323acb8795SBarry Smith /* 4333acb8795SBarry Smith This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 4343acb8795SBarry Smith This is why it has the ugly code with the MatGetBlockSize() 4353acb8795SBarry Smith */ 436dfbe8321SBarry Smith PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 437639f9d9dSBarry Smith { 4386849ba73SBarry Smith PetscErrorCode ierr; 4391a83f524SJed Brown PetscInt i,n,nrows,N,j,k,m,ncols,col; 4401a83f524SJed Brown const PetscInt *is,*rows,*ci,*cj; 4413acb8795SBarry Smith PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 442b9617806SBarry Smith IS *isa; 443ace3abfcSBarry Smith PetscBool done,flg = PETSC_FALSE; 444476e0d0aSHong Zhang PetscBool flg1; 44579c1e64dSHong Zhang PetscBool new_impl=PETSC_FALSE; 446639f9d9dSBarry Smith 4473a40ed3dSBarry Smith PetscFunctionBegin; 44879c1e64dSHong Zhang ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 44979c1e64dSHong Zhang if (new_impl){ 45079c1e64dSHong Zhang ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 45179c1e64dSHong Zhang PetscFunctionReturn(0); 45279c1e64dSHong Zhang } 453e7e72b3dSBarry Smith if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 454522c5e43SBarry Smith 455b9617806SBarry Smith ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 456476e0d0aSHong Zhang /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 457476e0d0aSHong Zhang SeqBAIJ calls this routine, thus check it */ 458251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 459476e0d0aSHong Zhang if (flg1) { 4603acb8795SBarry Smith ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 4613acb8795SBarry Smith } 4623acb8795SBarry Smith 4633acb8795SBarry Smith N = mat->cmap->N/bs; 4643acb8795SBarry Smith c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 4653acb8795SBarry Smith c->N = mat->cmap->N/bs; 4663acb8795SBarry Smith c->m = mat->rmap->N/bs; 467005c665bSBarry Smith c->rstart = 0; 468005c665bSBarry Smith 469639f9d9dSBarry Smith c->ncolors = nis; 47038baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 47138baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 47238baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 47338baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 47438baddfdSBarry Smith ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 47543a90d84SBarry Smith 4763acb8795SBarry Smith ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 477ce94432eSBarry Smith if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 47870c3da92SBarry Smith 47970c3da92SBarry Smith /* 480639f9d9dSBarry Smith Temporary option to allow for debugging/testing 48170c3da92SBarry Smith */ 4820298fd71SBarry Smith ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 48370c3da92SBarry Smith 48438baddfdSBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 48538baddfdSBarry Smith ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 48670c3da92SBarry Smith 487639f9d9dSBarry Smith for (i=0; i<nis; i++) { 488b9b97703SBarry Smith ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 489639f9d9dSBarry Smith ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 4902205254eSKarl Rupp 491639f9d9dSBarry Smith c->ncolumns[i] = n; 492639f9d9dSBarry Smith if (n) { 49338baddfdSBarry Smith ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 49438baddfdSBarry Smith ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 49570c3da92SBarry Smith } else { 496639f9d9dSBarry Smith c->columns[i] = 0; 49770c3da92SBarry Smith } 49870c3da92SBarry Smith 4993a7fca6bSBarry Smith if (!flg) { /* ------------------------------------------------------------------------------*/ 5003a7fca6bSBarry Smith /* fast, crude version requires O(N*N) work */ 50138baddfdSBarry Smith ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 502639f9d9dSBarry Smith /* loop over columns*/ 503639f9d9dSBarry Smith for (j=0; j<n; j++) { 504639f9d9dSBarry Smith col = is[j]; 505639f9d9dSBarry Smith rows = cj + ci[col]; 506639f9d9dSBarry Smith m = ci[col+1] - ci[col]; 507639f9d9dSBarry Smith /* loop over columns marking them in rowhit */ 508639f9d9dSBarry Smith for (k=0; k<m; k++) { 509639f9d9dSBarry Smith rowhit[*rows++] = col + 1; 51070c3da92SBarry Smith } 51170c3da92SBarry Smith } 512639f9d9dSBarry Smith /* count the number of hits */ 513639f9d9dSBarry Smith nrows = 0; 51470c3da92SBarry Smith for (j=0; j<N; j++) { 515639f9d9dSBarry Smith if (rowhit[j]) nrows++; 516639f9d9dSBarry Smith } 517639f9d9dSBarry Smith c->nrows[i] = nrows; 51838baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 51938baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 520639f9d9dSBarry Smith nrows = 0; 521639f9d9dSBarry Smith for (j=0; j<N; j++) { 522639f9d9dSBarry Smith if (rowhit[j]) { 523639f9d9dSBarry Smith c->rows[i][nrows] = j; 524639f9d9dSBarry Smith c->columnsforrow[i][nrows] = rowhit[j] - 1; 525639f9d9dSBarry Smith nrows++; 52670c3da92SBarry Smith } 52770c3da92SBarry Smith } 528639f9d9dSBarry Smith } else { /*-------------------------------------------------------------------------------*/ 5293a7fca6bSBarry Smith /* slow version, using rowhit as a linked list */ 53038baddfdSBarry Smith PetscInt currentcol,fm,mfm; 531639f9d9dSBarry Smith rowhit[N] = N; 532639f9d9dSBarry Smith nrows = 0; 533639f9d9dSBarry Smith /* loop over columns */ 534639f9d9dSBarry Smith for (j=0; j<n; j++) { 535639f9d9dSBarry Smith col = is[j]; 536639f9d9dSBarry Smith rows = cj + ci[col]; 537639f9d9dSBarry Smith m = ci[col+1] - ci[col]; 538639f9d9dSBarry Smith /* loop over columns marking them in rowhit */ 539639f9d9dSBarry Smith fm = N; /* fm points to first entry in linked list */ 540639f9d9dSBarry Smith for (k=0; k<m; k++) { 541639f9d9dSBarry Smith currentcol = *rows++; 542639f9d9dSBarry Smith /* is it already in the list? */ 543639f9d9dSBarry Smith do { 544639f9d9dSBarry Smith mfm = fm; 545639f9d9dSBarry Smith fm = rowhit[fm]; 546639f9d9dSBarry Smith } while (fm < currentcol); 547639f9d9dSBarry Smith /* not in list so add it */ 548639f9d9dSBarry Smith if (fm != currentcol) { 549639f9d9dSBarry Smith nrows++; 550639f9d9dSBarry Smith columnsforrow[currentcol] = col; 551639f9d9dSBarry Smith /* next three lines insert new entry into linked list */ 552639f9d9dSBarry Smith rowhit[mfm] = currentcol; 553639f9d9dSBarry Smith rowhit[currentcol] = fm; 554639f9d9dSBarry Smith fm = currentcol; 555639f9d9dSBarry Smith /* fm points to present position in list since we know the columns are sorted */ 55671cd77b2SBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 557639f9d9dSBarry Smith } 558639f9d9dSBarry Smith } 559639f9d9dSBarry Smith c->nrows[i] = nrows; 56038baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 56138baddfdSBarry Smith ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 562639f9d9dSBarry Smith /* now store the linked list of rows into c->rows[i] */ 563639f9d9dSBarry Smith nrows = 0; 564639f9d9dSBarry Smith fm = rowhit[N]; 565639f9d9dSBarry Smith do { 566639f9d9dSBarry Smith c->rows[i][nrows] = fm; 567639f9d9dSBarry Smith c->columnsforrow[i][nrows++] = columnsforrow[fm]; 568639f9d9dSBarry Smith fm = rowhit[fm]; 569639f9d9dSBarry Smith } while (fm < N); 570639f9d9dSBarry Smith } /* ---------------------------------------------------------------------------------------*/ 571639f9d9dSBarry Smith ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 572639f9d9dSBarry Smith } 5733acb8795SBarry Smith ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 574639f9d9dSBarry Smith 575606d414cSSatish Balay ierr = PetscFree(rowhit);CHKERRQ(ierr); 576606d414cSSatish Balay ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 577639f9d9dSBarry Smith 57830b34957SBarry Smith /* Optimize by adding the vscale, and scaleforrow[][] fields */ 57930b34957SBarry Smith /* 58030b34957SBarry Smith see the version for MPIAIJ 58130b34957SBarry Smith */ 582ce94432eSBarry Smith ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 58338baddfdSBarry Smith ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 58430b34957SBarry Smith for (k=0; k<c->ncolors; k++) { 58538baddfdSBarry Smith ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 58630b34957SBarry Smith for (l=0; l<c->nrows[k]; l++) { 58730b34957SBarry Smith col = c->columnsforrow[k][l]; 5882205254eSKarl Rupp 58930b34957SBarry Smith c->vscaleforrow[k][l] = col; 59030b34957SBarry Smith } 59130b34957SBarry Smith } 592b9617806SBarry Smith ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 5933a40ed3dSBarry Smith PetscFunctionReturn(0); 59470c3da92SBarry Smith } 59579c1e64dSHong Zhang 596