1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/baij/seq/baij.h> 4 5 /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */ 6 /* #define JACOBIANCOLOROPT */ 7 #if defined(JACOBIANCOLOROPT) 8 #include <petsctime.h> 9 #endif 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatFDColoringApply_SeqAIJ" 12 PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 13 { 14 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 15 PetscErrorCode ierr; 16 PetscInt k,l,row,col,N; 17 PetscScalar dx,*y,*xx,*w3_array; 18 PetscScalar *vscale_array; 19 PetscReal epsilon=coloring->error_rel,umin = coloring->umin,unorm; 20 Vec w1=coloring->w1,w2=coloring->w2,w3; 21 void *fctx=coloring->fctx; 22 PetscBool flg=PETSC_FALSE; 23 Mat_SeqAIJ *csp=(Mat_SeqAIJ*)J->data; 24 PetscScalar *ca=csp->a; 25 const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 26 PetscInt **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx; 27 #if defined(JACOBIANCOLOROPT) 28 PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11; 29 #endif 30 31 PetscFunctionBegin; 32 #if defined(JACOBIANCOLOROPT) 33 ierr = PetscTime(&t0);CHKERRQ(ierr); 34 #endif 35 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 36 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 37 if (flg) { 38 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 39 } else { 40 PetscBool assembled; 41 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 42 if (assembled) { 43 ierr = MatZeroEntries(J);CHKERRQ(ierr); 44 } 45 } 46 47 if (!coloring->vscale) { 48 ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 49 } 50 51 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 52 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 53 } 54 #if defined(JACOBIANCOLOROPT) 55 ierr = PetscTime(&t1);CHKERRQ(ierr); 56 t_init += t1 - t0; 57 #endif 58 59 /* Set w1 = F(x1) */ 60 #if defined(JACOBIANCOLOROPT) 61 ierr = PetscTime(&t0);CHKERRQ(ierr); 62 #endif 63 if (!coloring->fset) { 64 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 65 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 66 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 67 } else { 68 coloring->fset = PETSC_FALSE; 69 } 70 #if defined(JACOBIANCOLOROPT) 71 ierr = PetscTime(&t1);CHKERRQ(ierr); 72 t_setvals += t1 - t0; 73 #endif 74 75 #if defined(JACOBIANCOLOROPT) 76 ierr = PetscTime(&t0);CHKERRQ(ierr); 77 #endif 78 if (!coloring->w3) { 79 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 80 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 81 } 82 w3 = coloring->w3; 83 84 /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */ 85 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 86 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 87 ierr = VecGetSize(x1,&N);CHKERRQ(ierr); 88 for (col=0; col<N; col++) { 89 if (coloring->htype[0] == 'w') { 90 dx = 1.0 + unorm; 91 } else { 92 dx = xx[col]; 93 } 94 if (dx == (PetscScalar)0.0) dx = 1.0; 95 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 96 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 97 dx *= epsilon; 98 vscale_array[col] = (PetscScalar)dx; 99 } 100 #if defined(JACOBIANCOLOROPT) 101 ierr = PetscTime(&t1);CHKERRQ(ierr); 102 t_init += t1 - t0; 103 #endif 104 105 nz = 0; 106 for (k=0; k<ncolors; k++) { /* loop over colors */ 107 #if defined(JACOBIANCOLOROPT) 108 ierr = PetscTime(&t0);CHKERRQ(ierr); 109 #endif 110 coloring->currentcolor = k; 111 112 /* 113 Loop over each column associated with color 114 adding the perturbation to the vector w3 = x1 + dx. 115 */ 116 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 117 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 118 ncolumns_k = ncolumns[k]; 119 for (l=0; l<ncolumns_k; l++) { /* loop over columns */ 120 col = columns[k][l]; 121 w3_array[col] += vscale_array[col]; 122 } 123 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 124 #if defined(JACOBIANCOLOROPT) 125 ierr = PetscTime(&t1);CHKERRQ(ierr); 126 t_dx += t1 - t0; 127 #endif 128 129 /* 130 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 131 w2 = F(x1 + dx) - F(x1) 132 */ 133 #if defined(JACOBIANCOLOROPT) 134 ierr = PetscTime(&t0);CHKERRQ(ierr); 135 #endif 136 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 137 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 138 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 139 #if defined(JACOBIANCOLOROPT) 140 ierr = PetscTime(&t1);CHKERRQ(ierr); 141 t_func += t1 - t0; 142 #endif 143 144 #if defined(JACOBIANCOLOROPT) 145 ierr = PetscTime(&t0);CHKERRQ(ierr); 146 #endif 147 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 148 149 /* 150 Loop over rows of vector, putting w2/dx into Jacobian matrix 151 */ 152 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 153 nrows_k = nrows[k]; 154 for (l=0; l<nrows_k; l++) { /* loop over rows */ 155 #if defined(JACOBIANCOLOROPT) 156 ierr = PetscTime(&t00);CHKERRQ(ierr); 157 #endif 158 row = coloring->rowcolden2sp3[nz++]; 159 col = coloring->rowcolden2sp3[nz++]; 160 spidx = coloring->rowcolden2sp3[nz++]; 161 #if defined(JACOBIANCOLOROPT) 162 ierr = PetscTime(&t11);CHKERRQ(ierr); 163 t_kl += t11 - t00; 164 #endif 165 ca[spidx] = y[row]/vscale_array[col]; 166 } 167 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 168 169 #if defined(JACOBIANCOLOROPT) 170 ierr = PetscTime(&t1);CHKERRQ(ierr); 171 t_setvals += t1 - t0; 172 #endif 173 } /* endof for each color */ 174 #if defined(JACOBIANCOLOROPT) 175 printf(" FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx); 176 printf(" FDColorApply time: kl %g\n",t_kl); 177 #endif 178 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 179 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 180 181 coloring->currentcolor = -1; 182 PetscFunctionReturn(0); 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 187 /* 188 This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 189 */ 190 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 191 { 192 PetscErrorCode ierr; 193 PetscInt i,n,nrows,N,j,k,m,ncols,col; 194 const PetscInt *is,*rows,*ci,*cj; 195 PetscInt nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz; 196 IS *isa; 197 PetscBool isBAIJ; 198 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 199 200 PetscFunctionBegin; 201 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 202 203 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 204 SeqBAIJ calls this routine, thus check it */ 205 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 206 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 207 if (!isBAIJ) { 208 bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */ 209 } 210 N = mat->cmap->N/bs; 211 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 212 c->N = mat->cmap->N/bs; 213 c->m = mat->rmap->N/bs; 214 c->rstart = 0; 215 216 c->ncolors = nis; 217 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 218 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 219 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 220 ierr = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr); 221 222 if (isBAIJ) { 223 ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 224 } else { 225 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 226 } 227 228 ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr); 229 ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 230 231 nz = 0; 232 for (i=0; i<nis; i++) { /* loop over colors */ 233 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 234 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 235 236 c->ncolumns[i] = n; 237 if (n) { 238 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 239 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 240 } else { 241 c->columns[i] = 0; 242 } 243 244 /* fast, crude version requires O(N*N) work */ 245 nrows = 0; 246 for (j=0; j<n; j++) { /* loop over columns */ 247 col = is[j]; 248 rows = cj + ci[col]; 249 m = ci[col+1] - ci[col]; 250 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 251 rowhit[*rows] = col + 1; 252 spidxhit[*rows++] = spidx[ci[col] + k]; 253 nrows++; 254 } 255 } 256 c->nrows[i] = nrows; /* total num of rows for this color */ 257 258 nrows = 0; 259 for (j=0; j<N; j++) { /* loop over rows */ 260 if (rowhit[j]) { 261 c->rowcolden2sp3[nz++] = j; /* row index */ 262 c->rowcolden2sp3[nz++] = rowhit[j] - 1; /* column index */ 263 c->rowcolden2sp3[nz++] = spidxhit[j]; /* den2sp index */ 264 rowhit[j] = 0.0; /* zero rowhit for reuse */ 265 } 266 } 267 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 268 } 269 if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz); 270 271 if (isBAIJ) { 272 ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 273 } else { 274 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 275 } 276 ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr); 277 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 278 279 c->ctype = IS_COLORING_GHOSTED; 280 mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 281 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 282 PetscFunctionReturn(0); 283 } 284 285 #undef __FUNCT__ 286 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 287 /* 288 This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 289 This is why it has the ugly code with the MatGetBlockSize() 290 */ 291 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 292 { 293 PetscErrorCode ierr; 294 PetscInt i,n,nrows,N,j,k,m,ncols,col; 295 const PetscInt *is,*rows,*ci,*cj; 296 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 297 IS *isa; 298 PetscBool done,flg = PETSC_FALSE; 299 PetscBool flg1; 300 PetscBool new_impl=PETSC_FALSE; 301 302 PetscFunctionBegin; 303 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 304 if (new_impl){ 305 ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 309 310 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 311 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 312 SeqBAIJ calls this routine, thus check it */ 313 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 314 if (flg1) { 315 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 316 } 317 318 N = mat->cmap->N/bs; 319 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 320 c->N = mat->cmap->N/bs; 321 c->m = mat->rmap->N/bs; 322 c->rstart = 0; 323 324 c->ncolors = nis; 325 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 326 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 327 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 328 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 329 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 330 331 ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 332 if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 333 334 /* 335 Temporary option to allow for debugging/testing 336 */ 337 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 338 339 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 340 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 341 342 for (i=0; i<nis; i++) { 343 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 344 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 345 346 c->ncolumns[i] = n; 347 if (n) { 348 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 349 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 350 } else { 351 c->columns[i] = 0; 352 } 353 354 if (!flg) { /* ------------------------------------------------------------------------------*/ 355 /* fast, crude version requires O(N*N) work */ 356 ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 357 /* loop over columns*/ 358 for (j=0; j<n; j++) { 359 col = is[j]; 360 rows = cj + ci[col]; 361 m = ci[col+1] - ci[col]; 362 /* loop over columns marking them in rowhit */ 363 for (k=0; k<m; k++) { 364 rowhit[*rows++] = col + 1; 365 } 366 } 367 /* count the number of hits */ 368 nrows = 0; 369 for (j=0; j<N; j++) { 370 if (rowhit[j]) nrows++; 371 } 372 c->nrows[i] = nrows; 373 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 374 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 375 nrows = 0; 376 for (j=0; j<N; j++) { 377 if (rowhit[j]) { 378 c->rows[i][nrows] = j; 379 c->columnsforrow[i][nrows] = rowhit[j] - 1; 380 nrows++; 381 } 382 } 383 } else { /*-------------------------------------------------------------------------------*/ 384 /* slow version, using rowhit as a linked list */ 385 PetscInt currentcol,fm,mfm; 386 rowhit[N] = N; 387 nrows = 0; 388 /* loop over columns */ 389 for (j=0; j<n; j++) { 390 col = is[j]; 391 rows = cj + ci[col]; 392 m = ci[col+1] - ci[col]; 393 /* loop over columns marking them in rowhit */ 394 fm = N; /* fm points to first entry in linked list */ 395 for (k=0; k<m; k++) { 396 currentcol = *rows++; 397 /* is it already in the list? */ 398 do { 399 mfm = fm; 400 fm = rowhit[fm]; 401 } while (fm < currentcol); 402 /* not in list so add it */ 403 if (fm != currentcol) { 404 nrows++; 405 columnsforrow[currentcol] = col; 406 /* next three lines insert new entry into linked list */ 407 rowhit[mfm] = currentcol; 408 rowhit[currentcol] = fm; 409 fm = currentcol; 410 /* fm points to present position in list since we know the columns are sorted */ 411 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 412 } 413 } 414 c->nrows[i] = nrows; 415 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 416 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 417 /* now store the linked list of rows into c->rows[i] */ 418 nrows = 0; 419 fm = rowhit[N]; 420 do { 421 c->rows[i][nrows] = fm; 422 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 423 fm = rowhit[fm]; 424 } while (fm < N); 425 } /* ---------------------------------------------------------------------------------------*/ 426 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 427 } 428 ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 429 430 ierr = PetscFree(rowhit);CHKERRQ(ierr); 431 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 432 433 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 434 /* 435 see the version for MPIAIJ 436 */ 437 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 438 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 439 for (k=0; k<c->ncolors; k++) { 440 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 441 for (l=0; l<c->nrows[k]; l++) { 442 col = c->columnsforrow[k][l]; 443 444 c->vscaleforrow[k][l] = col; 445 } 446 } 447 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 448 PetscFunctionReturn(0); 449 } 450 451