1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/baij/seq/baij.h> 4 5 /* #define STOREVALS */ 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ" 8 PetscErrorCode MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 9 { 10 PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 11 PetscErrorCode ierr; 12 PetscInt bs=J->rmap->bs,bs2=bs*bs,i,j,k,l,row,col,N; 13 PetscScalar dx,*y,*xx,*w3_array; 14 PetscScalar *vscale_array; 15 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 16 Vec w1 = coloring->w1,w2=coloring->w2,w3; 17 void *fctx = coloring->fctx; 18 PetscBool flg = PETSC_FALSE; 19 Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 20 PetscScalar *ca = csp->a,*ca_l; 21 PetscInt brow,bcol,bspidx,spidx,nz,nz_i; 22 #if defined(STOREVALS) 23 PetscScalar *vals,*vals_l; 24 #endif 25 26 27 PetscFunctionBegin; 28 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 29 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 30 if (flg) { 31 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 32 } else { 33 PetscBool assembled; 34 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 35 if (assembled) { 36 ierr = MatZeroEntries(J);CHKERRQ(ierr); 37 } 38 } 39 40 if (!coloring->vscale) { 41 ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 42 } 43 44 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 45 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 46 } 47 48 /* Set w1 = F(x1) */ 49 if (!coloring->fset) { 50 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 51 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 52 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 53 } else { 54 coloring->fset = PETSC_FALSE; 55 } 56 57 if (!coloring->w3) { 58 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 59 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 60 } 61 w3 = coloring->w3; 62 63 /* Compute all the local scale factors: vscale = 1./dx = 1./(epsilon*xx) */ 64 ierr = VecGetLocalSize(x1,&N);CHKERRQ(ierr); 65 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 66 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 67 68 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 69 for (col=0; col<N; col++) { 70 if (coloring->htype[0] == 'w') { 71 dx = 1.0 + unorm; 72 } else { 73 dx = xx[col]; 74 } 75 if (dx == (PetscScalar)0.0) dx = 1.0; 76 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 77 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 78 dx *= epsilon; 79 vscale_array[col] = (PetscScalar)dx; 80 } 81 82 #if defined(STOREVALS) 83 ierr = PetscMalloc(N*bs*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 84 #endif 85 86 nz = 0; 87 for (k=0; k<coloring->ncolors; k++) { /* Loop over each color */ 88 coloring->currentcolor = k; 89 nz_i = nz; 90 for (i=0; i<bs; i++) { 91 nz = nz_i; 92 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 93 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 94 95 /* 96 Loop over each column associated with color 97 adding the perturbation to the vector w3. 98 */ 99 for (l=0; l<coloring->ncolumns[k]; l++) { 100 col = i + bs*coloring->columns[k][l]; 101 w3_array[col] += vscale_array[col]; 102 } 103 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 104 105 /* 106 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 107 w2 = F(x1 + dx) - F(x1) 108 */ 109 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 110 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 111 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 112 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 113 114 /* 115 Loop over rows of vector, putting results into Jacobian matrix 116 */ 117 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 118 for (l=0; l<coloring->nrows[k]; l++) { 119 brow = coloring->rowcolden2sp3[nz++]; 120 bcol = coloring->rowcolden2sp3[nz++]; 121 bspidx = coloring->rowcolden2sp3[nz++]; 122 #if defined(STOREVALS) 123 vals_l = vals + l*bs2; 124 #endif 125 row = bs*brow; 126 col = bs*bcol + i; 127 #if !defined(STOREVALS) 128 ca_l = ca + bs2*bspidx + i*bs; 129 #endif 130 for (j=0; j<bs; j++) { 131 #if defined(STOREVALS) 132 vals_l[i*bs+j] = y[row+j]/vscale_array[col]; 133 #else 134 ca_l[j] = y[row+j]/vscale_array[col]; 135 #endif 136 } 137 } 138 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 139 } /* endof for (i=0; i<bs; i++) */ 140 141 #if defined(STOREVALS) 142 /* insert vals to J */ 143 nz = nz_i; 144 for (l=0; l<coloring->nrows[k]; l++) { 145 nz += 2; 146 bspidx = coloring->rowcolden2sp3[nz++]; 147 vals_l = vals + l*bs2; 148 ca_l = ca + bs2*bspidx; 149 for (i=0; i<bs2; i++) { 150 ca_l[i] = vals_l[i]; 151 } 152 } 153 #endif 154 } /* endof for each color */ 155 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 156 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 157 #if defined(STOREVALS) 158 ierr = PetscFree(vals);CHKERRQ(ierr); 159 #endif 160 161 coloring->currentcolor = -1; 162 PetscFunctionReturn(0); 163 } 164 165 /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */ 166 /* #define JACOBIANCOLOROPT */ 167 #if defined(JACOBIANCOLOROPT) 168 #include <petsctime.h> 169 #endif 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatFDColoringApply_SeqAIJ" 172 PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 173 { 174 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 175 PetscErrorCode ierr; 176 PetscInt k,l,row,col,N,bs=J->rmap->bs,bs2=bs*bs; 177 PetscScalar dx,*y,*xx,*w3_array; 178 PetscScalar *vscale_array; 179 PetscReal epsilon=coloring->error_rel,umin = coloring->umin,unorm; 180 Vec w1=coloring->w1,w2=coloring->w2,w3; 181 void *fctx=coloring->fctx; 182 PetscBool flg=PETSC_FALSE,isBAIJ=PETSC_FALSE; 183 PetscScalar *ca; 184 const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 185 PetscInt **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx; 186 #if defined(JACOBIANCOLOROPT) 187 PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11; 188 #endif 189 190 PetscFunctionBegin; 191 #if defined(JACOBIANCOLOROPT) 192 ierr = PetscTime(&t0);CHKERRQ(ierr); 193 #endif 194 ierr = PetscObjectTypeCompare((PetscObject)J,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 195 if (isBAIJ) { 196 Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 197 ca = csp->a; 198 } else { 199 Mat_SeqAIJ *csp=(Mat_SeqAIJ*)J->data; 200 ca = csp->a; 201 bs = 1; bs2 = 1; 202 } 203 204 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 205 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 206 if (flg) { 207 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 208 } else { 209 PetscBool assembled; 210 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 211 if (assembled) { 212 ierr = MatZeroEntries(J);CHKERRQ(ierr); 213 } 214 } 215 216 if (!coloring->vscale) { 217 ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 218 } 219 220 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 221 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 222 } 223 #if defined(JACOBIANCOLOROPT) 224 ierr = PetscTime(&t1);CHKERRQ(ierr); 225 t_init += t1 - t0; 226 #endif 227 228 /* Set w1 = F(x1) */ 229 #if defined(JACOBIANCOLOROPT) 230 ierr = PetscTime(&t0);CHKERRQ(ierr); 231 #endif 232 if (!coloring->fset) { 233 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 234 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 235 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 236 } else { 237 coloring->fset = PETSC_FALSE; 238 } 239 #if defined(JACOBIANCOLOROPT) 240 ierr = PetscTime(&t1);CHKERRQ(ierr); 241 t_setvals += t1 - t0; 242 #endif 243 244 #if defined(JACOBIANCOLOROPT) 245 ierr = PetscTime(&t0);CHKERRQ(ierr); 246 #endif 247 if (!coloring->w3) { 248 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 249 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 250 } 251 w3 = coloring->w3; 252 253 /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */ 254 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 255 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 256 ierr = VecGetSize(x1,&N);CHKERRQ(ierr); 257 for (col=0; col<N; col++) { 258 if (coloring->htype[0] == 'w') { 259 dx = 1.0 + unorm; 260 } else { 261 dx = xx[col]; 262 } 263 if (dx == (PetscScalar)0.0) dx = 1.0; 264 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 265 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 266 dx *= epsilon; 267 vscale_array[col] = (PetscScalar)dx; 268 } 269 #if defined(JACOBIANCOLOROPT) 270 ierr = PetscTime(&t1);CHKERRQ(ierr); 271 t_init += t1 - t0; 272 #endif 273 274 nz = 0; 275 for (k=0; k<ncolors; k++) { /* loop over colors */ 276 #if defined(JACOBIANCOLOROPT) 277 ierr = PetscTime(&t0);CHKERRQ(ierr); 278 #endif 279 coloring->currentcolor = k; 280 281 /* 282 Loop over each column associated with color 283 adding the perturbation to the vector w3 = x1 + dx. 284 */ 285 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 286 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 287 ncolumns_k = ncolumns[k]; 288 for (l=0; l<ncolumns_k; l++) { /* loop over columns */ 289 col = columns[k][l]; 290 w3_array[col] += vscale_array[col]; 291 } 292 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 293 #if defined(JACOBIANCOLOROPT) 294 ierr = PetscTime(&t1);CHKERRQ(ierr); 295 t_dx += t1 - t0; 296 #endif 297 298 /* 299 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 300 w2 = F(x1 + dx) - F(x1) 301 */ 302 #if defined(JACOBIANCOLOROPT) 303 ierr = PetscTime(&t0);CHKERRQ(ierr); 304 #endif 305 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 306 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 307 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 308 #if defined(JACOBIANCOLOROPT) 309 ierr = PetscTime(&t1);CHKERRQ(ierr); 310 t_func += t1 - t0; 311 #endif 312 313 #if defined(JACOBIANCOLOROPT) 314 ierr = PetscTime(&t0);CHKERRQ(ierr); 315 #endif 316 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 317 318 /* 319 Loop over rows of vector, putting w2/dx into Jacobian matrix 320 */ 321 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 322 nrows_k = nrows[k]; 323 for (l=0; l<nrows_k; l++) { /* loop over rows */ 324 #if defined(JACOBIANCOLOROPT) 325 ierr = PetscTime(&t00);CHKERRQ(ierr); 326 #endif 327 row = coloring->rowcolden2sp3[nz++]; 328 col = coloring->rowcolden2sp3[nz++]; 329 spidx = coloring->rowcolden2sp3[nz++]; 330 #if defined(JACOBIANCOLOROPT) 331 ierr = PetscTime(&t11);CHKERRQ(ierr); 332 t_kl += t11 - t00; 333 #endif 334 //printf(" row col val 335 ca[spidx*bs2] = y[row*bs]/vscale_array[col*bs]; 336 //printf(" (%d, %d, %g)\n", row*bs,col*bs,ca[spidx*bs2]); 337 } 338 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 339 340 #if defined(JACOBIANCOLOROPT) 341 ierr = PetscTime(&t1);CHKERRQ(ierr); 342 t_setvals += t1 - t0; 343 #endif 344 } /* endof for each color */ 345 #if defined(JACOBIANCOLOROPT) 346 printf(" FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx); 347 printf(" FDColorApply time: kl %g\n",t_kl); 348 #endif 349 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 350 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 351 352 coloring->currentcolor = -1; 353 PetscFunctionReturn(0); 354 } 355 356 #undef __FUNCT__ 357 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 358 /* 359 This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 360 */ 361 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 362 { 363 PetscErrorCode ierr; 364 PetscInt i,n,nrows,N,j,k,m,ncols,col; 365 const PetscInt *is,*rows,*ci,*cj; 366 PetscInt nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz; 367 IS *isa; 368 PetscBool isBAIJ; 369 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 370 371 PetscFunctionBegin; 372 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 373 374 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 375 SeqBAIJ calls this routine, thus check it */ 376 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 377 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 378 if (!isBAIJ) { 379 bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */ 380 } 381 N = mat->cmap->N/bs; 382 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 383 c->N = mat->cmap->N/bs; 384 c->m = mat->rmap->N/bs; 385 c->rstart = 0; 386 387 c->ncolors = nis; 388 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 389 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 390 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 391 ierr = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr); 392 393 if (isBAIJ) { 394 ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 395 } else { 396 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 397 } 398 399 ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr); 400 ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 401 402 nz = 0; 403 for (i=0; i<nis; i++) { /* loop over colors */ 404 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 405 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 406 407 c->ncolumns[i] = n; 408 if (n) { 409 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 410 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 411 } else { 412 c->columns[i] = 0; 413 } 414 415 /* fast, crude version requires O(N*N) work */ 416 nrows = 0; 417 for (j=0; j<n; j++) { /* loop over columns */ 418 col = is[j]; 419 rows = cj + ci[col]; 420 m = ci[col+1] - ci[col]; 421 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 422 rowhit[*rows] = col + 1; 423 spidxhit[*rows++] = spidx[ci[col] + k]; 424 nrows++; 425 } 426 } 427 c->nrows[i] = nrows; /* total num of rows for this color */ 428 429 nrows = 0; 430 for (j=0; j<N; j++) { /* loop over rows */ 431 if (rowhit[j]) { 432 c->rowcolden2sp3[nz++] = j; /* row index */ 433 c->rowcolden2sp3[nz++] = rowhit[j] - 1; /* column index */ 434 c->rowcolden2sp3[nz++] = spidxhit[j]; /* den2sp index */ 435 rowhit[j] = 0.0; /* zero rowhit for reuse */ 436 } 437 } 438 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 439 } 440 if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz); 441 442 if (isBAIJ) { 443 ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 444 } else { 445 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 446 } 447 ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr); 448 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 449 450 c->ctype = IS_COLORING_GHOSTED; 451 if (isBAIJ) { 452 mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ; 453 } else { 454 mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 455 } 456 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 462 /* 463 This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 464 This is why it has the ugly code with the MatGetBlockSize() 465 */ 466 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 467 { 468 PetscErrorCode ierr; 469 PetscInt i,n,nrows,N,j,k,m,ncols,col; 470 const PetscInt *is,*rows,*ci,*cj; 471 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 472 IS *isa; 473 PetscBool done,flg = PETSC_FALSE; 474 PetscBool flg1; 475 PetscBool new_impl=PETSC_FALSE; 476 477 PetscFunctionBegin; 478 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 479 if (new_impl){ 480 ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 484 485 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 486 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 487 SeqBAIJ calls this routine, thus check it */ 488 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 489 if (flg1) { 490 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 491 } 492 493 N = mat->cmap->N/bs; 494 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 495 c->N = mat->cmap->N/bs; 496 c->m = mat->rmap->N/bs; 497 c->rstart = 0; 498 499 c->ncolors = nis; 500 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 501 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 502 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 503 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 504 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 505 506 ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 507 if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 508 509 /* 510 Temporary option to allow for debugging/testing 511 */ 512 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 513 514 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 515 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 516 517 for (i=0; i<nis; i++) { 518 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 519 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 520 521 c->ncolumns[i] = n; 522 if (n) { 523 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 524 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 525 } else { 526 c->columns[i] = 0; 527 } 528 529 if (!flg) { /* ------------------------------------------------------------------------------*/ 530 /* fast, crude version requires O(N*N) work */ 531 ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 532 /* loop over columns*/ 533 for (j=0; j<n; j++) { 534 col = is[j]; 535 rows = cj + ci[col]; 536 m = ci[col+1] - ci[col]; 537 /* loop over columns marking them in rowhit */ 538 for (k=0; k<m; k++) { 539 rowhit[*rows++] = col + 1; 540 } 541 } 542 /* count the number of hits */ 543 nrows = 0; 544 for (j=0; j<N; j++) { 545 if (rowhit[j]) nrows++; 546 } 547 c->nrows[i] = nrows; 548 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 549 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 550 nrows = 0; 551 for (j=0; j<N; j++) { 552 if (rowhit[j]) { 553 c->rows[i][nrows] = j; 554 c->columnsforrow[i][nrows] = rowhit[j] - 1; 555 nrows++; 556 } 557 } 558 } else { /*-------------------------------------------------------------------------------*/ 559 /* slow version, using rowhit as a linked list */ 560 PetscInt currentcol,fm,mfm; 561 rowhit[N] = N; 562 nrows = 0; 563 /* loop over columns */ 564 for (j=0; j<n; j++) { 565 col = is[j]; 566 rows = cj + ci[col]; 567 m = ci[col+1] - ci[col]; 568 /* loop over columns marking them in rowhit */ 569 fm = N; /* fm points to first entry in linked list */ 570 for (k=0; k<m; k++) { 571 currentcol = *rows++; 572 /* is it already in the list? */ 573 do { 574 mfm = fm; 575 fm = rowhit[fm]; 576 } while (fm < currentcol); 577 /* not in list so add it */ 578 if (fm != currentcol) { 579 nrows++; 580 columnsforrow[currentcol] = col; 581 /* next three lines insert new entry into linked list */ 582 rowhit[mfm] = currentcol; 583 rowhit[currentcol] = fm; 584 fm = currentcol; 585 /* fm points to present position in list since we know the columns are sorted */ 586 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 587 } 588 } 589 c->nrows[i] = nrows; 590 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 591 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 592 /* now store the linked list of rows into c->rows[i] */ 593 nrows = 0; 594 fm = rowhit[N]; 595 do { 596 c->rows[i][nrows] = fm; 597 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 598 fm = rowhit[fm]; 599 } while (fm < N); 600 } /* ---------------------------------------------------------------------------------------*/ 601 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 602 } 603 ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 604 605 ierr = PetscFree(rowhit);CHKERRQ(ierr); 606 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 607 608 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 609 /* 610 see the version for MPIAIJ 611 */ 612 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 613 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 614 for (k=0; k<c->ncolors; k++) { 615 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 616 for (l=0; l<c->nrows[k]; l++) { 617 col = c->columnsforrow[k][l]; 618 619 c->vscaleforrow[k][l] = col; 620 } 621 } 622 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626