1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/baij/seq/baij.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ" 7 PetscErrorCode MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 8 { 9 PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 10 PetscErrorCode ierr; 11 PetscInt bs=J->rmap->bs,bs2=bs*bs,i,j,k,start,end,l,row,col,N; 12 PetscScalar dx,*y,*xx,*w3_array; 13 PetscScalar *vscale_array; 14 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 15 Vec w1 = coloring->w1,w2=coloring->w2,w3; 16 void *fctx = coloring->fctx; 17 PetscBool flg = PETSC_FALSE; 18 Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 19 PetscScalar *ca = csp->a; 20 PetscInt brow,bcol,bspidx,spidx,nz,nz_i; 21 22 PetscFunctionBegin; 23 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 24 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 25 if (flg) { 26 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 27 } else { 28 PetscBool assembled; 29 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 30 if (assembled) { 31 ierr = MatZeroEntries(J);CHKERRQ(ierr); 32 } 33 } 34 35 if (!coloring->vscale) { 36 ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 37 } 38 39 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 40 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 41 } 42 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 43 44 /* Set w1 = F(x1) */ 45 if (!coloring->fset) { 46 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 47 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 48 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 49 } else { 50 coloring->fset = PETSC_FALSE; 51 } 52 53 if (!coloring->w3) { 54 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 55 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 56 } 57 w3 = coloring->w3; 58 59 /* Compute all the local scale factors: vscale = 1./dx = 1./(epsilon*xx) */ 60 ierr = VecGetLocalSize(x1,&N);CHKERRQ(ierr); 61 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 62 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 63 64 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 65 for (col=0; col<N; col++) { 66 if (coloring->htype[0] == 'w') { 67 dx = 1.0 + unorm; 68 } else { 69 dx = xx[col]; 70 } 71 if (dx == (PetscScalar)0.0) dx = 1.0; 72 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 73 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 74 dx *= epsilon; 75 vscale_array[col] = (PetscScalar)dx; 76 } 77 78 nz = 0; 79 for (k=0; k<coloring->ncolors; k++) { /* Loop over each color */ 80 coloring->currentcolor = k; 81 nz_i = nz; 82 for (i=0; i<bs; i++) { 83 //printf(" color= %d, i= %d, nz_i= %d ----------------\n",k,i,nz_i); 84 nz = nz_i; 85 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 86 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 87 88 /* 89 Loop over each column associated with color 90 adding the perturbation to the vector w3. 91 */ 92 for (l=0; l<coloring->ncolumns[k]; l++) { 93 col = i + bs*coloring->columns[k][l]; 94 w3_array[col] += vscale_array[col]; 95 } 96 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 97 98 /* 99 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 100 w2 = F(x1 + dx) - F(x1) 101 */ 102 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 103 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 104 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 105 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 106 107 /* 108 Loop over rows of vector, putting results into Jacobian matrix 109 */ 110 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 111 for (l=0; l<coloring->nrows[k]; l++) { 112 brow = coloring->rowcolden2sp3[nz++]; 113 bcol = coloring->rowcolden2sp3[nz++]; 114 bspidx = coloring->rowcolden2sp3[nz++]; 115 116 row = bs*brow; 117 col = bs*bcol+i; 118 spidx = bs2*bspidx+i*bs; 119 120 for (j=0; j<bs; j++) { 121 ca[spidx+j] = y[row+j]/vscale_array[col]; 122 //printf("(%d %d %g) nz= %d\n",row+j,col,ca[spidx],nz-3); 123 } 124 } 125 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 126 } 127 } /* endof for each color */ 128 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 129 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 130 131 coloring->currentcolor = -1; 132 PetscFunctionReturn(0); 133 } 134 135 /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */ 136 /* #define JACOBIANCOLOROPT */ 137 #if defined(JACOBIANCOLOROPT) 138 #include <petsctime.h> 139 #endif 140 #undef __FUNCT__ 141 #define __FUNCT__ "MatFDColoringApply_SeqAIJ" 142 PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 143 { 144 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 145 PetscErrorCode ierr; 146 PetscInt k,l,row,col,N,bs=J->rmap->bs,bs2=bs*bs; 147 PetscScalar dx,*y,*xx,*w3_array; 148 PetscScalar *vscale_array; 149 PetscReal epsilon=coloring->error_rel,umin = coloring->umin,unorm; 150 Vec w1=coloring->w1,w2=coloring->w2,w3; 151 void *fctx=coloring->fctx; 152 PetscBool flg=PETSC_FALSE,isBAIJ=PETSC_FALSE; 153 PetscScalar *ca; 154 const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 155 PetscInt **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx; 156 #if defined(JACOBIANCOLOROPT) 157 PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11; 158 #endif 159 160 PetscFunctionBegin; 161 #if defined(JACOBIANCOLOROPT) 162 ierr = PetscTime(&t0);CHKERRQ(ierr); 163 #endif 164 ierr = PetscObjectTypeCompare((PetscObject)J,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 165 if (isBAIJ) { 166 Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 167 ca = csp->a; 168 } else { 169 Mat_SeqAIJ *csp=(Mat_SeqAIJ*)J->data; 170 ca = csp->a; 171 bs = 1; bs2 = 1; 172 } 173 174 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 175 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 176 if (flg) { 177 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 178 } else { 179 PetscBool assembled; 180 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 181 if (assembled) { 182 ierr = MatZeroEntries(J);CHKERRQ(ierr); 183 } 184 } 185 186 if (!coloring->vscale) { 187 ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 188 } 189 190 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 191 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 192 } 193 #if defined(JACOBIANCOLOROPT) 194 ierr = PetscTime(&t1);CHKERRQ(ierr); 195 t_init += t1 - t0; 196 #endif 197 198 /* Set w1 = F(x1) */ 199 #if defined(JACOBIANCOLOROPT) 200 ierr = PetscTime(&t0);CHKERRQ(ierr); 201 #endif 202 if (!coloring->fset) { 203 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 204 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 205 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 206 } else { 207 coloring->fset = PETSC_FALSE; 208 } 209 #if defined(JACOBIANCOLOROPT) 210 ierr = PetscTime(&t1);CHKERRQ(ierr); 211 t_setvals += t1 - t0; 212 #endif 213 214 #if defined(JACOBIANCOLOROPT) 215 ierr = PetscTime(&t0);CHKERRQ(ierr); 216 #endif 217 if (!coloring->w3) { 218 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 219 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 220 } 221 w3 = coloring->w3; 222 223 /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */ 224 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 225 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 226 ierr = VecGetSize(x1,&N);CHKERRQ(ierr); 227 for (col=0; col<N; col++) { 228 if (coloring->htype[0] == 'w') { 229 dx = 1.0 + unorm; 230 } else { 231 dx = xx[col]; 232 } 233 if (dx == (PetscScalar)0.0) dx = 1.0; 234 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 235 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 236 dx *= epsilon; 237 vscale_array[col] = (PetscScalar)dx; 238 } 239 #if defined(JACOBIANCOLOROPT) 240 ierr = PetscTime(&t1);CHKERRQ(ierr); 241 t_init += t1 - t0; 242 #endif 243 244 nz = 0; 245 for (k=0; k<ncolors; k++) { /* loop over colors */ 246 #if defined(JACOBIANCOLOROPT) 247 ierr = PetscTime(&t0);CHKERRQ(ierr); 248 #endif 249 coloring->currentcolor = k; 250 251 /* 252 Loop over each column associated with color 253 adding the perturbation to the vector w3 = x1 + dx. 254 */ 255 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 256 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 257 ncolumns_k = ncolumns[k]; 258 for (l=0; l<ncolumns_k; l++) { /* loop over columns */ 259 col = columns[k][l]; 260 w3_array[col] += vscale_array[col]; 261 } 262 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 263 #if defined(JACOBIANCOLOROPT) 264 ierr = PetscTime(&t1);CHKERRQ(ierr); 265 t_dx += t1 - t0; 266 #endif 267 268 /* 269 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 270 w2 = F(x1 + dx) - F(x1) 271 */ 272 #if defined(JACOBIANCOLOROPT) 273 ierr = PetscTime(&t0);CHKERRQ(ierr); 274 #endif 275 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 276 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 277 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 278 #if defined(JACOBIANCOLOROPT) 279 ierr = PetscTime(&t1);CHKERRQ(ierr); 280 t_func += t1 - t0; 281 #endif 282 283 #if defined(JACOBIANCOLOROPT) 284 ierr = PetscTime(&t0);CHKERRQ(ierr); 285 #endif 286 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 287 288 /* 289 Loop over rows of vector, putting w2/dx into Jacobian matrix 290 */ 291 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 292 nrows_k = nrows[k]; 293 for (l=0; l<nrows_k; l++) { /* loop over rows */ 294 #if defined(JACOBIANCOLOROPT) 295 ierr = PetscTime(&t00);CHKERRQ(ierr); 296 #endif 297 row = coloring->rowcolden2sp3[nz++]; 298 col = coloring->rowcolden2sp3[nz++]; 299 spidx = coloring->rowcolden2sp3[nz++]; 300 #if defined(JACOBIANCOLOROPT) 301 ierr = PetscTime(&t11);CHKERRQ(ierr); 302 t_kl += t11 - t00; 303 #endif 304 //printf(" row col val 305 ca[spidx*bs2] = y[row*bs]/vscale_array[col*bs]; 306 //printf(" (%d, %d, %g)\n", row*bs,col*bs,ca[spidx*bs2]); 307 } 308 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 309 310 #if defined(JACOBIANCOLOROPT) 311 ierr = PetscTime(&t1);CHKERRQ(ierr); 312 t_setvals += t1 - t0; 313 #endif 314 } /* endof for each color */ 315 #if defined(JACOBIANCOLOROPT) 316 printf(" FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx); 317 printf(" FDColorApply time: kl %g\n",t_kl); 318 #endif 319 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 320 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 321 322 coloring->currentcolor = -1; 323 PetscFunctionReturn(0); 324 } 325 326 #undef __FUNCT__ 327 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 328 /* 329 This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 330 */ 331 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 332 { 333 PetscErrorCode ierr; 334 PetscInt i,n,nrows,N,j,k,m,ncols,col; 335 const PetscInt *is,*rows,*ci,*cj; 336 PetscInt nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz; 337 IS *isa; 338 PetscBool isBAIJ; 339 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 340 341 PetscFunctionBegin; 342 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 343 344 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 345 SeqBAIJ calls this routine, thus check it */ 346 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 347 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 348 if (!isBAIJ) { 349 bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */ 350 } 351 N = mat->cmap->N/bs; 352 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 353 c->N = mat->cmap->N/bs; 354 c->m = mat->rmap->N/bs; 355 c->rstart = 0; 356 357 c->ncolors = nis; 358 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 359 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 360 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 361 ierr = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr); 362 363 if (isBAIJ) { 364 ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 365 } else { 366 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 367 } 368 369 ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr); 370 ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 371 372 nz = 0; 373 for (i=0; i<nis; i++) { /* loop over colors */ 374 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 375 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 376 377 c->ncolumns[i] = n; 378 if (n) { 379 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 380 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 381 } else { 382 c->columns[i] = 0; 383 } 384 385 /* fast, crude version requires O(N*N) work */ 386 nrows = 0; 387 for (j=0; j<n; j++) { /* loop over columns */ 388 col = is[j]; 389 rows = cj + ci[col]; 390 m = ci[col+1] - ci[col]; 391 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 392 rowhit[*rows] = col + 1; 393 spidxhit[*rows++] = spidx[ci[col] + k]; 394 nrows++; 395 } 396 } 397 c->nrows[i] = nrows; /* total num of rows for this color */ 398 399 nrows = 0; 400 for (j=0; j<N; j++) { /* loop over rows */ 401 if (rowhit[j]) { 402 c->rowcolden2sp3[nz++] = j; /* row index */ 403 c->rowcolden2sp3[nz++] = rowhit[j] - 1; /* column index */ 404 c->rowcolden2sp3[nz++] = spidxhit[j]; /* den2sp index */ 405 rowhit[j] = 0.0; /* zero rowhit for reuse */ 406 } 407 } 408 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 409 } 410 if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz); 411 412 if (isBAIJ) { 413 ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 414 } else { 415 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 416 } 417 ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr); 418 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 419 420 c->ctype = IS_COLORING_GHOSTED; 421 if (isBAIJ) { 422 mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ; 423 } else { 424 mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 425 } 426 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 427 PetscFunctionReturn(0); 428 } 429 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 432 /* 433 This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 434 This is why it has the ugly code with the MatGetBlockSize() 435 */ 436 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 437 { 438 PetscErrorCode ierr; 439 PetscInt i,n,nrows,N,j,k,m,ncols,col; 440 const PetscInt *is,*rows,*ci,*cj; 441 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 442 IS *isa; 443 PetscBool done,flg = PETSC_FALSE; 444 PetscBool flg1; 445 PetscBool new_impl=PETSC_FALSE; 446 447 PetscFunctionBegin; 448 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 449 if (new_impl){ 450 ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 451 PetscFunctionReturn(0); 452 } 453 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 454 455 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 456 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 457 SeqBAIJ calls this routine, thus check it */ 458 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 459 if (flg1) { 460 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 461 } 462 463 N = mat->cmap->N/bs; 464 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 465 c->N = mat->cmap->N/bs; 466 c->m = mat->rmap->N/bs; 467 c->rstart = 0; 468 469 c->ncolors = nis; 470 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 471 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 472 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 473 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 474 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 475 476 ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 477 if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 478 479 /* 480 Temporary option to allow for debugging/testing 481 */ 482 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 483 484 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 485 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 486 487 for (i=0; i<nis; i++) { 488 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 489 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 490 491 c->ncolumns[i] = n; 492 if (n) { 493 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 494 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 495 } else { 496 c->columns[i] = 0; 497 } 498 499 if (!flg) { /* ------------------------------------------------------------------------------*/ 500 /* fast, crude version requires O(N*N) work */ 501 ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 502 /* loop over columns*/ 503 for (j=0; j<n; j++) { 504 col = is[j]; 505 rows = cj + ci[col]; 506 m = ci[col+1] - ci[col]; 507 /* loop over columns marking them in rowhit */ 508 for (k=0; k<m; k++) { 509 rowhit[*rows++] = col + 1; 510 } 511 } 512 /* count the number of hits */ 513 nrows = 0; 514 for (j=0; j<N; j++) { 515 if (rowhit[j]) nrows++; 516 } 517 c->nrows[i] = nrows; 518 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 519 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 520 nrows = 0; 521 for (j=0; j<N; j++) { 522 if (rowhit[j]) { 523 c->rows[i][nrows] = j; 524 c->columnsforrow[i][nrows] = rowhit[j] - 1; 525 nrows++; 526 } 527 } 528 } else { /*-------------------------------------------------------------------------------*/ 529 /* slow version, using rowhit as a linked list */ 530 PetscInt currentcol,fm,mfm; 531 rowhit[N] = N; 532 nrows = 0; 533 /* loop over columns */ 534 for (j=0; j<n; j++) { 535 col = is[j]; 536 rows = cj + ci[col]; 537 m = ci[col+1] - ci[col]; 538 /* loop over columns marking them in rowhit */ 539 fm = N; /* fm points to first entry in linked list */ 540 for (k=0; k<m; k++) { 541 currentcol = *rows++; 542 /* is it already in the list? */ 543 do { 544 mfm = fm; 545 fm = rowhit[fm]; 546 } while (fm < currentcol); 547 /* not in list so add it */ 548 if (fm != currentcol) { 549 nrows++; 550 columnsforrow[currentcol] = col; 551 /* next three lines insert new entry into linked list */ 552 rowhit[mfm] = currentcol; 553 rowhit[currentcol] = fm; 554 fm = currentcol; 555 /* fm points to present position in list since we know the columns are sorted */ 556 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 557 } 558 } 559 c->nrows[i] = nrows; 560 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 561 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 562 /* now store the linked list of rows into c->rows[i] */ 563 nrows = 0; 564 fm = rowhit[N]; 565 do { 566 c->rows[i][nrows] = fm; 567 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 568 fm = rowhit[fm]; 569 } while (fm < N); 570 } /* ---------------------------------------------------------------------------------------*/ 571 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 572 } 573 ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 574 575 ierr = PetscFree(rowhit);CHKERRQ(ierr); 576 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 577 578 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 579 /* 580 see the version for MPIAIJ 581 */ 582 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 583 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 584 for (k=0; k<c->ncolors; k++) { 585 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 586 for (l=0; l<c->nrows[k]; l++) { 587 col = c->columnsforrow[k][l]; 588 589 c->vscaleforrow[k][l] = col; 590 } 591 } 592 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 593 PetscFunctionReturn(0); 594 } 595 596