1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/baij/seq/baij.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ" 7 PetscErrorCode MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 8 { 9 PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 10 PetscErrorCode ierr; 11 PetscInt bs=J->rmap->bs,bs2=bs*bs,i,j,k,l,row,col,N=J->cmap->n,spidx,nz; 12 PetscScalar dx,*dy_i,*xx,*w3_array,*dy=coloring->dy; 13 PetscScalar *vscale_array; 14 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 15 Vec w1 = coloring->w1,w2=coloring->w2,w3; 16 void *fctx = coloring->fctx; 17 PetscBool flg = PETSC_FALSE; 18 Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 19 PetscScalar *ca = csp->a,*ca_l; 20 21 PetscFunctionBegin; 22 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 23 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 24 if (flg) { 25 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 26 } else { 27 PetscBool assembled; 28 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 29 if (assembled) { 30 ierr = MatZeroEntries(J);CHKERRQ(ierr); 31 } 32 } 33 if (!coloring->vscale) { 34 ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 35 } 36 37 /* Set w1 = F(x1) */ 38 if (!coloring->fset) { 39 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 40 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 41 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 42 } else { 43 coloring->fset = PETSC_FALSE; 44 } 45 46 if (!coloring->w3) { 47 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 48 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 49 } 50 w3 = coloring->w3; 51 52 /* Compute local scale factors: vscale = dx */ 53 if (coloring->htype[0] == 'w') { 54 /* tacky test; need to make systematic if we add other approaches to computing h*/ 55 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 56 dx = (1.0 + unorm)*epsilon; 57 ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr); 58 } else { 59 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 60 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 61 for (col=0; col<N; col++) { 62 dx = xx[col]; 63 if (dx == 0.0) dx = 1.0; 64 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 65 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 66 dx *= epsilon; 67 vscale_array[col] = dx; 68 } 69 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 70 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 71 } 72 73 nz = 0; 74 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 75 for (k=0; k<coloring->ncolors; k++) { /* Loop over each color */ 76 coloring->currentcolor = k; 77 78 /* Compute w3 = x1 + dx */ 79 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 80 dy_i = dy; 81 for (i=0; i<bs; i++) { /* Loop over a block of columns */ 82 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 83 for (l=0; l<coloring->ncolumns[k]; l++) { 84 col = i + bs*coloring->columns[k][l]; 85 w3_array[col] += vscale_array[col]; 86 if (i) { 87 w3_array[col-1] -= vscale_array[col-1]; /* resume original w3[col-1] */ 88 } 89 } 90 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 91 92 /* Evaluate function w2 = F(w3) - F(x1) */ 93 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 94 ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */ 95 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 96 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 97 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 98 ierr = VecResetArray(w2);CHKERRQ(ierr); 99 dy_i += N; /* points to dy+i*N */ 100 } 101 102 /* Loop over row blocks, putting dy/dx into Jacobian matrix */ 103 for (l=0; l<coloring->nrows[k]; l++) { 104 row = bs*coloring->rowcolden2sp3[nz++]; 105 col = bs*coloring->rowcolden2sp3[nz++]; 106 ca_l = ca + bs2*coloring->rowcolden2sp3[nz++]; 107 spidx = 0; 108 dy_i = dy; 109 for (i=0; i<bs; i++) { /* column of the block */ 110 for (j=0; j<bs; j++) { /* row of the block */ 111 ca_l[spidx++] = dy_i[row+j]/vscale_array[col+i]; 112 } 113 dy_i += N; /* points to dy+i*N */ 114 } 115 } 116 } /* endof for each color */ 117 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 118 119 coloring->currentcolor = -1; 120 PetscFunctionReturn(0); 121 } 122 123 /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */ 124 /* #define JACOBIANCOLOROPT */ 125 #if defined(JACOBIANCOLOROPT) 126 #include <petsctime.h> 127 #endif 128 #undef __FUNCT__ 129 #define __FUNCT__ "MatFDColoringApply_SeqAIJ" 130 PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 131 { 132 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 133 PetscErrorCode ierr; 134 PetscInt k,l,row,col,N,bs=J->rmap->bs,bs2=bs*bs; 135 PetscScalar dx,*y,*xx,*w3_array; 136 PetscScalar *vscale_array; 137 PetscReal epsilon=coloring->error_rel,umin = coloring->umin,unorm; 138 Vec w1=coloring->w1,w2=coloring->w2,w3; 139 void *fctx=coloring->fctx; 140 PetscBool flg=PETSC_FALSE,isBAIJ=PETSC_FALSE; 141 PetscScalar *ca; 142 const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 143 PetscInt **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx; 144 #if defined(JACOBIANCOLOROPT) 145 PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11; 146 #endif 147 148 PetscFunctionBegin; 149 #if defined(JACOBIANCOLOROPT) 150 ierr = PetscTime(&t0);CHKERRQ(ierr); 151 #endif 152 ierr = PetscObjectTypeCompare((PetscObject)J,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 153 if (isBAIJ) { 154 Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 155 ca = csp->a; 156 } else { 157 Mat_SeqAIJ *csp=(Mat_SeqAIJ*)J->data; 158 ca = csp->a; 159 bs = 1; bs2 = 1; 160 } 161 162 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 163 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 164 if (flg) { 165 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 166 } else { 167 PetscBool assembled; 168 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 169 if (assembled) { 170 ierr = MatZeroEntries(J);CHKERRQ(ierr); 171 } 172 } 173 174 if (!coloring->vscale) { 175 ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 176 } 177 178 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 179 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 180 } 181 #if defined(JACOBIANCOLOROPT) 182 ierr = PetscTime(&t1);CHKERRQ(ierr); 183 t_init += t1 - t0; 184 #endif 185 186 /* Set w1 = F(x1) */ 187 #if defined(JACOBIANCOLOROPT) 188 ierr = PetscTime(&t0);CHKERRQ(ierr); 189 #endif 190 if (!coloring->fset) { 191 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 192 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 193 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 194 } else { 195 coloring->fset = PETSC_FALSE; 196 } 197 #if defined(JACOBIANCOLOROPT) 198 ierr = PetscTime(&t1);CHKERRQ(ierr); 199 t_setvals += t1 - t0; 200 #endif 201 202 #if defined(JACOBIANCOLOROPT) 203 ierr = PetscTime(&t0);CHKERRQ(ierr); 204 #endif 205 if (!coloring->w3) { 206 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 207 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 208 } 209 w3 = coloring->w3; 210 211 /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */ 212 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 213 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 214 ierr = VecGetSize(x1,&N);CHKERRQ(ierr); 215 for (col=0; col<N; col++) { 216 if (coloring->htype[0] == 'w') { 217 dx = 1.0 + unorm; 218 } else { 219 dx = xx[col]; 220 } 221 if (dx == (PetscScalar)0.0) dx = 1.0; 222 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 223 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 224 dx *= epsilon; 225 vscale_array[col] = (PetscScalar)dx; 226 } 227 #if defined(JACOBIANCOLOROPT) 228 ierr = PetscTime(&t1);CHKERRQ(ierr); 229 t_init += t1 - t0; 230 #endif 231 232 nz = 0; 233 for (k=0; k<ncolors; k++) { /* loop over colors */ 234 #if defined(JACOBIANCOLOROPT) 235 ierr = PetscTime(&t0);CHKERRQ(ierr); 236 #endif 237 coloring->currentcolor = k; 238 239 /* 240 Loop over each column associated with color 241 adding the perturbation to the vector w3 = x1 + dx. 242 */ 243 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 244 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 245 ncolumns_k = ncolumns[k]; 246 for (l=0; l<ncolumns_k; l++) { /* loop over columns */ 247 col = columns[k][l]; 248 w3_array[col] += vscale_array[col]; 249 } 250 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 251 #if defined(JACOBIANCOLOROPT) 252 ierr = PetscTime(&t1);CHKERRQ(ierr); 253 t_dx += t1 - t0; 254 #endif 255 256 /* 257 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 258 w2 = F(x1 + dx) - F(x1) 259 */ 260 #if defined(JACOBIANCOLOROPT) 261 ierr = PetscTime(&t0);CHKERRQ(ierr); 262 #endif 263 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 264 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 265 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 266 #if defined(JACOBIANCOLOROPT) 267 ierr = PetscTime(&t1);CHKERRQ(ierr); 268 t_func += t1 - t0; 269 #endif 270 271 #if defined(JACOBIANCOLOROPT) 272 ierr = PetscTime(&t0);CHKERRQ(ierr); 273 #endif 274 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 275 276 /* 277 Loop over rows of vector, putting w2/dx into Jacobian matrix 278 */ 279 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 280 nrows_k = nrows[k]; 281 for (l=0; l<nrows_k; l++) { /* loop over rows */ 282 #if defined(JACOBIANCOLOROPT) 283 ierr = PetscTime(&t00);CHKERRQ(ierr); 284 #endif 285 row = coloring->rowcolden2sp3[nz++]; 286 col = coloring->rowcolden2sp3[nz++]; 287 spidx = coloring->rowcolden2sp3[nz++]; 288 #if defined(JACOBIANCOLOROPT) 289 ierr = PetscTime(&t11);CHKERRQ(ierr); 290 t_kl += t11 - t00; 291 #endif 292 //printf(" row col val 293 ca[spidx*bs2] = y[row*bs]/vscale_array[col*bs]; 294 //printf(" (%d, %d, %g)\n", row*bs,col*bs,ca[spidx*bs2]); 295 } 296 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 297 298 #if defined(JACOBIANCOLOROPT) 299 ierr = PetscTime(&t1);CHKERRQ(ierr); 300 t_setvals += t1 - t0; 301 #endif 302 } /* endof for each color */ 303 #if defined(JACOBIANCOLOROPT) 304 printf(" FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx); 305 printf(" FDColorApply time: kl %g\n",t_kl); 306 #endif 307 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 308 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 309 310 coloring->currentcolor = -1; 311 PetscFunctionReturn(0); 312 } 313 314 #undef __FUNCT__ 315 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 316 /* 317 This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 318 */ 319 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 320 { 321 PetscErrorCode ierr; 322 PetscInt i,n,nrows,N,j,k,m,ncols,col; 323 const PetscInt *is,*rows,*ci,*cj; 324 PetscInt nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz; 325 IS *isa; 326 PetscBool isBAIJ; 327 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 328 329 PetscFunctionBegin; 330 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 331 332 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 333 SeqBAIJ calls this routine, thus check it */ 334 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 335 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 336 if (!isBAIJ) { 337 bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */ 338 } 339 N = mat->cmap->N/bs; 340 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 341 c->N = mat->cmap->N/bs; 342 c->m = mat->rmap->N/bs; 343 c->rstart = 0; 344 345 c->ncolors = nis; 346 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 347 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 348 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 349 ierr = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr); 350 351 if (isBAIJ) { 352 ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 353 } else { 354 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 355 } 356 357 ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr); 358 ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 359 360 nz = 0; 361 for (i=0; i<nis; i++) { /* loop over colors */ 362 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 363 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 364 365 c->ncolumns[i] = n; 366 if (n) { 367 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 368 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 369 } else { 370 c->columns[i] = 0; 371 } 372 373 /* fast, crude version requires O(N*N) work */ 374 nrows = 0; 375 for (j=0; j<n; j++) { /* loop over columns */ 376 col = is[j]; 377 rows = cj + ci[col]; 378 m = ci[col+1] - ci[col]; 379 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 380 rowhit[*rows] = col + 1; 381 spidxhit[*rows++] = spidx[ci[col] + k]; 382 nrows++; 383 } 384 } 385 c->nrows[i] = nrows; /* total num of rows for this color */ 386 387 nrows = 0; 388 for (j=0; j<N; j++) { /* loop over rows */ 389 if (rowhit[j]) { 390 c->rowcolden2sp3[nz++] = j; /* row index */ 391 c->rowcolden2sp3[nz++] = rowhit[j] - 1; /* column index */ 392 c->rowcolden2sp3[nz++] = spidxhit[j]; /* den2sp index */ 393 rowhit[j] = 0.0; /* zero rowhit for reuse */ 394 } 395 } 396 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 397 } 398 if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz); 399 400 if (isBAIJ) { 401 ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 402 } else { 403 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 404 } 405 ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr); 406 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 407 408 c->ctype = IS_COLORING_GHOSTED; 409 if (isBAIJ) { 410 mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ; 411 ierr = PetscMalloc(bs*mat->cmap->N*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr); 412 } else { 413 mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 414 } 415 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNCT__ 420 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 421 /* 422 This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 423 This is why it has the ugly code with the MatGetBlockSize() 424 */ 425 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 426 { 427 PetscErrorCode ierr; 428 PetscInt i,n,nrows,N,j,k,m,ncols,col; 429 const PetscInt *is,*rows,*ci,*cj; 430 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 431 IS *isa; 432 PetscBool done,flg = PETSC_FALSE; 433 PetscBool flg1; 434 PetscBool new_impl=PETSC_FALSE; 435 436 PetscFunctionBegin; 437 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 438 if (new_impl){ 439 ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 443 444 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 445 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 446 SeqBAIJ calls this routine, thus check it */ 447 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 448 if (flg1) { 449 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 450 } 451 452 N = mat->cmap->N/bs; 453 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 454 c->N = mat->cmap->N/bs; 455 c->m = mat->rmap->N/bs; 456 c->rstart = 0; 457 458 c->ncolors = nis; 459 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 460 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 461 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 462 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 463 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 464 465 ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 466 if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 467 468 /* 469 Temporary option to allow for debugging/testing 470 */ 471 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 472 473 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 474 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 475 476 for (i=0; i<nis; i++) { 477 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 478 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 479 480 c->ncolumns[i] = n; 481 if (n) { 482 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 483 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 484 } else { 485 c->columns[i] = 0; 486 } 487 488 if (!flg) { /* ------------------------------------------------------------------------------*/ 489 /* fast, crude version requires O(N*N) work */ 490 ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 491 /* loop over columns*/ 492 for (j=0; j<n; j++) { 493 col = is[j]; 494 rows = cj + ci[col]; 495 m = ci[col+1] - ci[col]; 496 /* loop over columns marking them in rowhit */ 497 for (k=0; k<m; k++) { 498 rowhit[*rows++] = col + 1; 499 } 500 } 501 /* count the number of hits */ 502 nrows = 0; 503 for (j=0; j<N; j++) { 504 if (rowhit[j]) nrows++; 505 } 506 c->nrows[i] = nrows; 507 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 508 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 509 nrows = 0; 510 for (j=0; j<N; j++) { 511 if (rowhit[j]) { 512 c->rows[i][nrows] = j; 513 c->columnsforrow[i][nrows] = rowhit[j] - 1; 514 nrows++; 515 } 516 } 517 } else { /*-------------------------------------------------------------------------------*/ 518 /* slow version, using rowhit as a linked list */ 519 PetscInt currentcol,fm,mfm; 520 rowhit[N] = N; 521 nrows = 0; 522 /* loop over columns */ 523 for (j=0; j<n; j++) { 524 col = is[j]; 525 rows = cj + ci[col]; 526 m = ci[col+1] - ci[col]; 527 /* loop over columns marking them in rowhit */ 528 fm = N; /* fm points to first entry in linked list */ 529 for (k=0; k<m; k++) { 530 currentcol = *rows++; 531 /* is it already in the list? */ 532 do { 533 mfm = fm; 534 fm = rowhit[fm]; 535 } while (fm < currentcol); 536 /* not in list so add it */ 537 if (fm != currentcol) { 538 nrows++; 539 columnsforrow[currentcol] = col; 540 /* next three lines insert new entry into linked list */ 541 rowhit[mfm] = currentcol; 542 rowhit[currentcol] = fm; 543 fm = currentcol; 544 /* fm points to present position in list since we know the columns are sorted */ 545 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 546 } 547 } 548 c->nrows[i] = nrows; 549 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 550 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 551 /* now store the linked list of rows into c->rows[i] */ 552 nrows = 0; 553 fm = rowhit[N]; 554 do { 555 c->rows[i][nrows] = fm; 556 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 557 fm = rowhit[fm]; 558 } while (fm < N); 559 } /* ---------------------------------------------------------------------------------------*/ 560 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 561 } 562 ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 563 564 ierr = PetscFree(rowhit);CHKERRQ(ierr); 565 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 566 567 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 568 /* 569 see the version for MPIAIJ 570 */ 571 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 572 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 573 for (k=0; k<c->ncolors; k++) { 574 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 575 for (l=0; l<c->nrows[k]; l++) { 576 col = c->columnsforrow[k][l]; 577 578 c->vscaleforrow[k][l] = col; 579 } 580 } 581 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 582 PetscFunctionReturn(0); 583 } 584 585