1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/baij/seq/baij.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ" 7 PetscErrorCode MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 8 { 9 PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 10 PetscErrorCode ierr; 11 PetscInt bs=J->rmap->bs,bs2=bs*bs,i,j,k,l,row,col,N; 12 PetscScalar dx,*y,*xx,*w3_array; 13 PetscScalar *vscale_array; 14 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 15 Vec w1 = coloring->w1,w2=coloring->w2,w3; 16 void *fctx = coloring->fctx; 17 PetscBool flg = PETSC_FALSE; 18 Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 19 PetscScalar *ca = csp->a,*ca_l; 20 PetscInt brow,bcol,bspidx,nz,nz_i; 21 22 PetscFunctionBegin; 23 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 24 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 25 if (flg) { 26 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 27 } else { 28 PetscBool assembled; 29 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 30 if (assembled) { 31 ierr = MatZeroEntries(J);CHKERRQ(ierr); 32 } 33 } 34 35 if (!coloring->vscale) { 36 ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 37 } 38 39 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 40 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 41 } 42 43 /* Set w1 = F(x1) */ 44 if (!coloring->fset) { 45 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 46 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 47 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 48 } else { 49 coloring->fset = PETSC_FALSE; 50 } 51 52 if (!coloring->w3) { 53 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 54 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 55 } 56 w3 = coloring->w3; 57 58 /* Compute all the local scale factors: vscale = 1./dx = 1./(epsilon*xx) */ 59 ierr = VecGetLocalSize(x1,&N);CHKERRQ(ierr); 60 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 61 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 62 63 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 64 for (col=0; col<N; col++) { 65 if (coloring->htype[0] == 'w') { 66 dx = 1.0 + unorm; 67 } else { 68 dx = xx[col]; 69 } 70 if (dx == (PetscScalar)0.0) dx = 1.0; 71 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 72 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 73 dx *= epsilon; 74 vscale_array[col] = (PetscScalar)dx; 75 } 76 77 /* Create bs vectors w2s and w3s */ 78 Vec w2s[bs]; 79 for (i=0; i<bs; i++) { 80 ierr = VecDuplicate(w2,&w2s[i]);CHKERRQ(ierr); 81 } 82 83 nz = 0; 84 for (k=0; k<coloring->ncolors; k++) { /* Loop over each color */ 85 coloring->currentcolor = k; 86 87 /*----------- new -------------*/ 88 /* Compute w3 = x1 + dx */ 89 for (i=0; i<bs; i++) { 90 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 91 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 92 for (l=0; l<coloring->ncolumns[k]; l++) { 93 col = i + bs*coloring->columns[k][l]; 94 w3_array[col] += vscale_array[col]; 95 } 96 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 97 98 /* Evaluate function w2s = F(w3) - F(x1) */ 99 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 100 ierr = (*f)(sctx,w3,w2s[i],fctx);CHKERRQ(ierr); 101 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 102 ierr = VecAXPY(w2s[i],-1.0,w1);CHKERRQ(ierr); 103 } 104 /*----------- endof new -------------*/ 105 106 nz_i = nz; 107 for (i=0; i<bs; i++) { 108 nz = nz_i; 109 /* 110 Loop over rows of vector, putting results into Jacobian matrix 111 */ 112 ierr = VecGetArray(w2s[i],&y);CHKERRQ(ierr); 113 for (l=0; l<coloring->nrows[k]; l++) { 114 brow = coloring->rowcolden2sp3[nz++]; 115 bcol = coloring->rowcolden2sp3[nz++]; 116 bspidx = coloring->rowcolden2sp3[nz++]; 117 row = bs*brow; 118 col = bs*bcol + i; 119 120 ca_l = ca + bs2*bspidx + i*bs; 121 for (j=0; j<bs; j++) { 122 ca_l[j] = y[row+j]/vscale_array[col]; 123 } 124 } 125 ierr = VecRestoreArray(w2s[i],&y);CHKERRQ(ierr); 126 } /* endof for (i=0; i<bs; i++) */ 127 } /* endof for each color */ 128 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 129 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 130 for (i=0; i<bs; i++) { 131 ierr = VecDestroy(&w2s[i]);CHKERRQ(ierr); 132 } 133 coloring->currentcolor = -1; 134 PetscFunctionReturn(0); 135 } 136 137 /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */ 138 /* #define JACOBIANCOLOROPT */ 139 #if defined(JACOBIANCOLOROPT) 140 #include <petsctime.h> 141 #endif 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatFDColoringApply_SeqAIJ" 144 PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 145 { 146 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 147 PetscErrorCode ierr; 148 PetscInt k,l,row,col,N,bs=J->rmap->bs,bs2=bs*bs; 149 PetscScalar dx,*y,*xx,*w3_array; 150 PetscScalar *vscale_array; 151 PetscReal epsilon=coloring->error_rel,umin = coloring->umin,unorm; 152 Vec w1=coloring->w1,w2=coloring->w2,w3; 153 void *fctx=coloring->fctx; 154 PetscBool flg=PETSC_FALSE,isBAIJ=PETSC_FALSE; 155 PetscScalar *ca; 156 const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 157 PetscInt **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx; 158 #if defined(JACOBIANCOLOROPT) 159 PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11; 160 #endif 161 162 PetscFunctionBegin; 163 #if defined(JACOBIANCOLOROPT) 164 ierr = PetscTime(&t0);CHKERRQ(ierr); 165 #endif 166 ierr = PetscObjectTypeCompare((PetscObject)J,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 167 if (isBAIJ) { 168 Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 169 ca = csp->a; 170 } else { 171 Mat_SeqAIJ *csp=(Mat_SeqAIJ*)J->data; 172 ca = csp->a; 173 bs = 1; bs2 = 1; 174 } 175 176 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 177 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 178 if (flg) { 179 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 180 } else { 181 PetscBool assembled; 182 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 183 if (assembled) { 184 ierr = MatZeroEntries(J);CHKERRQ(ierr); 185 } 186 } 187 188 if (!coloring->vscale) { 189 ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 190 } 191 192 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 193 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 194 } 195 #if defined(JACOBIANCOLOROPT) 196 ierr = PetscTime(&t1);CHKERRQ(ierr); 197 t_init += t1 - t0; 198 #endif 199 200 /* Set w1 = F(x1) */ 201 #if defined(JACOBIANCOLOROPT) 202 ierr = PetscTime(&t0);CHKERRQ(ierr); 203 #endif 204 if (!coloring->fset) { 205 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 206 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 207 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 208 } else { 209 coloring->fset = PETSC_FALSE; 210 } 211 #if defined(JACOBIANCOLOROPT) 212 ierr = PetscTime(&t1);CHKERRQ(ierr); 213 t_setvals += t1 - t0; 214 #endif 215 216 #if defined(JACOBIANCOLOROPT) 217 ierr = PetscTime(&t0);CHKERRQ(ierr); 218 #endif 219 if (!coloring->w3) { 220 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 221 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 222 } 223 w3 = coloring->w3; 224 225 /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */ 226 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 227 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 228 ierr = VecGetSize(x1,&N);CHKERRQ(ierr); 229 for (col=0; col<N; col++) { 230 if (coloring->htype[0] == 'w') { 231 dx = 1.0 + unorm; 232 } else { 233 dx = xx[col]; 234 } 235 if (dx == (PetscScalar)0.0) dx = 1.0; 236 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 237 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 238 dx *= epsilon; 239 vscale_array[col] = (PetscScalar)dx; 240 } 241 #if defined(JACOBIANCOLOROPT) 242 ierr = PetscTime(&t1);CHKERRQ(ierr); 243 t_init += t1 - t0; 244 #endif 245 246 nz = 0; 247 for (k=0; k<ncolors; k++) { /* loop over colors */ 248 #if defined(JACOBIANCOLOROPT) 249 ierr = PetscTime(&t0);CHKERRQ(ierr); 250 #endif 251 coloring->currentcolor = k; 252 253 /* 254 Loop over each column associated with color 255 adding the perturbation to the vector w3 = x1 + dx. 256 */ 257 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 258 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 259 ncolumns_k = ncolumns[k]; 260 for (l=0; l<ncolumns_k; l++) { /* loop over columns */ 261 col = columns[k][l]; 262 w3_array[col] += vscale_array[col]; 263 } 264 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 265 #if defined(JACOBIANCOLOROPT) 266 ierr = PetscTime(&t1);CHKERRQ(ierr); 267 t_dx += t1 - t0; 268 #endif 269 270 /* 271 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 272 w2 = F(x1 + dx) - F(x1) 273 */ 274 #if defined(JACOBIANCOLOROPT) 275 ierr = PetscTime(&t0);CHKERRQ(ierr); 276 #endif 277 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 278 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 279 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 280 #if defined(JACOBIANCOLOROPT) 281 ierr = PetscTime(&t1);CHKERRQ(ierr); 282 t_func += t1 - t0; 283 #endif 284 285 #if defined(JACOBIANCOLOROPT) 286 ierr = PetscTime(&t0);CHKERRQ(ierr); 287 #endif 288 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 289 290 /* 291 Loop over rows of vector, putting w2/dx into Jacobian matrix 292 */ 293 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 294 nrows_k = nrows[k]; 295 for (l=0; l<nrows_k; l++) { /* loop over rows */ 296 #if defined(JACOBIANCOLOROPT) 297 ierr = PetscTime(&t00);CHKERRQ(ierr); 298 #endif 299 row = coloring->rowcolden2sp3[nz++]; 300 col = coloring->rowcolden2sp3[nz++]; 301 spidx = coloring->rowcolden2sp3[nz++]; 302 #if defined(JACOBIANCOLOROPT) 303 ierr = PetscTime(&t11);CHKERRQ(ierr); 304 t_kl += t11 - t00; 305 #endif 306 //printf(" row col val 307 ca[spidx*bs2] = y[row*bs]/vscale_array[col*bs]; 308 //printf(" (%d, %d, %g)\n", row*bs,col*bs,ca[spidx*bs2]); 309 } 310 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 311 312 #if defined(JACOBIANCOLOROPT) 313 ierr = PetscTime(&t1);CHKERRQ(ierr); 314 t_setvals += t1 - t0; 315 #endif 316 } /* endof for each color */ 317 #if defined(JACOBIANCOLOROPT) 318 printf(" FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx); 319 printf(" FDColorApply time: kl %g\n",t_kl); 320 #endif 321 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 322 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 323 324 coloring->currentcolor = -1; 325 PetscFunctionReturn(0); 326 } 327 328 #undef __FUNCT__ 329 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 330 /* 331 This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 332 */ 333 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 334 { 335 PetscErrorCode ierr; 336 PetscInt i,n,nrows,N,j,k,m,ncols,col; 337 const PetscInt *is,*rows,*ci,*cj; 338 PetscInt nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz; 339 IS *isa; 340 PetscBool isBAIJ; 341 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 342 343 PetscFunctionBegin; 344 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 345 346 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 347 SeqBAIJ calls this routine, thus check it */ 348 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 349 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 350 if (!isBAIJ) { 351 bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */ 352 } 353 N = mat->cmap->N/bs; 354 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 355 c->N = mat->cmap->N/bs; 356 c->m = mat->rmap->N/bs; 357 c->rstart = 0; 358 359 c->ncolors = nis; 360 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 361 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 362 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 363 ierr = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr); 364 365 if (isBAIJ) { 366 ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 367 } else { 368 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 369 } 370 371 ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr); 372 ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 373 374 nz = 0; 375 for (i=0; i<nis; i++) { /* loop over colors */ 376 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 377 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 378 379 c->ncolumns[i] = n; 380 if (n) { 381 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 382 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 383 } else { 384 c->columns[i] = 0; 385 } 386 387 /* fast, crude version requires O(N*N) work */ 388 nrows = 0; 389 for (j=0; j<n; j++) { /* loop over columns */ 390 col = is[j]; 391 rows = cj + ci[col]; 392 m = ci[col+1] - ci[col]; 393 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 394 rowhit[*rows] = col + 1; 395 spidxhit[*rows++] = spidx[ci[col] + k]; 396 nrows++; 397 } 398 } 399 c->nrows[i] = nrows; /* total num of rows for this color */ 400 401 nrows = 0; 402 for (j=0; j<N; j++) { /* loop over rows */ 403 if (rowhit[j]) { 404 c->rowcolden2sp3[nz++] = j; /* row index */ 405 c->rowcolden2sp3[nz++] = rowhit[j] - 1; /* column index */ 406 c->rowcolden2sp3[nz++] = spidxhit[j]; /* den2sp index */ 407 rowhit[j] = 0.0; /* zero rowhit for reuse */ 408 } 409 } 410 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 411 } 412 if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz); 413 414 if (isBAIJ) { 415 ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 416 } else { 417 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 418 } 419 ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr); 420 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 421 422 c->ctype = IS_COLORING_GHOSTED; 423 if (isBAIJ) { 424 mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ; 425 } else { 426 mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 427 } 428 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 429 PetscFunctionReturn(0); 430 } 431 432 #undef __FUNCT__ 433 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 434 /* 435 This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 436 This is why it has the ugly code with the MatGetBlockSize() 437 */ 438 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 439 { 440 PetscErrorCode ierr; 441 PetscInt i,n,nrows,N,j,k,m,ncols,col; 442 const PetscInt *is,*rows,*ci,*cj; 443 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 444 IS *isa; 445 PetscBool done,flg = PETSC_FALSE; 446 PetscBool flg1; 447 PetscBool new_impl=PETSC_FALSE; 448 449 PetscFunctionBegin; 450 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 451 if (new_impl){ 452 ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 453 PetscFunctionReturn(0); 454 } 455 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 456 457 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 458 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 459 SeqBAIJ calls this routine, thus check it */ 460 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 461 if (flg1) { 462 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 463 } 464 465 N = mat->cmap->N/bs; 466 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 467 c->N = mat->cmap->N/bs; 468 c->m = mat->rmap->N/bs; 469 c->rstart = 0; 470 471 c->ncolors = nis; 472 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 473 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 474 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 475 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 476 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 477 478 ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 479 if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 480 481 /* 482 Temporary option to allow for debugging/testing 483 */ 484 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 485 486 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 487 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 488 489 for (i=0; i<nis; i++) { 490 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 491 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 492 493 c->ncolumns[i] = n; 494 if (n) { 495 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 496 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 497 } else { 498 c->columns[i] = 0; 499 } 500 501 if (!flg) { /* ------------------------------------------------------------------------------*/ 502 /* fast, crude version requires O(N*N) work */ 503 ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 504 /* loop over columns*/ 505 for (j=0; j<n; j++) { 506 col = is[j]; 507 rows = cj + ci[col]; 508 m = ci[col+1] - ci[col]; 509 /* loop over columns marking them in rowhit */ 510 for (k=0; k<m; k++) { 511 rowhit[*rows++] = col + 1; 512 } 513 } 514 /* count the number of hits */ 515 nrows = 0; 516 for (j=0; j<N; j++) { 517 if (rowhit[j]) nrows++; 518 } 519 c->nrows[i] = nrows; 520 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 521 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 522 nrows = 0; 523 for (j=0; j<N; j++) { 524 if (rowhit[j]) { 525 c->rows[i][nrows] = j; 526 c->columnsforrow[i][nrows] = rowhit[j] - 1; 527 nrows++; 528 } 529 } 530 } else { /*-------------------------------------------------------------------------------*/ 531 /* slow version, using rowhit as a linked list */ 532 PetscInt currentcol,fm,mfm; 533 rowhit[N] = N; 534 nrows = 0; 535 /* loop over columns */ 536 for (j=0; j<n; j++) { 537 col = is[j]; 538 rows = cj + ci[col]; 539 m = ci[col+1] - ci[col]; 540 /* loop over columns marking them in rowhit */ 541 fm = N; /* fm points to first entry in linked list */ 542 for (k=0; k<m; k++) { 543 currentcol = *rows++; 544 /* is it already in the list? */ 545 do { 546 mfm = fm; 547 fm = rowhit[fm]; 548 } while (fm < currentcol); 549 /* not in list so add it */ 550 if (fm != currentcol) { 551 nrows++; 552 columnsforrow[currentcol] = col; 553 /* next three lines insert new entry into linked list */ 554 rowhit[mfm] = currentcol; 555 rowhit[currentcol] = fm; 556 fm = currentcol; 557 /* fm points to present position in list since we know the columns are sorted */ 558 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 559 } 560 } 561 c->nrows[i] = nrows; 562 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 563 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 564 /* now store the linked list of rows into c->rows[i] */ 565 nrows = 0; 566 fm = rowhit[N]; 567 do { 568 c->rows[i][nrows] = fm; 569 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 570 fm = rowhit[fm]; 571 } while (fm < N); 572 } /* ---------------------------------------------------------------------------------------*/ 573 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 574 } 575 ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 576 577 ierr = PetscFree(rowhit);CHKERRQ(ierr); 578 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 579 580 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 581 /* 582 see the version for MPIAIJ 583 */ 584 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 585 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 586 for (k=0; k<c->ncolors; k++) { 587 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 588 for (l=0; l<c->nrows[k]; l++) { 589 col = c->columnsforrow[k][l]; 590 591 c->vscaleforrow[k][l] = col; 592 } 593 } 594 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598