1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/baij/seq/baij.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ" 7 PetscErrorCode MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 8 { 9 PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 10 PetscErrorCode ierr; 11 PetscInt bs=J->rmap->bs,bs2=bs*bs,i,j,k,l,row,col,N; 12 PetscScalar dx,*y,*xx,*w3_array; 13 PetscScalar *vscale_array; 14 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 15 Vec w1 = coloring->w1,w2=coloring->w2,w3; 16 void *fctx = coloring->fctx; 17 PetscBool flg = PETSC_FALSE; 18 Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 19 PetscScalar *ca = csp->a,*ca_l; 20 PetscInt brow,bcol,bspidx,spidx,nz,nz_i; 21 22 PetscFunctionBegin; 23 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 24 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 25 if (flg) { 26 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 27 } else { 28 PetscBool assembled; 29 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 30 if (assembled) { 31 ierr = MatZeroEntries(J);CHKERRQ(ierr); 32 } 33 } 34 35 if (!coloring->vscale) { 36 ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 37 } 38 39 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 40 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 41 } 42 43 /* Set w1 = F(x1) */ 44 if (!coloring->fset) { 45 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 46 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 47 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 48 } else { 49 coloring->fset = PETSC_FALSE; 50 } 51 52 if (!coloring->w3) { 53 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 54 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 55 } 56 w3 = coloring->w3; 57 58 /* Compute all the local scale factors: vscale = 1./dx = 1./(epsilon*xx) */ 59 ierr = VecGetLocalSize(x1,&N);CHKERRQ(ierr); 60 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 61 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 62 63 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 64 for (col=0; col<N; col++) { 65 if (coloring->htype[0] == 'w') { 66 dx = 1.0 + unorm; 67 } else { 68 dx = xx[col]; 69 } 70 if (dx == (PetscScalar)0.0) dx = 1.0; 71 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 72 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 73 dx *= epsilon; 74 vscale_array[col] = (PetscScalar)dx; 75 } 76 77 nz = 0; 78 for (k=0; k<coloring->ncolors; k++) { /* Loop over each color */ 79 coloring->currentcolor = k; 80 nz_i = nz; 81 for (i=0; i<bs; i++) { 82 nz = nz_i; 83 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 84 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 85 86 /* 87 Loop over each column associated with color 88 adding the perturbation to the vector w3. 89 */ 90 for (l=0; l<coloring->ncolumns[k]; l++) { 91 col = i + bs*coloring->columns[k][l]; 92 w3_array[col] += vscale_array[col]; 93 } 94 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 95 96 /* 97 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 98 w2 = F(x1 + dx) - F(x1) 99 */ 100 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 101 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 102 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 103 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 104 105 /* 106 Loop over rows of vector, putting results into Jacobian matrix 107 */ 108 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 109 for (l=0; l<coloring->nrows[k]; l++) { 110 brow = coloring->rowcolden2sp3[nz++]; 111 bcol = coloring->rowcolden2sp3[nz++]; 112 bspidx = coloring->rowcolden2sp3[nz++]; 113 row = bs*brow; 114 col = bs*bcol + i; 115 116 ca_l = ca + bs2*bspidx + i*bs; 117 for (j=0; j<bs; j++) { 118 ca_l[j] = y[row+j]/vscale_array[col]; 119 } 120 } 121 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 122 } /* endof for (i=0; i<bs; i++) */ 123 } /* endof for each color */ 124 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 125 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 126 127 coloring->currentcolor = -1; 128 PetscFunctionReturn(0); 129 } 130 131 /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */ 132 /* #define JACOBIANCOLOROPT */ 133 #if defined(JACOBIANCOLOROPT) 134 #include <petsctime.h> 135 #endif 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatFDColoringApply_SeqAIJ" 138 PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 139 { 140 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 141 PetscErrorCode ierr; 142 PetscInt k,l,row,col,N,bs=J->rmap->bs,bs2=bs*bs; 143 PetscScalar dx,*y,*xx,*w3_array; 144 PetscScalar *vscale_array; 145 PetscReal epsilon=coloring->error_rel,umin = coloring->umin,unorm; 146 Vec w1=coloring->w1,w2=coloring->w2,w3; 147 void *fctx=coloring->fctx; 148 PetscBool flg=PETSC_FALSE,isBAIJ=PETSC_FALSE; 149 PetscScalar *ca; 150 const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 151 PetscInt **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx; 152 #if defined(JACOBIANCOLOROPT) 153 PetscLogDouble t0,t1,t_init=0.0,t_setvals=0.0,t_func=0.0,t_dx=0.0,t_kl=0.0,t00,t11; 154 #endif 155 156 PetscFunctionBegin; 157 #if defined(JACOBIANCOLOROPT) 158 ierr = PetscTime(&t0);CHKERRQ(ierr); 159 #endif 160 ierr = PetscObjectTypeCompare((PetscObject)J,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 161 if (isBAIJ) { 162 Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 163 ca = csp->a; 164 } else { 165 Mat_SeqAIJ *csp=(Mat_SeqAIJ*)J->data; 166 ca = csp->a; 167 bs = 1; bs2 = 1; 168 } 169 170 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 171 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 172 if (flg) { 173 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 174 } else { 175 PetscBool assembled; 176 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 177 if (assembled) { 178 ierr = MatZeroEntries(J);CHKERRQ(ierr); 179 } 180 } 181 182 if (!coloring->vscale) { 183 ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 184 } 185 186 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 187 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 188 } 189 #if defined(JACOBIANCOLOROPT) 190 ierr = PetscTime(&t1);CHKERRQ(ierr); 191 t_init += t1 - t0; 192 #endif 193 194 /* Set w1 = F(x1) */ 195 #if defined(JACOBIANCOLOROPT) 196 ierr = PetscTime(&t0);CHKERRQ(ierr); 197 #endif 198 if (!coloring->fset) { 199 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 200 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 201 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 202 } else { 203 coloring->fset = PETSC_FALSE; 204 } 205 #if defined(JACOBIANCOLOROPT) 206 ierr = PetscTime(&t1);CHKERRQ(ierr); 207 t_setvals += t1 - t0; 208 #endif 209 210 #if defined(JACOBIANCOLOROPT) 211 ierr = PetscTime(&t0);CHKERRQ(ierr); 212 #endif 213 if (!coloring->w3) { 214 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 215 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 216 } 217 w3 = coloring->w3; 218 219 /* Compute scale factors: vscale = 1./dx = 1./(epsilon*xx) */ 220 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 221 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 222 ierr = VecGetSize(x1,&N);CHKERRQ(ierr); 223 for (col=0; col<N; col++) { 224 if (coloring->htype[0] == 'w') { 225 dx = 1.0 + unorm; 226 } else { 227 dx = xx[col]; 228 } 229 if (dx == (PetscScalar)0.0) dx = 1.0; 230 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 231 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 232 dx *= epsilon; 233 vscale_array[col] = (PetscScalar)dx; 234 } 235 #if defined(JACOBIANCOLOROPT) 236 ierr = PetscTime(&t1);CHKERRQ(ierr); 237 t_init += t1 - t0; 238 #endif 239 240 nz = 0; 241 for (k=0; k<ncolors; k++) { /* loop over colors */ 242 #if defined(JACOBIANCOLOROPT) 243 ierr = PetscTime(&t0);CHKERRQ(ierr); 244 #endif 245 coloring->currentcolor = k; 246 247 /* 248 Loop over each column associated with color 249 adding the perturbation to the vector w3 = x1 + dx. 250 */ 251 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 252 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 253 ncolumns_k = ncolumns[k]; 254 for (l=0; l<ncolumns_k; l++) { /* loop over columns */ 255 col = columns[k][l]; 256 w3_array[col] += vscale_array[col]; 257 } 258 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 259 #if defined(JACOBIANCOLOROPT) 260 ierr = PetscTime(&t1);CHKERRQ(ierr); 261 t_dx += t1 - t0; 262 #endif 263 264 /* 265 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 266 w2 = F(x1 + dx) - F(x1) 267 */ 268 #if defined(JACOBIANCOLOROPT) 269 ierr = PetscTime(&t0);CHKERRQ(ierr); 270 #endif 271 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 272 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 273 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 274 #if defined(JACOBIANCOLOROPT) 275 ierr = PetscTime(&t1);CHKERRQ(ierr); 276 t_func += t1 - t0; 277 #endif 278 279 #if defined(JACOBIANCOLOROPT) 280 ierr = PetscTime(&t0);CHKERRQ(ierr); 281 #endif 282 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 283 284 /* 285 Loop over rows of vector, putting w2/dx into Jacobian matrix 286 */ 287 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 288 nrows_k = nrows[k]; 289 for (l=0; l<nrows_k; l++) { /* loop over rows */ 290 #if defined(JACOBIANCOLOROPT) 291 ierr = PetscTime(&t00);CHKERRQ(ierr); 292 #endif 293 row = coloring->rowcolden2sp3[nz++]; 294 col = coloring->rowcolden2sp3[nz++]; 295 spidx = coloring->rowcolden2sp3[nz++]; 296 #if defined(JACOBIANCOLOROPT) 297 ierr = PetscTime(&t11);CHKERRQ(ierr); 298 t_kl += t11 - t00; 299 #endif 300 //printf(" row col val 301 ca[spidx*bs2] = y[row*bs]/vscale_array[col*bs]; 302 //printf(" (%d, %d, %g)\n", row*bs,col*bs,ca[spidx*bs2]); 303 } 304 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 305 306 #if defined(JACOBIANCOLOROPT) 307 ierr = PetscTime(&t1);CHKERRQ(ierr); 308 t_setvals += t1 - t0; 309 #endif 310 } /* endof for each color */ 311 #if defined(JACOBIANCOLOROPT) 312 printf(" FDColorApply time: init %g + func %g + setvalues %g + dx %g= %g\n",t_init,t_func,t_setvals,t_dx,t_init+t_func+t_setvals+t_dx); 313 printf(" FDColorApply time: kl %g\n",t_kl); 314 #endif 315 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 316 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 317 318 coloring->currentcolor = -1; 319 PetscFunctionReturn(0); 320 } 321 322 #undef __FUNCT__ 323 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 324 /* 325 This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 326 */ 327 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 328 { 329 PetscErrorCode ierr; 330 PetscInt i,n,nrows,N,j,k,m,ncols,col; 331 const PetscInt *is,*rows,*ci,*cj; 332 PetscInt nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz; 333 IS *isa; 334 PetscBool isBAIJ; 335 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 336 337 PetscFunctionBegin; 338 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 339 340 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 341 SeqBAIJ calls this routine, thus check it */ 342 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 343 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 344 if (!isBAIJ) { 345 bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */ 346 } 347 N = mat->cmap->N/bs; 348 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 349 c->N = mat->cmap->N/bs; 350 c->m = mat->rmap->N/bs; 351 c->rstart = 0; 352 353 c->ncolors = nis; 354 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 355 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 356 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 357 ierr = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr); 358 359 if (isBAIJ) { 360 ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 361 } else { 362 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 363 } 364 365 ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr); 366 ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 367 368 nz = 0; 369 for (i=0; i<nis; i++) { /* loop over colors */ 370 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 371 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 372 373 c->ncolumns[i] = n; 374 if (n) { 375 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 376 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 377 } else { 378 c->columns[i] = 0; 379 } 380 381 /* fast, crude version requires O(N*N) work */ 382 nrows = 0; 383 for (j=0; j<n; j++) { /* loop over columns */ 384 col = is[j]; 385 rows = cj + ci[col]; 386 m = ci[col+1] - ci[col]; 387 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 388 rowhit[*rows] = col + 1; 389 spidxhit[*rows++] = spidx[ci[col] + k]; 390 nrows++; 391 } 392 } 393 c->nrows[i] = nrows; /* total num of rows for this color */ 394 395 nrows = 0; 396 for (j=0; j<N; j++) { /* loop over rows */ 397 if (rowhit[j]) { 398 c->rowcolden2sp3[nz++] = j; /* row index */ 399 c->rowcolden2sp3[nz++] = rowhit[j] - 1; /* column index */ 400 c->rowcolden2sp3[nz++] = spidxhit[j]; /* den2sp index */ 401 rowhit[j] = 0.0; /* zero rowhit for reuse */ 402 } 403 } 404 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 405 } 406 if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz); 407 408 if (isBAIJ) { 409 ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 410 } else { 411 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 412 } 413 ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr); 414 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 415 416 c->ctype = IS_COLORING_GHOSTED; 417 if (isBAIJ) { 418 mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ; 419 } else { 420 mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 421 } 422 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 428 /* 429 This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 430 This is why it has the ugly code with the MatGetBlockSize() 431 */ 432 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 433 { 434 PetscErrorCode ierr; 435 PetscInt i,n,nrows,N,j,k,m,ncols,col; 436 const PetscInt *is,*rows,*ci,*cj; 437 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 438 IS *isa; 439 PetscBool done,flg = PETSC_FALSE; 440 PetscBool flg1; 441 PetscBool new_impl=PETSC_FALSE; 442 443 PetscFunctionBegin; 444 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 445 if (new_impl){ 446 ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 447 PetscFunctionReturn(0); 448 } 449 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 450 451 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 452 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 453 SeqBAIJ calls this routine, thus check it */ 454 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 455 if (flg1) { 456 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 457 } 458 459 N = mat->cmap->N/bs; 460 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 461 c->N = mat->cmap->N/bs; 462 c->m = mat->rmap->N/bs; 463 c->rstart = 0; 464 465 c->ncolors = nis; 466 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 467 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 468 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 469 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 470 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 471 472 ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 473 if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 474 475 /* 476 Temporary option to allow for debugging/testing 477 */ 478 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 479 480 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 481 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 482 483 for (i=0; i<nis; i++) { 484 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 485 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 486 487 c->ncolumns[i] = n; 488 if (n) { 489 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 490 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 491 } else { 492 c->columns[i] = 0; 493 } 494 495 if (!flg) { /* ------------------------------------------------------------------------------*/ 496 /* fast, crude version requires O(N*N) work */ 497 ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 498 /* loop over columns*/ 499 for (j=0; j<n; j++) { 500 col = is[j]; 501 rows = cj + ci[col]; 502 m = ci[col+1] - ci[col]; 503 /* loop over columns marking them in rowhit */ 504 for (k=0; k<m; k++) { 505 rowhit[*rows++] = col + 1; 506 } 507 } 508 /* count the number of hits */ 509 nrows = 0; 510 for (j=0; j<N; j++) { 511 if (rowhit[j]) nrows++; 512 } 513 c->nrows[i] = nrows; 514 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 515 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 516 nrows = 0; 517 for (j=0; j<N; j++) { 518 if (rowhit[j]) { 519 c->rows[i][nrows] = j; 520 c->columnsforrow[i][nrows] = rowhit[j] - 1; 521 nrows++; 522 } 523 } 524 } else { /*-------------------------------------------------------------------------------*/ 525 /* slow version, using rowhit as a linked list */ 526 PetscInt currentcol,fm,mfm; 527 rowhit[N] = N; 528 nrows = 0; 529 /* loop over columns */ 530 for (j=0; j<n; j++) { 531 col = is[j]; 532 rows = cj + ci[col]; 533 m = ci[col+1] - ci[col]; 534 /* loop over columns marking them in rowhit */ 535 fm = N; /* fm points to first entry in linked list */ 536 for (k=0; k<m; k++) { 537 currentcol = *rows++; 538 /* is it already in the list? */ 539 do { 540 mfm = fm; 541 fm = rowhit[fm]; 542 } while (fm < currentcol); 543 /* not in list so add it */ 544 if (fm != currentcol) { 545 nrows++; 546 columnsforrow[currentcol] = col; 547 /* next three lines insert new entry into linked list */ 548 rowhit[mfm] = currentcol; 549 rowhit[currentcol] = fm; 550 fm = currentcol; 551 /* fm points to present position in list since we know the columns are sorted */ 552 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 553 } 554 } 555 c->nrows[i] = nrows; 556 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 557 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 558 /* now store the linked list of rows into c->rows[i] */ 559 nrows = 0; 560 fm = rowhit[N]; 561 do { 562 c->rows[i][nrows] = fm; 563 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 564 fm = rowhit[fm]; 565 } while (fm < N); 566 } /* ---------------------------------------------------------------------------------------*/ 567 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 568 } 569 ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 570 571 ierr = PetscFree(rowhit);CHKERRQ(ierr); 572 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 573 574 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 575 /* 576 see the version for MPIAIJ 577 */ 578 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 579 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 580 for (k=0; k<c->ncolors; k++) { 581 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 582 for (l=0; l<c->nrows[k]; l++) { 583 col = c->columnsforrow[k][l]; 584 585 c->vscaleforrow[k][l] = col; 586 } 587 } 588 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 589 PetscFunctionReturn(0); 590 } 591 592