1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/baij/seq/baij.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ" 7 PetscErrorCode MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 8 { 9 PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 10 PetscErrorCode ierr; 11 PetscInt bs=J->rmap->bs,i,j,k,l,row,col,N=J->cmap->n,spidx,nz; 12 PetscScalar dx,*dy_i,*xx,*w3_array,*dy=coloring->dy; 13 PetscScalar *vscale_array; 14 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 15 Vec w1 = coloring->w1,w2=coloring->w2,w3; 16 void *fctx = coloring->fctx; 17 PetscBool flg = PETSC_FALSE; 18 PetscScalar *valaddr; 19 MatEntry *Jentry=coloring->matentry; 20 21 PetscFunctionBegin; 22 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 23 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 24 if (flg) { 25 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 26 } else { 27 PetscBool assembled; 28 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 29 if (assembled) { 30 ierr = MatZeroEntries(J);CHKERRQ(ierr); 31 } 32 } 33 if (!coloring->vscale) { 34 ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 35 } 36 37 /* Set w1 = F(x1) */ 38 if (!coloring->fset) { 39 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 40 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 41 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 42 } else { 43 coloring->fset = PETSC_FALSE; 44 } 45 46 if (!coloring->w3) { 47 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 48 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 49 } 50 w3 = coloring->w3; 51 52 /* Compute local scale factors: vscale = dx */ 53 if (coloring->htype[0] == 'w') { 54 /* tacky test; need to make systematic if we add other approaches to computing h*/ 55 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 56 dx = (1.0 + unorm)*epsilon; 57 ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr); 58 } else { 59 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 60 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 61 for (col=0; col<N; col++) { 62 dx = xx[col]; 63 if (dx == 0.0) dx = 1.0; 64 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 65 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 66 dx *= epsilon; 67 vscale_array[col] = dx; 68 } 69 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 70 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 71 } 72 73 nz = 0; 74 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 75 for (k=0; k<coloring->ncolors; k++) { /* Loop over each color */ 76 coloring->currentcolor = k; 77 78 /* Compute w3 = x1 + dx */ 79 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 80 dy_i = dy; 81 for (i=0; i<bs; i++) { /* Loop over a block of columns */ 82 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 83 for (l=0; l<coloring->ncolumns[k]; l++) { 84 col = i + bs*coloring->columns[k][l]; 85 w3_array[col] += vscale_array[col]; 86 if (i) { 87 w3_array[col-1] -= vscale_array[col-1]; /* resume original w3[col-1] */ 88 } 89 } 90 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 91 92 /* Evaluate function w2 = F(w3) - F(x1) */ 93 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 94 ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */ 95 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 96 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 97 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 98 ierr = VecResetArray(w2);CHKERRQ(ierr); 99 dy_i += N; /* points to dy+i*N */ 100 } 101 102 /* Loop over row blocks, putting dy/dx into Jacobian matrix */ 103 for (l=0; l<coloring->nrows[k]; l++) { 104 row = bs*Jentry[nz].row; 105 col = bs*Jentry[nz].col; 106 valaddr = Jentry[nz].valaddr; 107 nz++; 108 spidx = 0; 109 dy_i = dy; 110 for (i=0; i<bs; i++) { /* column of the block */ 111 for (j=0; j<bs; j++) { /* row of the block */ 112 valaddr[spidx++] = dy_i[row+j]/vscale_array[col+i]; 113 } 114 dy_i += N; /* points to dy+i*N */ 115 } 116 } 117 } /* endof for each color */ 118 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 119 120 coloring->currentcolor = -1; 121 PetscFunctionReturn(0); 122 } 123 124 /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */ 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatFDColoringApply_SeqAIJ" 127 PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 128 { 129 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 130 PetscErrorCode ierr; 131 PetscInt k,l,row,col,N; 132 PetscScalar dx,*y,*xx,*w3_array; 133 PetscScalar *vscale_array; 134 PetscReal epsilon=coloring->error_rel,umin = coloring->umin,unorm; 135 Vec w1=coloring->w1,w2=coloring->w2,w3; 136 void *fctx=coloring->fctx; 137 PetscBool flg=PETSC_FALSE; 138 const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 139 PetscInt **columns=coloring->columns,ncolumns_k,nrows_k,nz; 140 MatEntry *Jentry=coloring->matentry; 141 142 PetscFunctionBegin; 143 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 144 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 145 if (flg) { 146 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 147 } else { 148 PetscBool assembled; 149 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 150 if (assembled) { 151 ierr = MatZeroEntries(J);CHKERRQ(ierr); 152 } 153 } 154 if (!coloring->vscale) { 155 ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 156 } 157 158 /* Set w1 = F(x1) */ 159 if (!coloring->fset) { 160 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 161 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 162 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 163 } else { 164 coloring->fset = PETSC_FALSE; 165 } 166 167 if (!coloring->w3) { 168 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 169 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 170 } 171 w3 = coloring->w3; 172 173 /* Compute scale factors: vscale = dx */ 174 if (coloring->htype[0] == 'w') { 175 /* tacky test; need to make systematic if we add other approaches to computing h*/ 176 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 177 dx = (1.0 + unorm)*epsilon; 178 ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr); 179 } else { 180 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 181 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 182 ierr = VecGetSize(x1,&N);CHKERRQ(ierr); 183 for (col=0; col<N; col++) { 184 dx = xx[col]; 185 if (dx == 0.0) dx = 1.0; 186 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 187 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 188 dx *= epsilon; 189 vscale_array[col] = dx; 190 } 191 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 192 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 193 } 194 195 nz = 0; 196 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 197 for (k=0; k<ncolors; k++) { /* loop over colors */ 198 coloring->currentcolor = k; 199 200 /* 201 Loop over each column associated with color 202 adding the perturbation to the vector w3 = x1 + dx. 203 */ 204 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 205 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 206 ncolumns_k = ncolumns[k]; 207 for (l=0; l<ncolumns_k; l++) { /* loop over columns */ 208 col = columns[k][l]; 209 w3_array[col] += vscale_array[col]; 210 } 211 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 212 213 /* 214 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 215 w2 = F(x1 + dx) - F(x1) 216 */ 217 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 218 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 219 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 220 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 221 222 /* 223 Loop over rows of vector, putting w2/dx into Jacobian matrix 224 */ 225 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 226 nrows_k = nrows[k]; 227 nrows_k = nrows[k]; 228 for (l=0; l<nrows_k; l++) { /* loop over rows */ 229 row = Jentry[nz].row; 230 col = Jentry[nz].col; 231 *(Jentry[nz++].valaddr) = y[row]/vscale_array[col]; 232 } 233 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 234 } /* endof for each color */ 235 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 236 237 coloring->currentcolor = -1; 238 PetscFunctionReturn(0); 239 } 240 241 #undef __FUNCT__ 242 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 243 /* 244 This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 245 */ 246 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 247 { 248 PetscErrorCode ierr; 249 PetscInt i,n,nrows,N,j,k,m,ncols,col; 250 const PetscInt *is,*rows,*ci,*cj; 251 PetscInt nis=iscoloring->n,*rowhit,*columnsforrow,bs,bs2,*spidx,*spidxhit,nz; 252 IS *isa; 253 PetscBool isBAIJ; 254 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 255 PetscScalar *mat_val=csp->a; 256 PetscScalar **valaddrhit; 257 MatEntry *Jentry; 258 259 PetscFunctionBegin; 260 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 261 262 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 263 SeqBAIJ calls this routine, thus check it */ 264 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 265 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 266 if (!isBAIJ) { 267 bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */ 268 } 269 N = mat->cmap->N/bs; 270 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 271 c->N = mat->cmap->N/bs; 272 c->m = mat->rmap->N/bs; 273 c->rstart = 0; 274 275 c->ncolors = nis; 276 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 277 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 278 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 279 280 ierr = PetscMalloc(csp->nz*sizeof(MatEntry),&Jentry);CHKERRQ(ierr); 281 ierr = PetscLogObjectMemory((PetscObject)c,csp->nz*sizeof(MatEntry));CHKERRQ(ierr); 282 c->matentry = Jentry; 283 284 if (isBAIJ) { 285 ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 286 } else { 287 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 288 } 289 290 ierr = PetscMalloc4(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit,c->m,PetscScalar*,&valaddrhit);CHKERRQ(ierr); 291 ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 292 293 nz = 0; 294 for (i=0; i<nis; i++) { /* loop over colors */ 295 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 296 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 297 298 c->ncolumns[i] = n; 299 if (n) { 300 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 301 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 302 } else { 303 c->columns[i] = 0; 304 } 305 306 /* fast, crude version requires O(N*N) work */ 307 bs2 = bs*bs; 308 nrows = 0; 309 for (j=0; j<n; j++) { /* loop over columns */ 310 col = is[j]; 311 rows = cj + ci[col]; 312 m = ci[col+1] - ci[col]; 313 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 314 rowhit[*rows] = col + 1; 315 spidxhit[*rows] = spidx[ci[col] + k]; 316 valaddrhit[*rows] = &mat_val[bs2*spidx[ci[col] + k]]; 317 rows++; 318 nrows++; 319 } 320 } 321 c->nrows[i] = nrows; /* total num of rows for this color */ 322 323 nrows = 0; 324 for (j=0; j<N; j++) { /* loop over rows */ 325 if (rowhit[j]) { 326 Jentry[nz].row = j; /* local row index */ 327 Jentry[nz].col = rowhit[j] - 1; /* local column index */ 328 Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 329 nz++; 330 rowhit[j] = 0.0; /* zero rowhit for reuse */ 331 } 332 } 333 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 334 } 335 if (nz != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz); 336 337 if (isBAIJ) { 338 ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 339 } else { 340 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 341 } 342 ierr = PetscFree4(rowhit,columnsforrow,spidxhit,valaddrhit);CHKERRQ(ierr); 343 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 344 345 c->ctype = IS_COLORING_GHOSTED; 346 if (isBAIJ) { 347 mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ; 348 ierr = PetscMalloc(bs*mat->cmap->N*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr); 349 } else { 350 mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 351 } 352 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 353 PetscFunctionReturn(0); 354 } 355 356 #undef __FUNCT__ 357 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 358 /* 359 This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 360 This is why it has the ugly code with the MatGetBlockSize() 361 */ 362 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 363 { 364 PetscErrorCode ierr; 365 PetscInt i,n,nrows,N,j,k,m,ncols,col; 366 const PetscInt *is,*rows,*ci,*cj; 367 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 368 IS *isa; 369 PetscBool done,flg = PETSC_FALSE; 370 PetscBool flg1; 371 PetscBool new_impl=PETSC_FALSE; 372 373 PetscFunctionBegin; 374 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 375 if (new_impl){ 376 ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 377 PetscFunctionReturn(0); 378 } 379 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 380 381 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 382 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 383 SeqBAIJ calls this routine, thus check it */ 384 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 385 if (flg1) { 386 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 387 } 388 389 N = mat->cmap->N/bs; 390 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 391 c->N = mat->cmap->N/bs; 392 c->m = mat->rmap->N/bs; 393 c->rstart = 0; 394 395 c->ncolors = nis; 396 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 397 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 398 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 399 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 400 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 401 402 ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 403 if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 404 405 /* 406 Temporary option to allow for debugging/testing 407 */ 408 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 409 410 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 411 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 412 413 for (i=0; i<nis; i++) { 414 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 415 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 416 417 c->ncolumns[i] = n; 418 if (n) { 419 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 420 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 421 } else { 422 c->columns[i] = 0; 423 } 424 425 if (!flg) { /* ------------------------------------------------------------------------------*/ 426 /* fast, crude version requires O(N*N) work */ 427 ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 428 /* loop over columns*/ 429 for (j=0; j<n; j++) { 430 col = is[j]; 431 rows = cj + ci[col]; 432 m = ci[col+1] - ci[col]; 433 /* loop over columns marking them in rowhit */ 434 for (k=0; k<m; k++) { 435 rowhit[*rows++] = col + 1; 436 } 437 } 438 /* count the number of hits */ 439 nrows = 0; 440 for (j=0; j<N; j++) { 441 if (rowhit[j]) nrows++; 442 } 443 c->nrows[i] = nrows; 444 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 445 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 446 nrows = 0; 447 for (j=0; j<N; j++) { 448 if (rowhit[j]) { 449 c->rows[i][nrows] = j; 450 c->columnsforrow[i][nrows] = rowhit[j] - 1; 451 nrows++; 452 } 453 } 454 } else { /*-------------------------------------------------------------------------------*/ 455 /* slow version, using rowhit as a linked list */ 456 PetscInt currentcol,fm,mfm; 457 rowhit[N] = N; 458 nrows = 0; 459 /* loop over columns */ 460 for (j=0; j<n; j++) { 461 col = is[j]; 462 rows = cj + ci[col]; 463 m = ci[col+1] - ci[col]; 464 /* loop over columns marking them in rowhit */ 465 fm = N; /* fm points to first entry in linked list */ 466 for (k=0; k<m; k++) { 467 currentcol = *rows++; 468 /* is it already in the list? */ 469 do { 470 mfm = fm; 471 fm = rowhit[fm]; 472 } while (fm < currentcol); 473 /* not in list so add it */ 474 if (fm != currentcol) { 475 nrows++; 476 columnsforrow[currentcol] = col; 477 /* next three lines insert new entry into linked list */ 478 rowhit[mfm] = currentcol; 479 rowhit[currentcol] = fm; 480 fm = currentcol; 481 /* fm points to present position in list since we know the columns are sorted */ 482 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 483 } 484 } 485 c->nrows[i] = nrows; 486 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 487 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 488 /* now store the linked list of rows into c->rows[i] */ 489 nrows = 0; 490 fm = rowhit[N]; 491 do { 492 c->rows[i][nrows] = fm; 493 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 494 fm = rowhit[fm]; 495 } while (fm < N); 496 } /* ---------------------------------------------------------------------------------------*/ 497 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 498 } 499 ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 500 501 ierr = PetscFree(rowhit);CHKERRQ(ierr); 502 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 503 504 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 505 /* 506 see the version for MPIAIJ 507 */ 508 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 509 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 510 for (k=0; k<c->ncolors; k++) { 511 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 512 for (l=0; l<c->nrows[k]; l++) { 513 col = c->columnsforrow[k][l]; 514 515 c->vscaleforrow[k][l] = col; 516 } 517 } 518 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 519 PetscFunctionReturn(0); 520 } 521 522