1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/baij/seq/baij.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ" 7 PetscErrorCode MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 8 { 9 PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 10 PetscErrorCode ierr; 11 PetscInt bs=J->rmap->bs,bs2=bs*bs,i,j,k,l,row,col,N=J->cmap->n,spidx,nz; 12 PetscScalar dx,*dy_i,*xx,*w3_array,*dy=coloring->dy; 13 PetscScalar *vscale_array; 14 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 15 Vec w1 = coloring->w1,w2=coloring->w2,w3; 16 void *fctx = coloring->fctx; 17 PetscBool flg = PETSC_FALSE; 18 Mat_SeqBAIJ *csp=(Mat_SeqBAIJ*)J->data; 19 PetscScalar *ca = csp->a,*ca_l; 20 21 PetscFunctionBegin; 22 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 23 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 24 if (flg) { 25 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 26 } else { 27 PetscBool assembled; 28 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 29 if (assembled) { 30 ierr = MatZeroEntries(J);CHKERRQ(ierr); 31 } 32 } 33 if (!coloring->vscale) { 34 ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 35 } 36 37 /* Set w1 = F(x1) */ 38 if (!coloring->fset) { 39 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 40 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 41 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 42 } else { 43 coloring->fset = PETSC_FALSE; 44 } 45 46 if (!coloring->w3) { 47 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 48 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 49 } 50 w3 = coloring->w3; 51 52 /* Compute local scale factors: vscale = dx */ 53 if (coloring->htype[0] == 'w') { 54 /* tacky test; need to make systematic if we add other approaches to computing h*/ 55 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 56 dx = (1.0 + unorm)*epsilon; 57 ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr); 58 } else { 59 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 60 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 61 for (col=0; col<N; col++) { 62 dx = xx[col]; 63 if (dx == 0.0) dx = 1.0; 64 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 65 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 66 dx *= epsilon; 67 vscale_array[col] = dx; 68 } 69 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 70 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 71 } 72 73 nz = 0; 74 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 75 for (k=0; k<coloring->ncolors; k++) { /* Loop over each color */ 76 coloring->currentcolor = k; 77 78 /* Compute w3 = x1 + dx */ 79 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 80 dy_i = dy; 81 for (i=0; i<bs; i++) { /* Loop over a block of columns */ 82 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 83 for (l=0; l<coloring->ncolumns[k]; l++) { 84 col = i + bs*coloring->columns[k][l]; 85 w3_array[col] += vscale_array[col]; 86 if (i) { 87 w3_array[col-1] -= vscale_array[col-1]; /* resume original w3[col-1] */ 88 } 89 } 90 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 91 92 /* Evaluate function w2 = F(w3) - F(x1) */ 93 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 94 ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */ 95 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 96 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 97 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 98 ierr = VecResetArray(w2);CHKERRQ(ierr); 99 dy_i += N; /* points to dy+i*N */ 100 } 101 102 /* Loop over row blocks, putting dy/dx into Jacobian matrix */ 103 for (l=0; l<coloring->nrows[k]; l++) { 104 row = bs*coloring->rowcolden2sp3[nz++]; 105 col = bs*coloring->rowcolden2sp3[nz++]; 106 ca_l = ca + bs2*coloring->rowcolden2sp3[nz++]; 107 spidx = 0; 108 dy_i = dy; 109 for (i=0; i<bs; i++) { /* column of the block */ 110 for (j=0; j<bs; j++) { /* row of the block */ 111 ca_l[spidx++] = dy_i[row+j]/vscale_array[col+i]; 112 } 113 dy_i += N; /* points to dy+i*N */ 114 } 115 } 116 } /* endof for each color */ 117 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 118 119 coloring->currentcolor = -1; 120 PetscFunctionReturn(0); 121 } 122 123 /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */ 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatFDColoringApply_SeqAIJ" 126 PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 127 { 128 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 129 PetscErrorCode ierr; 130 PetscInt k,l,row,col,N; 131 PetscScalar dx,*y,*xx,*w3_array; 132 PetscScalar *vscale_array; 133 PetscReal epsilon=coloring->error_rel,umin = coloring->umin,unorm; 134 Vec w1=coloring->w1,w2=coloring->w2,w3; 135 void *fctx=coloring->fctx; 136 PetscBool flg=PETSC_FALSE; 137 Mat_SeqAIJ *csp=(Mat_SeqAIJ*)J->data; 138 PetscScalar *ca=csp->a; 139 const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 140 PetscInt **columns=coloring->columns,ncolumns_k,nrows_k,nz,spidx; 141 142 PetscFunctionBegin; 143 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 144 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 145 if (flg) { 146 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 147 } else { 148 PetscBool assembled; 149 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 150 if (assembled) { 151 ierr = MatZeroEntries(J);CHKERRQ(ierr); 152 } 153 } 154 if (!coloring->vscale) { 155 ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 156 } 157 158 /* Set w1 = F(x1) */ 159 if (!coloring->fset) { 160 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 161 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 162 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 163 } else { 164 coloring->fset = PETSC_FALSE; 165 } 166 167 if (!coloring->w3) { 168 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 169 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 170 } 171 w3 = coloring->w3; 172 173 /* Compute scale factors: vscale = dx */ 174 if (coloring->htype[0] == 'w') { 175 /* tacky test; need to make systematic if we add other approaches to computing h*/ 176 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 177 dx = (1.0 + unorm)*epsilon; 178 ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr); 179 } else { 180 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 181 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 182 ierr = VecGetSize(x1,&N);CHKERRQ(ierr); 183 for (col=0; col<N; col++) { 184 dx = xx[col]; 185 if (dx == 0.0) dx = 1.0; 186 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 187 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 188 dx *= epsilon; 189 vscale_array[col] = dx; 190 } 191 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 192 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 193 } 194 195 nz = 0; 196 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 197 for (k=0; k<ncolors; k++) { /* loop over colors */ 198 coloring->currentcolor = k; 199 200 /* 201 Loop over each column associated with color 202 adding the perturbation to the vector w3 = x1 + dx. 203 */ 204 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 205 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 206 ncolumns_k = ncolumns[k]; 207 for (l=0; l<ncolumns_k; l++) { /* loop over columns */ 208 col = columns[k][l]; 209 w3_array[col] += vscale_array[col]; 210 } 211 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 212 213 /* 214 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 215 w2 = F(x1 + dx) - F(x1) 216 */ 217 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 218 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 219 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 220 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 221 222 /* 223 Loop over rows of vector, putting w2/dx into Jacobian matrix 224 */ 225 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 226 nrows_k = nrows[k]; 227 for (l=0; l<nrows_k; l++) { /* loop over rows */ 228 row = coloring->rowcolden2sp3[nz++]; 229 col = coloring->rowcolden2sp3[nz++]; 230 spidx = coloring->rowcolden2sp3[nz++]; 231 ca[spidx] = y[row]/vscale_array[col]; 232 } 233 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 234 } /* endof for each color */ 235 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 236 237 coloring->currentcolor = -1; 238 PetscFunctionReturn(0); 239 } 240 241 #undef __FUNCT__ 242 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_den2sp" 243 /* 244 This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 245 */ 246 PetscErrorCode MatFDColoringCreate_SeqAIJ_den2sp(Mat mat,ISColoring iscoloring,MatFDColoring c) 247 { 248 PetscErrorCode ierr; 249 PetscInt i,n,nrows,N,j,k,m,ncols,col; 250 const PetscInt *is,*rows,*ci,*cj; 251 PetscInt nis=iscoloring->n,*rowhit,*columnsforrow,bs,*spidx,*spidxhit,nz; 252 IS *isa; 253 PetscBool isBAIJ; 254 Mat_SeqAIJ *csp = (Mat_SeqAIJ*)mat->data; 255 256 PetscFunctionBegin; 257 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 258 259 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 260 SeqBAIJ calls this routine, thus check it */ 261 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 262 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 263 if (!isBAIJ) { 264 bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */ 265 } 266 N = mat->cmap->N/bs; 267 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 268 c->N = mat->cmap->N/bs; 269 c->m = mat->rmap->N/bs; 270 c->rstart = 0; 271 272 c->ncolors = nis; 273 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 274 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 275 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 276 ierr = PetscMalloc(3*csp->nz*sizeof(PetscInt*),&c->rowcolden2sp3);CHKERRQ(ierr); 277 278 if (isBAIJ) { 279 ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 280 } else { 281 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 282 } 283 284 ierr = PetscMalloc3(c->m,PetscInt,&rowhit,N,PetscInt,&columnsforrow,c->m,PetscInt,&spidxhit);CHKERRQ(ierr); 285 ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 286 287 nz = 0; 288 for (i=0; i<nis; i++) { /* loop over colors */ 289 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 290 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 291 292 c->ncolumns[i] = n; 293 if (n) { 294 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 295 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 296 } else { 297 c->columns[i] = 0; 298 } 299 300 /* fast, crude version requires O(N*N) work */ 301 nrows = 0; 302 for (j=0; j<n; j++) { /* loop over columns */ 303 col = is[j]; 304 rows = cj + ci[col]; 305 m = ci[col+1] - ci[col]; 306 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 307 rowhit[*rows] = col + 1; 308 spidxhit[*rows++] = spidx[ci[col] + k]; 309 nrows++; 310 } 311 } 312 c->nrows[i] = nrows; /* total num of rows for this color */ 313 314 nrows = 0; 315 for (j=0; j<N; j++) { /* loop over rows */ 316 if (rowhit[j]) { 317 c->rowcolden2sp3[nz++] = j; /* row index */ 318 c->rowcolden2sp3[nz++] = rowhit[j] - 1; /* column index */ 319 c->rowcolden2sp3[nz++] = spidxhit[j]; /* den2sp index */ 320 rowhit[j] = 0.0; /* zero rowhit for reuse */ 321 } 322 } 323 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 324 } 325 if (nz/3 != csp->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz/3,csp->nz); 326 327 if (isBAIJ) { 328 ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 329 } else { 330 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 331 } 332 ierr = PetscFree3(rowhit,columnsforrow,spidxhit);CHKERRQ(ierr); 333 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 334 335 c->ctype = IS_COLORING_GHOSTED; 336 if (isBAIJ) { 337 mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ; 338 ierr = PetscMalloc(bs*mat->cmap->N*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr); 339 } else { 340 mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 341 } 342 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 348 /* 349 This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 350 This is why it has the ugly code with the MatGetBlockSize() 351 */ 352 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 353 { 354 PetscErrorCode ierr; 355 PetscInt i,n,nrows,N,j,k,m,ncols,col; 356 const PetscInt *is,*rows,*ci,*cj; 357 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 358 IS *isa; 359 PetscBool done,flg = PETSC_FALSE; 360 PetscBool flg1; 361 PetscBool new_impl=PETSC_FALSE; 362 363 PetscFunctionBegin; 364 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 365 if (new_impl){ 366 ierr = MatFDColoringCreate_SeqAIJ_den2sp(mat,iscoloring,c);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 370 371 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 372 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 373 SeqBAIJ calls this routine, thus check it */ 374 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 375 if (flg1) { 376 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 377 } 378 379 N = mat->cmap->N/bs; 380 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 381 c->N = mat->cmap->N/bs; 382 c->m = mat->rmap->N/bs; 383 c->rstart = 0; 384 385 c->ncolors = nis; 386 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 387 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 388 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 389 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 390 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 391 392 ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 393 if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 394 395 /* 396 Temporary option to allow for debugging/testing 397 */ 398 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 399 400 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 401 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 402 403 for (i=0; i<nis; i++) { 404 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 405 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 406 407 c->ncolumns[i] = n; 408 if (n) { 409 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 410 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 411 } else { 412 c->columns[i] = 0; 413 } 414 415 if (!flg) { /* ------------------------------------------------------------------------------*/ 416 /* fast, crude version requires O(N*N) work */ 417 ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 418 /* loop over columns*/ 419 for (j=0; j<n; j++) { 420 col = is[j]; 421 rows = cj + ci[col]; 422 m = ci[col+1] - ci[col]; 423 /* loop over columns marking them in rowhit */ 424 for (k=0; k<m; k++) { 425 rowhit[*rows++] = col + 1; 426 } 427 } 428 /* count the number of hits */ 429 nrows = 0; 430 for (j=0; j<N; j++) { 431 if (rowhit[j]) nrows++; 432 } 433 c->nrows[i] = nrows; 434 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 435 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 436 nrows = 0; 437 for (j=0; j<N; j++) { 438 if (rowhit[j]) { 439 c->rows[i][nrows] = j; 440 c->columnsforrow[i][nrows] = rowhit[j] - 1; 441 nrows++; 442 } 443 } 444 } else { /*-------------------------------------------------------------------------------*/ 445 /* slow version, using rowhit as a linked list */ 446 PetscInt currentcol,fm,mfm; 447 rowhit[N] = N; 448 nrows = 0; 449 /* loop over columns */ 450 for (j=0; j<n; j++) { 451 col = is[j]; 452 rows = cj + ci[col]; 453 m = ci[col+1] - ci[col]; 454 /* loop over columns marking them in rowhit */ 455 fm = N; /* fm points to first entry in linked list */ 456 for (k=0; k<m; k++) { 457 currentcol = *rows++; 458 /* is it already in the list? */ 459 do { 460 mfm = fm; 461 fm = rowhit[fm]; 462 } while (fm < currentcol); 463 /* not in list so add it */ 464 if (fm != currentcol) { 465 nrows++; 466 columnsforrow[currentcol] = col; 467 /* next three lines insert new entry into linked list */ 468 rowhit[mfm] = currentcol; 469 rowhit[currentcol] = fm; 470 fm = currentcol; 471 /* fm points to present position in list since we know the columns are sorted */ 472 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 473 } 474 } 475 c->nrows[i] = nrows; 476 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 477 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 478 /* now store the linked list of rows into c->rows[i] */ 479 nrows = 0; 480 fm = rowhit[N]; 481 do { 482 c->rows[i][nrows] = fm; 483 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 484 fm = rowhit[fm]; 485 } while (fm < N); 486 } /* ---------------------------------------------------------------------------------------*/ 487 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 488 } 489 ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 490 491 ierr = PetscFree(rowhit);CHKERRQ(ierr); 492 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 493 494 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 495 /* 496 see the version for MPIAIJ 497 */ 498 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 499 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 500 for (k=0; k<c->ncolors; k++) { 501 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 502 for (l=0; l<c->nrows[k]; l++) { 503 col = c->columnsforrow[k][l]; 504 505 c->vscaleforrow[k][l] = col; 506 } 507 } 508 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 509 PetscFunctionReturn(0); 510 } 511 512