1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/baij/seq/baij.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ" 7 PetscErrorCode MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 8 { 9 PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 10 PetscErrorCode ierr; 11 PetscInt bs=J->rmap->bs,i,j,k,l,row,col,N=J->cmap->n,spidx,nz; 12 PetscScalar dx,*dy_i,*xx,*w3_array,*dy=coloring->dy; 13 PetscScalar *vscale_array; 14 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 15 Vec w1 = coloring->w1,w2=coloring->w2,w3; 16 void *fctx = coloring->fctx; 17 PetscBool flg = PETSC_FALSE; 18 PetscScalar *valaddr; 19 MatEntry *Jentry=coloring->matentry; 20 21 PetscFunctionBegin; 22 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 23 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 24 if (flg) { 25 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 26 } else { 27 PetscBool assembled; 28 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 29 if (assembled) { 30 ierr = MatZeroEntries(J);CHKERRQ(ierr); 31 } 32 } 33 if (!coloring->vscale) { 34 ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 35 } 36 37 /* Set w1 = F(x1) */ 38 if (!coloring->fset) { 39 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 40 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 41 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 42 } else { 43 coloring->fset = PETSC_FALSE; 44 } 45 46 if (!coloring->w3) { 47 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 48 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 49 } 50 w3 = coloring->w3; 51 52 /* Compute local scale factors: vscale = dx */ 53 if (coloring->htype[0] == 'w') { 54 /* tacky test; need to make systematic if we add other approaches to computing h*/ 55 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 56 dx = (1.0 + unorm)*epsilon; 57 ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr); 58 } else { 59 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 60 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 61 for (col=0; col<N; col++) { 62 dx = xx[col]; 63 if (dx == 0.0) dx = 1.0; 64 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 65 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 66 dx *= epsilon; 67 vscale_array[col] = dx; 68 } 69 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 70 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 71 } 72 73 nz = 0; 74 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 75 for (k=0; k<coloring->ncolors; k++) { /* Loop over each color */ 76 coloring->currentcolor = k; 77 78 /* Compute w3 = x1 + dx */ 79 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 80 dy_i = dy; 81 for (i=0; i<bs; i++) { /* Loop over a block of columns */ 82 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 83 for (l=0; l<coloring->ncolumns[k]; l++) { 84 col = i + bs*coloring->columns[k][l]; 85 w3_array[col] += vscale_array[col]; 86 if (i) { 87 w3_array[col-1] -= vscale_array[col-1]; /* resume original w3[col-1] */ 88 } 89 } 90 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 91 92 /* Evaluate function w2 = F(w3) - F(x1) */ 93 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 94 ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */ 95 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 96 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 97 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 98 ierr = VecResetArray(w2);CHKERRQ(ierr); 99 dy_i += N; /* points to dy+i*N */ 100 } 101 102 /* Loop over row blocks, putting dy/dx into Jacobian matrix */ 103 for (l=0; l<coloring->nrows[k]; l++) { 104 row = bs*Jentry[nz].row; 105 col = bs*Jentry[nz].col; 106 valaddr = Jentry[nz].valaddr; 107 nz++; 108 spidx = 0; 109 dy_i = dy; 110 for (i=0; i<bs; i++) { /* column of the block */ 111 for (j=0; j<bs; j++) { /* row of the block */ 112 valaddr[spidx++] = dy_i[row+j]/vscale_array[col+i]; 113 } 114 dy_i += N; /* points to dy+i*N */ 115 } 116 } 117 } /* endof for each color */ 118 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 119 120 coloring->currentcolor = -1; 121 PetscFunctionReturn(0); 122 } 123 124 /* Optimize MatFDColoringApply_AIJ() by using array den2sp to skip calling MatSetValues() */ 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatFDColoringApply_SeqAIJ" 127 PetscErrorCode MatFDColoringApply_SeqAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 128 { 129 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 130 PetscErrorCode ierr; 131 PetscInt k,l,row,col,N; 132 PetscScalar dx,*y,*xx,*w3_array; 133 PetscScalar *vscale_array; 134 PetscReal epsilon=coloring->error_rel,umin = coloring->umin,unorm; 135 Vec w1=coloring->w1,w2=coloring->w2,w3; 136 void *fctx=coloring->fctx; 137 PetscBool flg=PETSC_FALSE; 138 const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 139 PetscInt **columns=coloring->columns,ncolumns_k,nrows_k,nz; 140 MatEntry *Jentry=coloring->matentry; 141 142 PetscFunctionBegin; 143 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 144 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 145 if (flg) { 146 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 147 } else { 148 PetscBool assembled; 149 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 150 if (assembled) { 151 ierr = MatZeroEntries(J);CHKERRQ(ierr); 152 } 153 } 154 if (!coloring->vscale) { 155 ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 156 } 157 158 /* Set w1 = F(x1) */ 159 if (!coloring->fset) { 160 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 161 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 162 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 163 } else { 164 coloring->fset = PETSC_FALSE; 165 } 166 167 if (!coloring->w3) { 168 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 169 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 170 } 171 w3 = coloring->w3; 172 173 /* Compute scale factors: vscale = dx */ 174 if (coloring->htype[0] == 'w') { 175 /* tacky test; need to make systematic if we add other approaches to computing h*/ 176 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 177 dx = (1.0 + unorm)*epsilon; 178 ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr); 179 } else { 180 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 181 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 182 ierr = VecGetSize(x1,&N);CHKERRQ(ierr); 183 for (col=0; col<N; col++) { 184 dx = xx[col]; 185 if (dx == 0.0) dx = 1.0; 186 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 187 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 188 dx *= epsilon; 189 vscale_array[col] = dx; 190 } 191 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 192 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 193 } 194 195 nz = 0; 196 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 197 for (k=0; k<ncolors; k++) { /* loop over colors */ 198 coloring->currentcolor = k; 199 200 /* 201 Loop over each column associated with color 202 adding the perturbation to the vector w3 = x1 + dx. 203 */ 204 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 205 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 206 ncolumns_k = ncolumns[k]; 207 for (l=0; l<ncolumns_k; l++) { /* loop over columns */ 208 col = columns[k][l]; 209 w3_array[col] += vscale_array[col]; 210 } 211 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 212 213 /* 214 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 215 w2 = F(x1 + dx) - F(x1) 216 */ 217 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 218 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 219 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 220 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 221 222 /* 223 Loop over rows of vector, putting w2/dx into Jacobian matrix 224 */ 225 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 226 nrows_k = nrows[k]; 227 nrows_k = nrows[k]; 228 for (l=0; l<nrows_k; l++) { /* loop over rows */ 229 row = Jentry[nz].row; 230 col = Jentry[nz].col; 231 *(Jentry[nz++].valaddr) = y[row]/vscale_array[col]; 232 } 233 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 234 } /* endof for each color */ 235 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 236 237 coloring->currentcolor = -1; 238 PetscFunctionReturn(0); 239 } 240 241 #undef __FUNCT__ 242 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_new" 243 /* 244 This routine optimizes MatFDColoringCreate_SeqAIJ() by using den2sp array 245 */ 246 PetscErrorCode MatFDColoringCreate_SeqAIJ_new(Mat mat,ISColoring iscoloring,MatFDColoring c) 247 { 248 PetscErrorCode ierr; 249 PetscInt i,n,nrows,N,j,k,m,ncols,col; 250 const PetscInt *is,*row,*ci,*cj; 251 PetscInt nis=iscoloring->n,*rowhit,bs,bs2,*spidx,nz; 252 IS *isa; 253 PetscBool isBAIJ; 254 Mat_SeqAIJ *spA = (Mat_SeqAIJ*)mat->data; 255 PetscScalar *A_val=spA->a; 256 PetscScalar **valaddrhit; 257 MatEntry *Jentry; 258 259 PetscFunctionBegin; 260 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 261 262 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 263 SeqBAIJ calls this routine, thus check it */ 264 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 265 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 266 if (!isBAIJ) { 267 bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */ 268 } 269 N = mat->cmap->N/bs; 270 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 271 c->N = mat->cmap->N/bs; 272 c->m = mat->rmap->N/bs; 273 c->rstart = 0; 274 275 c->ncolors = nis; 276 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 277 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 278 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 279 ierr = PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));CHKERRQ(ierr); 280 281 ierr = PetscMalloc(spA->nz*sizeof(MatEntry),&Jentry);CHKERRQ(ierr); 282 ierr = PetscLogObjectMemory((PetscObject)c,spA->nz*sizeof(MatEntry));CHKERRQ(ierr); 283 c->matentry = Jentry; 284 285 if (isBAIJ) { 286 ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 287 } else { 288 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 289 } 290 291 ierr = PetscMalloc2(c->m,PetscInt,&rowhit,c->m,PetscScalar*,&valaddrhit);CHKERRQ(ierr); 292 ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 293 294 nz = 0; 295 for (i=0; i<nis; i++) { /* loop over colors */ 296 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 297 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 298 299 c->ncolumns[i] = n; 300 if (n) { 301 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 302 ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr); 303 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 304 } else { 305 c->columns[i] = 0; 306 } 307 308 /* fast, crude version requires O(N*N) work */ 309 bs2 = bs*bs; 310 nrows = 0; 311 for (j=0; j<n; j++) { /* loop over columns */ 312 col = is[j]; 313 row = cj + ci[col]; 314 m = ci[col+1] - ci[col]; 315 nrows += m; 316 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 317 rowhit[*row] = col + 1; 318 valaddrhit[*row++] = &A_val[bs2*spidx[ci[col] + k]]; 319 } 320 } 321 c->nrows[i] = nrows; /* total num of rows for this color */ 322 323 for (j=0; j<N; j++) { /* loop over rows */ 324 if (rowhit[j]) { 325 Jentry[nz].row = j; /* local row index */ 326 Jentry[nz].col = rowhit[j] - 1; /* local column index */ 327 Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 328 nz++; 329 rowhit[j] = 0.0; /* zero rowhit for reuse */ 330 } 331 } 332 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 333 } 334 if (nz != spA->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz,spA->nz); 335 336 if (isBAIJ) { 337 ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 338 } else { 339 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 340 } 341 ierr = PetscFree2(rowhit,valaddrhit);CHKERRQ(ierr); 342 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 343 344 c->ctype = IS_COLORING_GHOSTED; 345 if (isBAIJ) { 346 mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ; 347 ierr = PetscMalloc(bs*mat->cmap->N*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr); 348 } else { 349 mat->ops->fdcoloringapply = MatFDColoringApply_SeqAIJ; 350 } 351 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 352 PetscFunctionReturn(0); 353 } 354 355 #undef __FUNCT__ 356 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 357 /* 358 This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 359 This is why it has the ugly code with the MatGetBlockSize() 360 */ 361 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 362 { 363 PetscErrorCode ierr; 364 PetscInt i,n,nrows,N,j,k,m,ncols,col; 365 const PetscInt *is,*rows,*ci,*cj; 366 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 367 IS *isa; 368 PetscBool done,flg = PETSC_FALSE; 369 PetscBool flg1; 370 PetscBool new_impl=PETSC_FALSE; 371 372 PetscFunctionBegin; 373 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 374 if (new_impl){ 375 ierr = MatFDColoringCreate_SeqAIJ_new(mat,iscoloring,c);CHKERRQ(ierr); 376 PetscFunctionReturn(0); 377 } 378 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 379 380 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 381 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 382 SeqBAIJ calls this routine, thus check it */ 383 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 384 if (flg1) { 385 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 386 } 387 388 N = mat->cmap->N/bs; 389 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 390 c->N = mat->cmap->N/bs; 391 c->m = mat->rmap->N/bs; 392 c->rstart = 0; 393 394 c->ncolors = nis; 395 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 396 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 397 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 398 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 399 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 400 401 ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 402 if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 403 404 /* 405 Temporary option to allow for debugging/testing 406 */ 407 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 408 409 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 410 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 411 412 for (i=0; i<nis; i++) { 413 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 414 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 415 416 c->ncolumns[i] = n; 417 if (n) { 418 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 419 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 420 } else { 421 c->columns[i] = 0; 422 } 423 424 if (!flg) { /* ------------------------------------------------------------------------------*/ 425 /* fast, crude version requires O(N*N) work */ 426 ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 427 /* loop over columns*/ 428 for (j=0; j<n; j++) { 429 col = is[j]; 430 rows = cj + ci[col]; 431 m = ci[col+1] - ci[col]; 432 /* loop over columns marking them in rowhit */ 433 for (k=0; k<m; k++) { 434 rowhit[*rows++] = col + 1; 435 } 436 } 437 /* count the number of hits */ 438 nrows = 0; 439 for (j=0; j<N; j++) { 440 if (rowhit[j]) nrows++; 441 } 442 c->nrows[i] = nrows; 443 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 444 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 445 nrows = 0; 446 for (j=0; j<N; j++) { 447 if (rowhit[j]) { 448 c->rows[i][nrows] = j; 449 c->columnsforrow[i][nrows] = rowhit[j] - 1; 450 nrows++; 451 } 452 } 453 } else { /*-------------------------------------------------------------------------------*/ 454 /* slow version, using rowhit as a linked list */ 455 PetscInt currentcol,fm,mfm; 456 rowhit[N] = N; 457 nrows = 0; 458 /* loop over columns */ 459 for (j=0; j<n; j++) { 460 col = is[j]; 461 rows = cj + ci[col]; 462 m = ci[col+1] - ci[col]; 463 /* loop over columns marking them in rowhit */ 464 fm = N; /* fm points to first entry in linked list */ 465 for (k=0; k<m; k++) { 466 currentcol = *rows++; 467 /* is it already in the list? */ 468 do { 469 mfm = fm; 470 fm = rowhit[fm]; 471 } while (fm < currentcol); 472 /* not in list so add it */ 473 if (fm != currentcol) { 474 nrows++; 475 columnsforrow[currentcol] = col; 476 /* next three lines insert new entry into linked list */ 477 rowhit[mfm] = currentcol; 478 rowhit[currentcol] = fm; 479 fm = currentcol; 480 /* fm points to present position in list since we know the columns are sorted */ 481 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 482 } 483 } 484 c->nrows[i] = nrows; 485 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 486 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 487 /* now store the linked list of rows into c->rows[i] */ 488 nrows = 0; 489 fm = rowhit[N]; 490 do { 491 c->rows[i][nrows] = fm; 492 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 493 fm = rowhit[fm]; 494 } while (fm < N); 495 } /* ---------------------------------------------------------------------------------------*/ 496 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 497 } 498 ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 499 500 ierr = PetscFree(rowhit);CHKERRQ(ierr); 501 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 502 503 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 504 /* 505 see the version for MPIAIJ 506 */ 507 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 508 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 509 for (k=0; k<c->ncolors; k++) { 510 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 511 for (l=0; l<c->nrows[k]; l++) { 512 col = c->columnsforrow[k][l]; 513 514 c->vscaleforrow[k][l] = col; 515 } 516 } 517 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 518 PetscFunctionReturn(0); 519 } 520 521