1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/baij/seq/baij.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ" 7 PetscErrorCode MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 8 { 9 PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 10 PetscErrorCode ierr; 11 PetscInt bs=J->rmap->bs,i,j,k,l,row,col,N=J->cmap->n,spidx,nz; 12 PetscScalar dx,*dy_i,*xx,*w3_array,*dy=coloring->dy; 13 PetscScalar *vscale_array; 14 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 15 Vec w1 = coloring->w1,w2=coloring->w2,w3; 16 void *fctx = coloring->fctx; 17 PetscBool flg = PETSC_FALSE; 18 PetscScalar *valaddr; 19 MatEntry *Jentry=coloring->matentry; 20 21 PetscFunctionBegin; 22 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 23 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 24 if (flg) { 25 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 26 } else { 27 PetscBool assembled; 28 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 29 if (assembled) { 30 ierr = MatZeroEntries(J);CHKERRQ(ierr); 31 } 32 } 33 if (!coloring->vscale) { 34 ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 35 } 36 37 /* Set w1 = F(x1) */ 38 if (!coloring->fset) { 39 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 40 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 41 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 42 } else { 43 coloring->fset = PETSC_FALSE; 44 } 45 46 if (!coloring->w3) { 47 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 48 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 49 } 50 w3 = coloring->w3; 51 52 /* Compute local scale factors: vscale = dx */ 53 if (coloring->htype[0] == 'w') { 54 /* tacky test; need to make systematic if we add other approaches to computing h*/ 55 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 56 dx = (1.0 + unorm)*epsilon; 57 ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr); 58 } else { 59 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 60 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 61 for (col=0; col<N; col++) { 62 dx = xx[col]; 63 if (dx == 0.0) dx = 1.0; 64 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 65 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 66 dx *= epsilon; 67 vscale_array[col] = dx; 68 } 69 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 70 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 71 } 72 73 nz = 0; 74 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 75 for (k=0; k<coloring->ncolors; k++) { /* Loop over each color */ 76 coloring->currentcolor = k; 77 78 /* Compute w3 = x1 + dx */ 79 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 80 dy_i = dy; 81 for (i=0; i<bs; i++) { /* Loop over a block of columns */ 82 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 83 for (l=0; l<coloring->ncolumns[k]; l++) { 84 col = i + bs*coloring->columns[k][l]; 85 w3_array[col] += vscale_array[col]; 86 if (i) { 87 w3_array[col-1] -= vscale_array[col-1]; /* resume original w3[col-1] */ 88 } 89 } 90 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 91 92 /* Evaluate function w2 = F(w3) - F(x1) */ 93 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 94 ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */ 95 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 96 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 97 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 98 ierr = VecResetArray(w2);CHKERRQ(ierr); 99 dy_i += N; /* points to dy+i*N */ 100 } 101 102 /* Loop over row blocks, putting dy/dx into Jacobian matrix */ 103 for (l=0; l<coloring->nrows[k]; l++) { 104 row = bs*Jentry[nz].row; 105 col = bs*Jentry[nz].col; 106 valaddr = Jentry[nz].valaddr; 107 nz++; 108 spidx = 0; 109 dy_i = dy; 110 for (i=0; i<bs; i++) { /* column of the block */ 111 for (j=0; j<bs; j++) { /* row of the block */ 112 valaddr[spidx++] = dy_i[row+j]/vscale_array[col+i]; 113 } 114 dy_i += N; /* points to dy+i*N */ 115 } 116 } 117 } /* endof for each color */ 118 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 119 120 coloring->currentcolor = -1; 121 PetscFunctionReturn(0); 122 } 123 124 extern PetscErrorCode MatFDColoringApply_AIJ_new(Mat,MatFDColoring,Vec,MatStructure*,void*); 125 extern PetscErrorCode MatFDColoringApply_BAIJ_new(Mat,MatFDColoring,Vec,MatStructure*,void *); 126 /* also used for SeqBAIJ matrices */ 127 #undef __FUNCT__ 128 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_new" 129 PetscErrorCode MatFDColoringCreate_SeqAIJ_new(Mat mat,ISColoring iscoloring,MatFDColoring c) 130 { 131 PetscErrorCode ierr; 132 PetscInt i,n,nrows,N,j,k,m,ncols,col; 133 const PetscInt *is,*row,*ci,*cj; 134 PetscInt nis=iscoloring->n,*rowhit,bs,bs2,*spidx,nz; 135 IS *isa; 136 PetscBool isBAIJ; 137 Mat_SeqAIJ *spA = (Mat_SeqAIJ*)mat->data; 138 PetscScalar *A_val=spA->a; 139 PetscScalar **valaddrhit; 140 MatEntry *Jentry; 141 142 PetscFunctionBegin; 143 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 144 145 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 146 SeqBAIJ calls this routine, thus check it */ 147 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 148 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 149 if (!isBAIJ) { 150 bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */ 151 } 152 N = mat->cmap->N/bs; 153 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 154 c->N = mat->cmap->N/bs; 155 c->m = mat->rmap->N/bs; 156 c->rstart = 0; 157 158 c->ncolors = nis; 159 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 160 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 161 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 162 ierr = PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));CHKERRQ(ierr); 163 164 ierr = PetscMalloc(spA->nz*sizeof(MatEntry),&Jentry);CHKERRQ(ierr); 165 ierr = PetscLogObjectMemory((PetscObject)c,spA->nz*sizeof(MatEntry));CHKERRQ(ierr); 166 c->matentry = Jentry; 167 168 if (isBAIJ) { 169 ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 170 } else { 171 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 172 } 173 174 ierr = PetscMalloc2(c->m,PetscInt,&rowhit,c->m,PetscScalar*,&valaddrhit);CHKERRQ(ierr); 175 ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 176 177 nz = 0; 178 for (i=0; i<nis; i++) { /* loop over colors */ 179 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 180 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 181 182 c->ncolumns[i] = n; 183 if (n) { 184 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 185 ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr); 186 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 187 } else { 188 c->columns[i] = 0; 189 } 190 191 /* fast, crude version requires O(N*N) work */ 192 bs2 = bs*bs; 193 nrows = 0; 194 for (j=0; j<n; j++) { /* loop over columns */ 195 col = is[j]; 196 row = cj + ci[col]; 197 m = ci[col+1] - ci[col]; 198 nrows += m; 199 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 200 rowhit[*row] = col + 1; 201 valaddrhit[*row++] = &A_val[bs2*spidx[ci[col] + k]]; 202 } 203 } 204 c->nrows[i] = nrows; /* total num of rows for this color */ 205 206 for (j=0; j<N; j++) { /* loop over rows */ 207 if (rowhit[j]) { 208 Jentry[nz].row = j; /* local row index */ 209 Jentry[nz].col = rowhit[j] - 1; /* local column index */ 210 Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 211 nz++; 212 rowhit[j] = 0.0; /* zero rowhit for reuse */ 213 } 214 } 215 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 216 } 217 if (nz != spA->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz,spA->nz); 218 219 if (isBAIJ) { 220 ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 221 } else { 222 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 223 } 224 ierr = PetscFree2(rowhit,valaddrhit);CHKERRQ(ierr); 225 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 226 227 c->ctype = IS_COLORING_GHOSTED; 228 if (isBAIJ) { 229 mat->ops->fdcoloringapply = MatFDColoringApply_BAIJ_new; 230 ierr = PetscMalloc(bs*mat->cmap->N*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr); 231 } else { 232 mat->ops->fdcoloringapply = MatFDColoringApply_AIJ_new; 233 } 234 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 240 /* 241 This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 242 This is why it has the ugly code with the MatGetBlockSize() 243 */ 244 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 245 { 246 PetscErrorCode ierr; 247 PetscInt i,n,nrows,N,j,k,m,ncols,col; 248 const PetscInt *is,*rows,*ci,*cj; 249 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 250 IS *isa; 251 PetscBool done,flg = PETSC_FALSE; 252 PetscBool flg1; 253 PetscBool new_impl=PETSC_FALSE; 254 255 PetscFunctionBegin; 256 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 257 if (new_impl){ 258 ierr = MatFDColoringCreate_SeqAIJ_new(mat,iscoloring,c);CHKERRQ(ierr); 259 PetscFunctionReturn(0); 260 } 261 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 262 263 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 264 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 265 SeqBAIJ calls this routine, thus check it */ 266 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 267 if (flg1) { 268 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 269 } 270 271 N = mat->cmap->N/bs; 272 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 273 c->N = mat->cmap->N/bs; 274 c->m = mat->rmap->N/bs; 275 c->rstart = 0; 276 277 c->ncolors = nis; 278 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 279 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 280 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 281 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 282 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 283 284 ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 285 if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 286 287 /* 288 Temporary option to allow for debugging/testing 289 */ 290 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 291 292 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 293 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 294 295 for (i=0; i<nis; i++) { 296 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 297 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 298 299 c->ncolumns[i] = n; 300 if (n) { 301 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 302 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 303 } else { 304 c->columns[i] = 0; 305 } 306 307 if (!flg) { /* ------------------------------------------------------------------------------*/ 308 /* fast, crude version requires O(N*N) work */ 309 ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 310 /* loop over columns*/ 311 for (j=0; j<n; j++) { 312 col = is[j]; 313 rows = cj + ci[col]; 314 m = ci[col+1] - ci[col]; 315 /* loop over columns marking them in rowhit */ 316 for (k=0; k<m; k++) { 317 rowhit[*rows++] = col + 1; 318 } 319 } 320 /* count the number of hits */ 321 nrows = 0; 322 for (j=0; j<N; j++) { 323 if (rowhit[j]) nrows++; 324 } 325 c->nrows[i] = nrows; 326 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 327 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 328 nrows = 0; 329 for (j=0; j<N; j++) { 330 if (rowhit[j]) { 331 c->rows[i][nrows] = j; 332 c->columnsforrow[i][nrows] = rowhit[j] - 1; 333 nrows++; 334 } 335 } 336 } else { /*-------------------------------------------------------------------------------*/ 337 /* slow version, using rowhit as a linked list */ 338 PetscInt currentcol,fm,mfm; 339 rowhit[N] = N; 340 nrows = 0; 341 /* loop over columns */ 342 for (j=0; j<n; j++) { 343 col = is[j]; 344 rows = cj + ci[col]; 345 m = ci[col+1] - ci[col]; 346 /* loop over columns marking them in rowhit */ 347 fm = N; /* fm points to first entry in linked list */ 348 for (k=0; k<m; k++) { 349 currentcol = *rows++; 350 /* is it already in the list? */ 351 do { 352 mfm = fm; 353 fm = rowhit[fm]; 354 } while (fm < currentcol); 355 /* not in list so add it */ 356 if (fm != currentcol) { 357 nrows++; 358 columnsforrow[currentcol] = col; 359 /* next three lines insert new entry into linked list */ 360 rowhit[mfm] = currentcol; 361 rowhit[currentcol] = fm; 362 fm = currentcol; 363 /* fm points to present position in list since we know the columns are sorted */ 364 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 365 } 366 } 367 c->nrows[i] = nrows; 368 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 369 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 370 /* now store the linked list of rows into c->rows[i] */ 371 nrows = 0; 372 fm = rowhit[N]; 373 do { 374 c->rows[i][nrows] = fm; 375 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 376 fm = rowhit[fm]; 377 } while (fm < N); 378 } /* ---------------------------------------------------------------------------------------*/ 379 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 380 } 381 ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 382 383 ierr = PetscFree(rowhit);CHKERRQ(ierr); 384 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 385 386 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 387 /* 388 see the version for MPIAIJ 389 */ 390 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 391 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 392 for (k=0; k<c->ncolors; k++) { 393 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 394 for (l=0; l<c->nrows[k]; l++) { 395 col = c->columnsforrow[k][l]; 396 397 c->vscaleforrow[k][l] = col; 398 } 399 } 400 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 401 PetscFunctionReturn(0); 402 } 403 404