1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/baij/seq/baij.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "MatFDColoringApply_SeqBAIJ" 7 PetscErrorCode MatFDColoringApply_SeqBAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 8 { 9 PetscErrorCode (*f)(void*,Vec,Vec,void*)=(PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 10 PetscErrorCode ierr; 11 PetscInt bs=J->rmap->bs,i,j,k,l,row,col,N=J->cmap->n,spidx,nz; 12 PetscScalar dx,*dy_i,*xx,*w3_array,*dy=coloring->dy; 13 PetscScalar *vscale_array; 14 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 15 Vec w1 = coloring->w1,w2=coloring->w2,w3; 16 void *fctx = coloring->fctx; 17 PetscBool flg = PETSC_FALSE; 18 PetscScalar *valaddr; 19 MatEntry *Jentry=coloring->matentry; 20 21 PetscFunctionBegin; 22 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 23 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 24 if (flg) { 25 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 26 } else { 27 PetscBool assembled; 28 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 29 if (assembled) { 30 ierr = MatZeroEntries(J);CHKERRQ(ierr); 31 } 32 } 33 if (!coloring->vscale) { 34 ierr = VecDuplicate(x1,&coloring->vscale);CHKERRQ(ierr); 35 } 36 37 /* Set w1 = F(x1) */ 38 if (!coloring->fset) { 39 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 40 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 41 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 42 } else { 43 coloring->fset = PETSC_FALSE; 44 } 45 46 if (!coloring->w3) { 47 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 48 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 49 } 50 w3 = coloring->w3; 51 52 /* Compute local scale factors: vscale = dx */ 53 if (coloring->htype[0] == 'w') { 54 /* tacky test; need to make systematic if we add other approaches to computing h*/ 55 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 56 dx = (1.0 + unorm)*epsilon; 57 ierr = VecSet(coloring->vscale,dx);CHKERRQ(ierr); 58 } else { 59 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 60 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 61 for (col=0; col<N; col++) { 62 dx = xx[col]; 63 if (dx == 0.0) dx = 1.0; 64 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 65 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 66 dx *= epsilon; 67 vscale_array[col] = dx; 68 } 69 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 70 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 71 } 72 73 nz = 0; 74 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 75 for (k=0; k<coloring->ncolors; k++) { /* Loop over each color */ 76 coloring->currentcolor = k; 77 78 /* Compute w3 = x1 + dx */ 79 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 80 dy_i = dy; 81 for (i=0; i<bs; i++) { /* Loop over a block of columns */ 82 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 83 for (l=0; l<coloring->ncolumns[k]; l++) { 84 col = i + bs*coloring->columns[k][l]; 85 w3_array[col] += vscale_array[col]; 86 if (i) { 87 w3_array[col-1] -= vscale_array[col-1]; /* resume original w3[col-1] */ 88 } 89 } 90 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 91 92 /* Evaluate function w2 = F(w3) - F(x1) */ 93 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 94 ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */ 95 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 96 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 97 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 98 ierr = VecResetArray(w2);CHKERRQ(ierr); 99 dy_i += N; /* points to dy+i*N */ 100 } 101 102 /* Loop over row blocks, putting dy/dx into Jacobian matrix */ 103 for (l=0; l<coloring->nrows[k]; l++) { 104 row = bs*Jentry[nz].row; 105 col = bs*Jentry[nz].col; 106 valaddr = Jentry[nz].valaddr; 107 nz++; 108 spidx = 0; 109 dy_i = dy; 110 for (i=0; i<bs; i++) { /* column of the block */ 111 for (j=0; j<bs; j++) { /* row of the block */ 112 valaddr[spidx++] = dy_i[row+j]/vscale_array[col+i]; 113 } 114 dy_i += N; /* points to dy+i*N */ 115 } 116 } 117 } /* endof for each color */ 118 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 119 120 coloring->currentcolor = -1; 121 PetscFunctionReturn(0); 122 } 123 124 extern PetscErrorCode MatFDColoringApply_AIJ_new(Mat,MatFDColoring,Vec,MatStructure*,void*); 125 /* also used for SeqBAIJ matrices */ 126 #undef __FUNCT__ 127 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ_new" 128 PetscErrorCode MatFDColoringCreate_SeqAIJ_new(Mat mat,ISColoring iscoloring,MatFDColoring c) 129 { 130 PetscErrorCode ierr; 131 PetscInt i,n,nrows,N,j,k,m,ncols,col; 132 const PetscInt *is,*row,*ci,*cj; 133 PetscInt nis=iscoloring->n,*rowhit,bs,bs2,*spidx,nz; 134 IS *isa; 135 PetscBool isBAIJ; 136 Mat_SeqAIJ *spA = (Mat_SeqAIJ*)mat->data; 137 PetscScalar *A_val=spA->a; 138 PetscScalar **valaddrhit; 139 MatEntry *Jentry; 140 141 PetscFunctionBegin; 142 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 143 144 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 145 SeqBAIJ calls this routine, thus check it */ 146 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 147 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&isBAIJ);CHKERRQ(ierr); 148 if (!isBAIJ) { 149 bs = 1; /* only bs=1 is supported for non SEQBAIJ matrix */ 150 } 151 N = mat->cmap->N/bs; 152 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 153 c->N = mat->cmap->N/bs; 154 c->m = mat->rmap->N/bs; 155 c->rstart = 0; 156 157 c->ncolors = nis; 158 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 159 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 160 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 161 ierr = PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));CHKERRQ(ierr); 162 163 ierr = PetscMalloc(spA->nz*sizeof(MatEntry),&Jentry);CHKERRQ(ierr); 164 ierr = PetscLogObjectMemory((PetscObject)c,spA->nz*sizeof(MatEntry));CHKERRQ(ierr); 165 c->matentry = Jentry; 166 167 if (isBAIJ) { 168 ierr = MatGetColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 169 } else { 170 ierr = MatGetColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 171 } 172 173 ierr = PetscMalloc2(c->m,PetscInt,&rowhit,c->m,PetscScalar*,&valaddrhit);CHKERRQ(ierr); 174 ierr = PetscMemzero(rowhit,c->m*sizeof(PetscInt));CHKERRQ(ierr); 175 176 nz = 0; 177 for (i=0; i<nis; i++) { /* loop over colors */ 178 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 179 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 180 181 c->ncolumns[i] = n; 182 if (n) { 183 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 184 ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr); 185 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 186 } else { 187 c->columns[i] = 0; 188 } 189 190 /* fast, crude version requires O(N*N) work */ 191 bs2 = bs*bs; 192 nrows = 0; 193 for (j=0; j<n; j++) { /* loop over columns */ 194 col = is[j]; 195 row = cj + ci[col]; 196 m = ci[col+1] - ci[col]; 197 nrows += m; 198 for (k=0; k<m; k++) { /* loop over columns marking them in rowhit */ 199 rowhit[*row] = col + 1; 200 valaddrhit[*row++] = &A_val[bs2*spidx[ci[col] + k]]; 201 } 202 } 203 c->nrows[i] = nrows; /* total num of rows for this color */ 204 205 for (j=0; j<N; j++) { /* loop over rows */ 206 if (rowhit[j]) { 207 Jentry[nz].row = j; /* local row index */ 208 Jentry[nz].col = rowhit[j] - 1; /* local column index */ 209 Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 210 nz++; 211 rowhit[j] = 0.0; /* zero rowhit for reuse */ 212 } 213 } 214 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 215 } 216 if (nz != spA->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz,spA->nz); 217 218 if (isBAIJ) { 219 ierr = MatRestoreColumnIJ_SeqBAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 220 } else { 221 ierr = MatRestoreColumnIJ_SeqAIJ_Color(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&spidx,NULL);CHKERRQ(ierr); 222 } 223 ierr = PetscFree2(rowhit,valaddrhit);CHKERRQ(ierr); 224 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 225 226 c->ctype = IS_COLORING_GHOSTED; 227 if (isBAIJ) { 228 mat->ops->fdcoloringapply = MatFDColoringApply_SeqBAIJ; 229 ierr = PetscMalloc(bs*mat->cmap->N*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr); 230 } else { 231 mat->ops->fdcoloringapply = MatFDColoringApply_AIJ_new; 232 } 233 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 234 PetscFunctionReturn(0); 235 } 236 237 #undef __FUNCT__ 238 #define __FUNCT__ "MatFDColoringCreate_SeqAIJ" 239 /* 240 This routine is shared by AIJ and BAIJ matrices, since it operators only on the nonzero structure of the elements or blocks. 241 This is why it has the ugly code with the MatGetBlockSize() 242 */ 243 PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 244 { 245 PetscErrorCode ierr; 246 PetscInt i,n,nrows,N,j,k,m,ncols,col; 247 const PetscInt *is,*rows,*ci,*cj; 248 PetscInt nis = iscoloring->n,*rowhit,*columnsforrow,l,bs = 1; 249 IS *isa; 250 PetscBool done,flg = PETSC_FALSE; 251 PetscBool flg1; 252 PetscBool new_impl=PETSC_FALSE; 253 254 PetscFunctionBegin; 255 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 256 if (new_impl){ 257 ierr = MatFDColoringCreate_SeqAIJ_new(mat,iscoloring,c);CHKERRQ(ierr); 258 PetscFunctionReturn(0); 259 } 260 if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled by calls to MatAssemblyBegin/End();"); 261 262 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 263 /* this is ugly way to get blocksize but cannot call MatGetBlockSize() because AIJ can have bs > 1. 264 SeqBAIJ calls this routine, thus check it */ 265 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQBAIJ,&flg1);CHKERRQ(ierr); 266 if (flg1) { 267 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 268 } 269 270 N = mat->cmap->N/bs; 271 c->M = mat->rmap->N/bs; /* set total rows, columns and local rows */ 272 c->N = mat->cmap->N/bs; 273 c->m = mat->rmap->N/bs; 274 c->rstart = 0; 275 276 c->ncolors = nis; 277 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 278 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 279 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 280 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 281 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 282 283 ierr = MatGetColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 284 if (!done) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"MatGetColumnIJ() not supported for matrix type %s",((PetscObject)mat)->type_name); 285 286 /* 287 Temporary option to allow for debugging/testing 288 */ 289 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 290 291 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 292 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 293 294 for (i=0; i<nis; i++) { 295 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 296 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 297 298 c->ncolumns[i] = n; 299 if (n) { 300 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 301 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 302 } else { 303 c->columns[i] = 0; 304 } 305 306 if (!flg) { /* ------------------------------------------------------------------------------*/ 307 /* fast, crude version requires O(N*N) work */ 308 ierr = PetscMemzero(rowhit,N*sizeof(PetscInt));CHKERRQ(ierr); 309 /* loop over columns*/ 310 for (j=0; j<n; j++) { 311 col = is[j]; 312 rows = cj + ci[col]; 313 m = ci[col+1] - ci[col]; 314 /* loop over columns marking them in rowhit */ 315 for (k=0; k<m; k++) { 316 rowhit[*rows++] = col + 1; 317 } 318 } 319 /* count the number of hits */ 320 nrows = 0; 321 for (j=0; j<N; j++) { 322 if (rowhit[j]) nrows++; 323 } 324 c->nrows[i] = nrows; 325 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 326 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 327 nrows = 0; 328 for (j=0; j<N; j++) { 329 if (rowhit[j]) { 330 c->rows[i][nrows] = j; 331 c->columnsforrow[i][nrows] = rowhit[j] - 1; 332 nrows++; 333 } 334 } 335 } else { /*-------------------------------------------------------------------------------*/ 336 /* slow version, using rowhit as a linked list */ 337 PetscInt currentcol,fm,mfm; 338 rowhit[N] = N; 339 nrows = 0; 340 /* loop over columns */ 341 for (j=0; j<n; j++) { 342 col = is[j]; 343 rows = cj + ci[col]; 344 m = ci[col+1] - ci[col]; 345 /* loop over columns marking them in rowhit */ 346 fm = N; /* fm points to first entry in linked list */ 347 for (k=0; k<m; k++) { 348 currentcol = *rows++; 349 /* is it already in the list? */ 350 do { 351 mfm = fm; 352 fm = rowhit[fm]; 353 } while (fm < currentcol); 354 /* not in list so add it */ 355 if (fm != currentcol) { 356 nrows++; 357 columnsforrow[currentcol] = col; 358 /* next three lines insert new entry into linked list */ 359 rowhit[mfm] = currentcol; 360 rowhit[currentcol] = fm; 361 fm = currentcol; 362 /* fm points to present position in list since we know the columns are sorted */ 363 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Detected invalid coloring"); 364 } 365 } 366 c->nrows[i] = nrows; 367 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 368 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 369 /* now store the linked list of rows into c->rows[i] */ 370 nrows = 0; 371 fm = rowhit[N]; 372 do { 373 c->rows[i][nrows] = fm; 374 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 375 fm = rowhit[fm]; 376 } while (fm < N); 377 } /* ---------------------------------------------------------------------------------------*/ 378 ierr = ISRestoreIndices(isa[i],&is);CHKERRQ(ierr); 379 } 380 ierr = MatRestoreColumnIJ(mat,0,PETSC_FALSE,PETSC_FALSE,&ncols,&ci,&cj,&done);CHKERRQ(ierr); 381 382 ierr = PetscFree(rowhit);CHKERRQ(ierr); 383 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 384 385 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 386 /* 387 see the version for MPIAIJ 388 */ 389 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->rmap->n,PETSC_DETERMINE,0,NULL,&c->vscale);CHKERRQ(ierr); 390 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 391 for (k=0; k<c->ncolors; k++) { 392 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 393 for (l=0; l<c->nrows[k]; l++) { 394 col = c->columnsforrow[k][l]; 395 396 c->vscaleforrow[k][l] = col; 397 } 398 } 399 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 400 PetscFunctionReturn(0); 401 } 402 403