1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3 #include <../src/mat/impls/baij/mpi/mpibaij.h> 4 #include <petsc-private/isimpl.h> 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatFDColoringApply_BAIJ" 8 PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,void *sctx) 9 { 10 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 11 PetscErrorCode ierr; 12 PetscInt k,cstart,cend,l,row,col,nz,spidx,i,j; 13 PetscScalar dx=0.0,*xx,*w3_array,*dy_i,*dy=coloring->dy; 14 PetscScalar *vscale_array; 15 PetscReal epsilon=coloring->error_rel,umin=coloring->umin,unorm; 16 Vec w1=coloring->w1,w2=coloring->w2,w3,vscale=coloring->vscale; 17 void *fctx=coloring->fctx; 18 PetscInt ctype=coloring->ctype,nxloc,nrows_k; 19 PetscScalar *valaddr; 20 MatEntry *Jentry=coloring->matentry; 21 MatEntry2 *Jentry2=coloring->matentry2; 22 const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 23 PetscInt bs=J->rmap->bs; 24 25 PetscFunctionBegin; 26 /* (1) Set w1 = F(x1) */ 27 if (!coloring->fset) { 28 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 29 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 30 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 31 } else { 32 coloring->fset = PETSC_FALSE; 33 } 34 35 /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */ 36 ierr = VecGetLocalSize(x1,&nxloc);CHKERRQ(ierr); 37 if (coloring->htype[0] == 'w') { 38 /* vscale = dx is a constant scalar */ 39 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 40 dx = 1.0/(PetscSqrtReal(1.0 + unorm)*epsilon); 41 } else { 42 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 43 ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr); 44 for (col=0; col<nxloc; col++) { 45 dx = xx[col]; 46 if (PetscAbsScalar(dx) < umin) { 47 if (PetscRealPart(dx) >= 0.0) dx = umin; 48 else if (PetscRealPart(dx) < 0.0 ) dx = -umin; 49 } 50 dx *= epsilon; 51 vscale_array[col] = 1.0/dx; 52 } 53 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 54 ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr); 55 } 56 if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') { 57 ierr = VecGhostUpdateBegin(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 58 ierr = VecGhostUpdateEnd(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 59 } 60 61 /* (3) Loop over each color */ 62 if (!coloring->w3) { 63 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 64 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 65 } 66 w3 = coloring->w3; 67 68 ierr = VecGetOwnershipRange(x1,&cstart,&cend);CHKERRQ(ierr); /* used by ghosted vscale */ 69 if (vscale) { 70 ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr); 71 } 72 nz = 0; 73 for (k=0; k<ncolors; k++) { 74 coloring->currentcolor = k; 75 76 /* 77 (3-1) Loop over each column associated with color 78 adding the perturbation to the vector w3 = x1 + dx. 79 */ 80 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 81 dy_i = dy; 82 for (i=0; i<bs; i++) { /* Loop over a block of columns */ 83 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 84 if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */ 85 if (coloring->htype[0] == 'w') { 86 for (l=0; l<ncolumns[k]; l++) { 87 col = i + bs*coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 88 w3_array[col] += 1.0/dx; 89 if (i) w3_array[col-1] -= 1.0/dx; /* resume original w3[col-1] */ 90 } 91 } else { /* htype == 'ds' */ 92 vscale_array -= cstart; /* shift pointer so global index can be used */ 93 for (l=0; l<ncolumns[k]; l++) { 94 col = i + bs*coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 95 w3_array[col] += 1.0/vscale_array[col]; 96 if (i) w3_array[col-1] -= 1.0/vscale_array[col-1]; /* resume original w3[col-1] */ 97 } 98 vscale_array += cstart; 99 } 100 if (ctype == IS_COLORING_GLOBAL) w3_array += cstart; 101 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 102 103 /* 104 (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 105 w2 = F(x1 + dx) - F(x1) 106 */ 107 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 108 ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */ 109 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 110 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 111 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 112 ierr = VecResetArray(w2);CHKERRQ(ierr); 113 dy_i += nxloc; /* points to dy+i*nxloc */ 114 } 115 116 /* 117 (3-3) Loop over rows of vector, putting results into Jacobian matrix 118 */ 119 nrows_k = nrows[k]; 120 if (coloring->htype[0] == 'w') { 121 for (l=0; l<nrows_k; l++) { 122 row = bs*Jentry2[nz].row; /* local row index */ 123 valaddr = Jentry2[nz++].valaddr; 124 spidx = 0; 125 dy_i = dy; 126 for (i=0; i<bs; i++) { /* column of the block */ 127 for (j=0; j<bs; j++) { /* row of the block */ 128 valaddr[spidx++] = dy_i[row+j]*dx; 129 } 130 dy_i += nxloc; /* points to dy+i*nxloc */ 131 } 132 } 133 } else { /* htype == 'ds' */ 134 for (l=0; l<nrows_k; l++) { 135 row = bs*Jentry[nz].row; /* local row index */ 136 col = bs*Jentry[nz].col; /* local column index */ 137 valaddr = Jentry[nz++].valaddr; 138 spidx = 0; 139 dy_i = dy; 140 for (i=0; i<bs; i++) { /* column of the block */ 141 for (j=0; j<bs; j++) { /* row of the block */ 142 valaddr[spidx++] = dy_i[row+j]*vscale_array[col+i]; 143 } 144 dy_i += nxloc; /* points to dy+i*nxloc */ 145 } 146 } 147 } 148 } 149 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 150 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 151 if (vscale) { 152 ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr); 153 } 154 155 coloring->currentcolor = -1; 156 PetscFunctionReturn(0); 157 } 158 159 #undef __FUNCT__ 160 #define __FUNCT__ "MatFDColoringApply_AIJ" 161 PetscErrorCode MatFDColoringApply_AIJ(Mat J,MatFDColoring coloring,Vec x1,void *sctx) 162 { 163 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 164 PetscErrorCode ierr; 165 PetscInt k,cstart,cend,l,row,col,nz; 166 PetscScalar dx=0.0,*y,*xx,*w3_array; 167 PetscScalar *vscale_array; 168 PetscReal epsilon=coloring->error_rel,umin=coloring->umin,unorm; 169 Vec w1=coloring->w1,w2=coloring->w2,w3,vscale=coloring->vscale; 170 void *fctx=coloring->fctx; 171 PetscInt ctype=coloring->ctype,nxloc,nrows_k; 172 MatEntry *Jentry=coloring->matentry; 173 MatEntry2 *Jentry2=coloring->matentry2; 174 const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 175 176 PetscFunctionBegin; 177 /* (1) Set w1 = F(x1) */ 178 if (!coloring->fset) { 179 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 180 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 181 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 182 } else { 183 coloring->fset = PETSC_FALSE; 184 } 185 186 /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */ 187 if (coloring->htype[0] == 'w') { 188 /* vscale = 1./dx is a constant scalar */ 189 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 190 dx = 1.0/(PetscSqrtReal(1.0 + unorm)*epsilon); 191 } else { 192 ierr = VecGetLocalSize(x1,&nxloc);CHKERRQ(ierr); 193 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 194 ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr); 195 for (col=0; col<nxloc; col++) { 196 dx = xx[col]; 197 if (PetscAbsScalar(dx) < umin) { 198 if (PetscRealPart(dx) >= 0.0) dx = umin; 199 else if (PetscRealPart(dx) < 0.0 ) dx = -umin; 200 } 201 dx *= epsilon; 202 vscale_array[col] = 1.0/dx; 203 } 204 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 205 ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr); 206 } 207 if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') { 208 ierr = VecGhostUpdateBegin(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 209 ierr = VecGhostUpdateEnd(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 210 } 211 212 /* (3) Loop over each color */ 213 if (!coloring->w3) { 214 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 215 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 216 } 217 w3 = coloring->w3; 218 219 ierr = VecGetOwnershipRange(x1,&cstart,&cend);CHKERRQ(ierr); /* used by ghosted vscale */ 220 if (vscale) { 221 ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr); 222 } 223 nz = 0; 224 225 if (coloring->bcols > 1) { /* use blocked insertion of Jentry */ 226 PetscInt i,m=J->rmap->n,nbcols,bcols=coloring->bcols; 227 PetscScalar *dy=coloring->dy,*dy_k; 228 229 nbcols = 0; 230 for (k=0; k<ncolors; k+=bcols) { 231 coloring->currentcolor = k; 232 233 /* 234 (3-1) Loop over each column associated with color 235 adding the perturbation to the vector w3 = x1 + dx. 236 */ 237 238 dy_k = dy; 239 if (k + bcols > ncolors) bcols = ncolors - k; 240 for (i=0; i<bcols; i++) { 241 242 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 243 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 244 if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */ 245 if (coloring->htype[0] == 'w') { 246 for (l=0; l<ncolumns[k+i]; l++) { 247 col = coloring->columns[k+i][l]; /* local column (in global index!) of the matrix we are probing for */ 248 w3_array[col] += 1.0/dx; 249 } 250 } else { /* htype == 'ds' */ 251 vscale_array -= cstart; /* shift pointer so global index can be used */ 252 for (l=0; l<ncolumns[k+i]; l++) { 253 col = coloring->columns[k+i][l]; /* local column (in global index!) of the matrix we are probing for */ 254 w3_array[col] += 1.0/vscale_array[col]; 255 } 256 vscale_array += cstart; 257 } 258 if (ctype == IS_COLORING_GLOBAL) w3_array += cstart; 259 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 260 261 /* 262 (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 263 w2 = F(x1 + dx) - F(x1) 264 */ 265 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 266 ierr = VecPlaceArray(w2,dy_k);CHKERRQ(ierr); /* place w2 to the array dy_i */ 267 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 268 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 269 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 270 ierr = VecResetArray(w2);CHKERRQ(ierr); 271 dy_k += m; /* points to dy+i*nxloc */ 272 } 273 274 /* 275 (3-3) Loop over block rows of vector, putting results into Jacobian matrix 276 */ 277 nrows_k = nrows[nbcols++]; 278 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 279 280 if (coloring->htype[0] == 'w') { 281 for (l=0; l<nrows_k; l++) { 282 row = Jentry2[nz].row; /* local row index */ 283 *(Jentry2[nz++].valaddr) = dy[row]*dx; 284 } 285 } else { /* htype == 'ds' */ 286 for (l=0; l<nrows_k; l++) { 287 row = Jentry[nz].row; /* local row index */ 288 *(Jentry[nz].valaddr) = dy[row]*vscale_array[Jentry[nz].col]; 289 nz++; 290 } 291 } 292 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 293 } 294 } else { /* bcols == 1 */ 295 for (k=0; k<ncolors; k++) { 296 coloring->currentcolor = k; 297 298 /* 299 (3-1) Loop over each column associated with color 300 adding the perturbation to the vector w3 = x1 + dx. 301 */ 302 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 303 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 304 if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */ 305 if (coloring->htype[0] == 'w') { 306 for (l=0; l<ncolumns[k]; l++) { 307 col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 308 w3_array[col] += 1.0/dx; 309 } 310 } else { /* htype == 'ds' */ 311 vscale_array -= cstart; /* shift pointer so global index can be used */ 312 for (l=0; l<ncolumns[k]; l++) { 313 col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 314 w3_array[col] += 1.0/vscale_array[col]; 315 } 316 vscale_array += cstart; 317 } 318 if (ctype == IS_COLORING_GLOBAL) w3_array += cstart; 319 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 320 321 /* 322 (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 323 w2 = F(x1 + dx) - F(x1) 324 */ 325 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 326 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 327 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 328 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 329 330 /* 331 (3-3) Loop over rows of vector, putting results into Jacobian matrix 332 */ 333 nrows_k = nrows[k]; 334 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 335 if (coloring->htype[0] == 'w') { 336 for (l=0; l<nrows_k; l++) { 337 row = Jentry2[nz].row; /* local row index */ 338 *(Jentry2[nz++].valaddr) = y[row]*dx; 339 } 340 } else { /* htype == 'ds' */ 341 for (l=0; l<nrows_k; l++) { 342 row = Jentry[nz].row; /* local row index */ 343 *(Jentry[nz].valaddr) = y[row]*vscale_array[Jentry[nz].col]; 344 nz++; 345 } 346 } 347 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 348 } 349 } 350 351 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 352 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 353 if (vscale) { 354 ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr); 355 } 356 coloring->currentcolor = -1; 357 PetscFunctionReturn(0); 358 } 359 360 #undef __FUNCT__ 361 #define __FUNCT__ "MatFDColoringSetUp_MPIXAIJ" 362 PetscErrorCode MatFDColoringSetUp_MPIXAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 363 { 364 PetscErrorCode ierr; 365 PetscMPIInt size,*ncolsonproc,*disp,nn; 366 PetscInt i,n,nrows,nrows_i,j,k,m,ncols,col,*rowhit,cstart,cend,colb; 367 const PetscInt *is,*A_ci,*A_cj,*B_ci,*B_cj,*row=NULL,*ltog=NULL; 368 PetscInt nis=iscoloring->n,nctot,*cols; 369 IS *isa; 370 ISLocalToGlobalMapping map=mat->cmap->mapping; 371 PetscInt ctype=c->ctype,*spidxA,*spidxB,nz,bs,bs2,spidx; 372 Mat A,B; 373 PetscScalar *A_val,*B_val,**valaddrhit; 374 MatEntry *Jentry; 375 MatEntry2 *Jentry2; 376 PetscBool isBAIJ; 377 PetscInt bcols=c->bcols; 378 #if defined(PETSC_USE_CTABLE) 379 PetscTable colmap=NULL; 380 #else 381 PetscInt *colmap=NULL; /* local col number of off-diag col */ 382 #endif 383 384 PetscFunctionBegin; 385 if (ctype == IS_COLORING_GHOSTED) { 386 if (!map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping"); 387 ierr = ISLocalToGlobalMappingGetIndices(map,<og);CHKERRQ(ierr); 388 } 389 390 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 391 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr); 392 if (isBAIJ) { 393 Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 394 Mat_SeqBAIJ *spA,*spB; 395 A = baij->A; spA = (Mat_SeqBAIJ*)A->data; A_val = spA->a; 396 B = baij->B; spB = (Mat_SeqBAIJ*)B->data; B_val = spB->a; 397 nz = spA->nz + spB->nz; /* total nonzero entries of mat */ 398 if (!baij->colmap) { 399 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 400 colmap = baij->colmap; 401 } 402 ierr = MatGetColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 403 ierr = MatGetColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 404 405 if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */ 406 PetscInt *garray; 407 ierr = PetscMalloc1(B->cmap->n,&garray);CHKERRQ(ierr); 408 for (i=0; i<baij->B->cmap->n/bs; i++) { 409 for (j=0; j<bs; j++) { 410 garray[i*bs+j] = bs*baij->garray[i]+j; 411 } 412 } 413 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->cmap->n,PETSC_DETERMINE,B->cmap->n,garray,&c->vscale);CHKERRQ(ierr); 414 ierr = PetscFree(garray);CHKERRQ(ierr); 415 } 416 } else { 417 Mat_MPIAIJ *aij=(Mat_MPIAIJ*)mat->data; 418 Mat_SeqAIJ *spA,*spB; 419 A = aij->A; spA = (Mat_SeqAIJ*)A->data; A_val = spA->a; 420 B = aij->B; spB = (Mat_SeqAIJ*)B->data; B_val = spB->a; 421 nz = spA->nz + spB->nz; /* total nonzero entries of mat */ 422 if (!aij->colmap) { 423 /* Allow access to data structures of local part of matrix 424 - creates aij->colmap which maps global column number to local number in part B */ 425 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 426 colmap = aij->colmap; 427 } 428 ierr = MatGetColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 429 ierr = MatGetColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 430 431 bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */ 432 433 if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */ 434 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),mat->cmap->n,PETSC_DETERMINE,B->cmap->n,aij->garray,&c->vscale);CHKERRQ(ierr); 435 } 436 } 437 438 m = mat->rmap->n/bs; 439 cstart = mat->cmap->rstart/bs; 440 cend = mat->cmap->rend/bs; 441 442 ierr = PetscMalloc1(nis,&c->ncolumns);CHKERRQ(ierr); 443 ierr = PetscMalloc1(nis,&c->columns);CHKERRQ(ierr); 444 ierr = PetscMalloc1(nis,&c->nrows);CHKERRQ(ierr); 445 ierr = PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));CHKERRQ(ierr); 446 447 if (c->htype[0] == 'd') { 448 ierr = PetscMalloc1(nz,&Jentry);CHKERRQ(ierr); 449 ierr = PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry));CHKERRQ(ierr); 450 c->matentry = Jentry; 451 } else if (c->htype[0] == 'w') { 452 ierr = PetscMalloc1(nz,&Jentry2);CHKERRQ(ierr); 453 ierr = PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry2));CHKERRQ(ierr); 454 c->matentry2 = Jentry2; 455 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"htype is not supported"); 456 457 ierr = PetscMalloc2(m+1,&rowhit,m+1,&valaddrhit);CHKERRQ(ierr); 458 nz = 0; 459 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 460 for (i=0; i<nis; i++) { /* for each local color */ 461 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 462 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 463 464 c->ncolumns[i] = n; /* local number of columns of this color on this process */ 465 if (n) { 466 ierr = PetscMalloc1(n,&c->columns[i]);CHKERRQ(ierr); 467 ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr); 468 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 469 } else { 470 c->columns[i] = 0; 471 } 472 473 if (ctype == IS_COLORING_GLOBAL) { 474 /* Determine nctot, the total (parallel) number of columns of this color */ 475 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 476 ierr = PetscMalloc2(size,&ncolsonproc,size,&disp);CHKERRQ(ierr); 477 478 /* ncolsonproc[j]: local ncolumns on proc[j] of this color */ 479 ierr = PetscMPIIntCast(n,&nn);CHKERRQ(ierr); 480 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 481 nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j]; 482 if (!nctot) { 483 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 484 } 485 486 disp[0] = 0; 487 for (j=1; j<size; j++) { 488 disp[j] = disp[j-1] + ncolsonproc[j-1]; 489 } 490 491 /* Get cols, the complete list of columns for this color on each process */ 492 ierr = PetscMalloc1(nctot+1,&cols);CHKERRQ(ierr); 493 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 494 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 495 } else if (ctype == IS_COLORING_GHOSTED) { 496 /* Determine local number of columns of this color on this process, including ghost points */ 497 nctot = n; 498 ierr = PetscMalloc1(nctot+1,&cols);CHKERRQ(ierr); 499 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 500 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 501 502 /* Mark all rows affect by these columns */ 503 ierr = PetscMemzero(rowhit,m*sizeof(PetscInt));CHKERRQ(ierr); 504 bs2 = bs*bs; 505 nrows_i = 0; 506 for (j=0; j<nctot; j++) { /* loop over columns*/ 507 if (ctype == IS_COLORING_GHOSTED) { 508 col = ltog[cols[j]]; 509 } else { 510 col = cols[j]; 511 } 512 if (col >= cstart && col < cend) { /* column is in A, diagonal block of mat */ 513 row = A_cj + A_ci[col-cstart]; 514 nrows = A_ci[col-cstart+1] - A_ci[col-cstart]; 515 nrows_i += nrows; 516 /* loop over columns of A marking them in rowhit */ 517 for (k=0; k<nrows; k++) { 518 /* set valaddrhit for part A */ 519 spidx = bs2*spidxA[A_ci[col-cstart] + k]; 520 valaddrhit[*row] = &A_val[spidx]; 521 rowhit[*row++] = col - cstart + 1; /* local column index */ 522 } 523 } else { /* column is in B, off-diagonal block of mat */ 524 #if defined(PETSC_USE_CTABLE) 525 ierr = PetscTableFind(colmap,col+1,&colb);CHKERRQ(ierr); 526 colb--; 527 #else 528 colb = colmap[col] - 1; /* local column index */ 529 #endif 530 if (colb == -1) { 531 nrows = 0; 532 } else { 533 colb = colb/bs; 534 row = B_cj + B_ci[colb]; 535 nrows = B_ci[colb+1] - B_ci[colb]; 536 } 537 nrows_i += nrows; 538 /* loop over columns of B marking them in rowhit */ 539 for (k=0; k<nrows; k++) { 540 /* set valaddrhit for part B */ 541 spidx = bs2*spidxB[B_ci[colb] + k]; 542 valaddrhit[*row] = &B_val[spidx]; 543 rowhit[*row++] = colb + 1 + cend - cstart; /* local column index */ 544 } 545 } 546 } 547 c->nrows[i] = nrows_i; 548 549 if (c->htype[0] == 'd') { 550 for (j=0; j<m; j++) { 551 if (rowhit[j]) { 552 Jentry[nz].row = j; /* local row index */ 553 Jentry[nz].col = rowhit[j] - 1; /* local column index */ 554 Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 555 nz++; 556 } 557 } 558 } else { /* c->htype == 'wp' */ 559 for (j=0; j<m; j++) { 560 if (rowhit[j]) { 561 Jentry2[nz].row = j; /* local row index */ 562 Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 563 nz++; 564 } 565 } 566 } 567 ierr = PetscFree(cols);CHKERRQ(ierr); 568 } 569 570 if (bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */ 571 ierr = MatFDColoringSetUpBlocked_AIJ_Private(mat,c,nz);CHKERRQ(ierr); 572 } 573 574 if (isBAIJ) { 575 ierr = MatRestoreColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 576 ierr = MatRestoreColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 577 ierr = PetscMalloc1(bs*mat->rmap->n,&c->dy);CHKERRQ(ierr); 578 } else { 579 ierr = MatRestoreColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 580 ierr = MatRestoreColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 581 } 582 583 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 584 ierr = PetscFree2(rowhit,valaddrhit);CHKERRQ(ierr); 585 586 if (ctype == IS_COLORING_GHOSTED) { 587 ierr = ISLocalToGlobalMappingRestoreIndices(map,<og);CHKERRQ(ierr); 588 } 589 ierr = PetscInfo3(c,"ncolors %D, brows %D and bcols %D are used.\n",c->ncolors,c->brows,c->bcols);CHKERRQ(ierr); 590 PetscFunctionReturn(0); 591 } 592 593 #undef __FUNCT__ 594 #define __FUNCT__ "MatFDColoringCreate_MPIXAIJ" 595 PetscErrorCode MatFDColoringCreate_MPIXAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 596 { 597 PetscErrorCode ierr; 598 PetscInt bs,nis=iscoloring->n,m=mat->rmap->n; 599 PetscBool isBAIJ; 600 601 PetscFunctionBegin; 602 /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian; 603 bcols is chosen s.t. dy-array takes 50% of memory space as mat */ 604 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 605 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr); 606 if (isBAIJ) { 607 c->brows = m; 608 c->bcols = 1; 609 } else { /* mpiaij matrix */ 610 /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */ 611 Mat_MPIAIJ *aij=(Mat_MPIAIJ*)mat->data; 612 Mat_SeqAIJ *spA,*spB; 613 Mat A,B; 614 PetscInt nz,brows,bcols; 615 PetscReal mem; 616 617 bs = 1; /* only bs=1 is supported for MPIAIJ matrix */ 618 619 A = aij->A; spA = (Mat_SeqAIJ*)A->data; 620 B = aij->B; spB = (Mat_SeqAIJ*)B->data; 621 nz = spA->nz + spB->nz; /* total local nonzero entries of mat */ 622 mem = nz*(sizeof(PetscScalar) + sizeof(PetscInt)) + 3*m*sizeof(PetscInt); 623 bcols = (PetscInt)(0.5*mem /(m*sizeof(PetscScalar))); 624 brows = 1000/bcols; 625 if (bcols > nis) bcols = nis; 626 if (brows == 0 || brows > m) brows = m; 627 c->brows = brows; 628 c->bcols = bcols; 629 } 630 631 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 632 c->N = mat->cmap->N/bs; 633 c->m = mat->rmap->n/bs; 634 c->rstart = mat->rmap->rstart/bs; 635 c->ncolors = nis; 636 PetscFunctionReturn(0); 637 } 638