1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3 #include <../src/mat/impls/baij/mpi/mpibaij.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "MatFDColoringApply_BAIJ_new" 7 PetscErrorCode MatFDColoringApply_BAIJ_new(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 8 { 9 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 10 PetscErrorCode ierr; 11 PetscInt k,cstart,cend,l,row,col,nz,spidx,i,j; 12 PetscScalar dx=0.0,*y,*xx,*w3_array,*dy_i,*dy=coloring->dy; 13 PetscScalar *vscale_array; 14 PetscReal epsilon=coloring->error_rel,umin=coloring->umin,unorm; 15 Vec w1=coloring->w1,w2=coloring->w2,w3,vscale=coloring->vscale; 16 void *fctx=coloring->fctx; 17 PetscBool flg=PETSC_FALSE; 18 PetscInt ctype=coloring->ctype,nxloc,nrows_k; 19 Mat_MPIBAIJ *aij=(Mat_MPIBAIJ*)J->data; 20 PetscScalar *valaddr; 21 MatEntry *Jentry=coloring->matentry; 22 const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 23 PetscInt bs=J->rmap->bs; 24 25 PetscFunctionBegin; 26 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 27 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 28 if (flg) { 29 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 30 } else { 31 PetscBool assembled; 32 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 33 if (assembled) { 34 ierr = MatZeroEntries(J);CHKERRQ(ierr); 35 } 36 } 37 38 /* create vscale for storing dx */ 39 if (!vscale) { 40 if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') { 41 ierr = VecCreateGhost(PetscObjectComm((PetscObject)J),J->cmap->n,PETSC_DETERMINE,aij->B->cmap->n,aij->garray,&vscale);CHKERRQ(ierr); 42 43 } else if (ctype == IS_COLORING_GHOSTED) { 44 ierr = VecDuplicate(x1,&vscale);CHKERRQ(ierr); 45 } 46 coloring->vscale = vscale; 47 } 48 49 /* (1) Set w1 = F(x1) */ 50 if (!coloring->fset) { 51 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 52 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 53 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 54 } else { 55 coloring->fset = PETSC_FALSE; 56 } 57 58 /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */ 59 ierr = VecGetLocalSize(x1,&nxloc);CHKERRQ(ierr); 60 //PetscMPIInt rank; 61 //ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 62 //printf("[%d] nxloc %d\n",rank,nxloc); 63 if (coloring->htype[0] == 'w') { 64 /* vscale = dx is a constant scalar */ 65 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 66 dx = 1.0/(PetscSqrtReal(1.0 + unorm)*epsilon); 67 } else { 68 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 69 ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr); 70 for (col=0; col<nxloc; col++) { 71 dx = xx[col]; 72 if (PetscAbsScalar(dx) < umin) { 73 if (PetscRealPart(dx) >= 0.0) dx = umin; 74 else if (PetscRealPart(dx) < 0.0 ) dx = -umin; 75 } 76 dx *= epsilon; 77 vscale_array[col] = 1.0/dx; 78 } 79 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 80 ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr); 81 } 82 if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] != 'w') { 83 ierr = VecGhostUpdateBegin(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 84 ierr = VecGhostUpdateEnd(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 85 } 86 87 /* (3) Loop over each color */ 88 if (!coloring->w3) { 89 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 90 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 91 } 92 w3 = coloring->w3; 93 94 ierr = VecGetOwnershipRange(x1,&cstart,&cend);CHKERRQ(ierr); /* used by ghosted vscale */ 95 if (vscale) { 96 ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr); 97 } 98 nz = 0; 99 for (k=0; k<ncolors; k++) { 100 coloring->currentcolor = k; 101 102 /* 103 (3-1) Loop over each column associated with color 104 adding the perturbation to the vector w3 = x1 + dx. 105 */ 106 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 107 dy_i = dy; 108 for (i=0; i<bs; i++) { /* Loop over a block of columns */ 109 //------------------------------------------------ 110 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 111 if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */ 112 if (coloring->htype[0] == 'w') { 113 for (l=0; l<ncolumns[k]; l++) { 114 col = i + bs*coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 115 w3_array[col] += 1.0/dx; 116 117 if (i) { 118 w3_array[col-1] -= 1.0/dx; /* resume original w3[col-1] */ 119 } 120 121 } 122 } else { /* htype == 'ds' */ 123 vscale_array -= cstart; /* shift pointer so global index can be used */ 124 for (l=0; l<ncolumns[k]; l++) { 125 col = i + bs*coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 126 w3_array[col] += 1.0/vscale_array[col]; 127 128 if (i) { 129 w3_array[col-1] -= 1.0/vscale_array[col-1]; /* resume original w3[col-1] */ 130 } 131 132 } 133 vscale_array += cstart; 134 } 135 if (ctype == IS_COLORING_GLOBAL) w3_array += cstart; 136 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 137 138 /* 139 (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 140 w2 = F(x1 + dx) - F(x1) 141 */ 142 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 143 ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */ 144 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 145 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 146 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 147 //--------------------------------------------- 148 ierr = VecResetArray(w2);CHKERRQ(ierr); 149 dy_i += nxloc; /* points to dy+i*nxloc */ 150 } 151 152 /* 153 (3-3) Loop over rows of vector, putting results into Jacobian matrix 154 */ 155 nrows_k = nrows[k]; 156 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 157 if (coloring->htype[0] == 'w') { 158 for (l=0; l<nrows_k; l++) { 159 row = bs*Jentry[nz].row; /* local row index */ 160 valaddr = Jentry[nz].valaddr; 161 nz++; 162 spidx = 0; 163 dy_i = dy; 164 for (i=0; i<bs; i++) { /* column of the block */ 165 for (j=0; j<bs; j++) { /* row of the block */ 166 valaddr[spidx++] = dy_i[row+j]*dx; 167 } 168 dy_i += nxloc; /* points to dy+i*nxloc */ 169 } 170 } 171 } else { /* htype == 'ds' */ 172 for (l=0; l<nrows_k; l++) { 173 row = bs*Jentry[nz].row; /* local row index */ 174 col = bs*Jentry[nz].col; /* local column index */ 175 valaddr = Jentry[nz].valaddr; 176 nz++; 177 spidx = 0; 178 dy_i = dy; 179 for (i=0; i<bs; i++) { /* column of the block */ 180 for (j=0; j<bs; j++) { /* row of the block */ 181 valaddr[spidx++] = dy_i[row+j]*vscale_array[col+i]; 182 } 183 dy_i += nxloc; /* points to dy+i*nxloc */ 184 } 185 } 186 } 187 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 188 } 189 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 190 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 191 if (vscale) { 192 ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr); 193 } 194 195 coloring->currentcolor = -1; 196 PetscFunctionReturn(0); 197 } 198 199 extern PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat); 200 #undef __FUNCT__ 201 #define __FUNCT__ "MatFDColoringApply_AIJ_new" 202 PetscErrorCode MatFDColoringApply_AIJ_new(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 203 { 204 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 205 PetscErrorCode ierr; 206 PetscInt k,cstart,cend,l,row,col,nz; 207 PetscScalar dx=0.0,*y,*xx,*w3_array; 208 PetscScalar *vscale_array; 209 PetscReal epsilon=coloring->error_rel,umin=coloring->umin,unorm; 210 Vec w1=coloring->w1,w2=coloring->w2,w3,vscale=coloring->vscale; 211 void *fctx=coloring->fctx; 212 PetscBool flg=PETSC_FALSE; 213 PetscInt ctype=coloring->ctype,nxloc,nrows_k; 214 Mat_MPIAIJ *aij=(Mat_MPIAIJ*)J->data; 215 MatEntry *Jentry=coloring->matentry; 216 const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 217 218 PetscFunctionBegin; 219 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 220 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 221 if (flg) { 222 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 223 } else { 224 PetscBool assembled; 225 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 226 if (assembled) { 227 ierr = MatZeroEntries(J);CHKERRQ(ierr); 228 } 229 } 230 231 /* create vscale for storing dx */ 232 if (!vscale) { 233 if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') { 234 ierr = VecCreateGhost(PetscObjectComm((PetscObject)J),J->cmap->n,PETSC_DETERMINE,aij->B->cmap->n,aij->garray,&vscale);CHKERRQ(ierr); 235 } else if (ctype == IS_COLORING_GHOSTED) { 236 ierr = VecDuplicate(x1,&vscale);CHKERRQ(ierr); 237 } 238 coloring->vscale = vscale; 239 } 240 241 /* (1) Set w1 = F(x1) */ 242 if (!coloring->fset) { 243 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 244 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 245 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 246 } else { 247 coloring->fset = PETSC_FALSE; 248 } 249 250 /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */ 251 if (coloring->htype[0] == 'w') { 252 /* vscale = dx is a constant scalar */ 253 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 254 dx = 1.0/(PetscSqrtReal(1.0 + unorm)*epsilon); 255 } else { 256 ierr = VecGetLocalSize(x1,&nxloc);CHKERRQ(ierr); 257 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 258 ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr); 259 for (col=0; col<nxloc; col++) { 260 dx = xx[col]; 261 if (PetscAbsScalar(dx) < umin) { 262 if (PetscRealPart(dx) >= 0.0) dx = umin; 263 else if (PetscRealPart(dx) < 0.0 ) dx = -umin; 264 } 265 dx *= epsilon; 266 vscale_array[col] = 1.0/dx; 267 } 268 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 269 ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr); 270 } 271 if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] != 'w') { 272 ierr = VecGhostUpdateBegin(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 273 ierr = VecGhostUpdateEnd(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 274 } 275 276 /* (3) Loop over each color */ 277 if (!coloring->w3) { 278 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 279 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 280 } 281 w3 = coloring->w3; 282 283 ierr = VecGetOwnershipRange(x1,&cstart,&cend);CHKERRQ(ierr); /* used by ghosted vscale */ 284 if (vscale) { 285 ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr); 286 } 287 nz = 0; 288 for (k=0; k<ncolors; k++) { 289 coloring->currentcolor = k; 290 291 /* 292 (3-1) Loop over each column associated with color 293 adding the perturbation to the vector w3 = x1 + dx. 294 */ 295 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 296 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 297 if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */ 298 if (coloring->htype[0] == 'w') { 299 for (l=0; l<ncolumns[k]; l++) { 300 col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 301 w3_array[col] += 1.0/dx; 302 } 303 } else { /* htype == 'ds' */ 304 vscale_array -= cstart; /* shift pointer so global index can be used */ 305 for (l=0; l<ncolumns[k]; l++) { 306 col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 307 w3_array[col] += 1.0/vscale_array[col]; 308 } 309 vscale_array += cstart; 310 } 311 if (ctype == IS_COLORING_GLOBAL) w3_array += cstart; 312 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 313 314 /* 315 (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 316 w2 = F(x1 + dx) - F(x1) 317 */ 318 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 319 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 320 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 321 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 322 323 /* 324 (3-3) Loop over rows of vector, putting results into Jacobian matrix 325 */ 326 nrows_k = nrows[k]; 327 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 328 if (coloring->htype[0] == 'w') { 329 for (l=0; l<nrows_k; l++) { 330 row = Jentry[nz].row; /* local row index */ 331 *(Jentry[nz++].valaddr) = y[row]*dx; 332 } 333 } else { /* htype == 'ds' */ 334 for (l=0; l<nrows_k; l++) { 335 row = Jentry[nz].row; /* local row index */ 336 *(Jentry[nz].valaddr) = y[row]*vscale_array[Jentry[nz].col]; 337 nz++; 338 } 339 } 340 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 341 } 342 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 344 if (vscale) { 345 ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr); 346 } 347 348 coloring->currentcolor = -1; 349 PetscFunctionReturn(0); 350 } 351 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatFDColoringCreate_MPIAIJ_new" 354 PetscErrorCode MatFDColoringCreate_MPIAIJ_new(Mat mat,ISColoring iscoloring,MatFDColoring c) 355 { 356 //Mat_MPIAIJ *aij=(Mat_MPIAIJ*)mat->data; 357 PetscErrorCode ierr; 358 PetscMPIInt size,*ncolsonproc,*disp,nn; 359 PetscInt i,n,nrows,nrows_i,j,k,m,ncols,col; 360 const PetscInt *is,*A_ci,*A_cj,*B_ci,*B_cj,*row = NULL,*ltog=NULL; 361 PetscInt nis=iscoloring->n,nctot,*cols; 362 PetscInt *rowhit,cstart,cend,colb; 363 IS *isa; 364 ISLocalToGlobalMapping map=mat->cmap->mapping; 365 PetscInt ctype=c->ctype; 366 Mat A,B; 367 //Mat A=aij->A,B=aij->B; 368 //Mat_SeqAIJ *spA=(Mat_SeqAIJ*)A->data,*spB=(Mat_SeqAIJ*)B->data; 369 //PetscScalar *A_val=spA->a,*B_val=spB->a; 370 PetscScalar *A_val,*B_val; 371 PetscInt spidx; 372 PetscInt *spidxA,*spidxB,nz,bs,bs2; 373 PetscScalar **valaddrhit; 374 MatEntry *Jentry; 375 PetscBool isBAIJ; 376 #if defined(PETSC_USE_CTABLE) 377 PetscTable colmap=NULL; 378 #else 379 PetscInt *colmap=NULL; /* local col number of off-diag col */ 380 #endif 381 382 PetscFunctionBegin; 383 if (ctype == IS_COLORING_GHOSTED) { 384 if (!map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping"); 385 ierr = ISLocalToGlobalMappingGetIndices(map,<og);CHKERRQ(ierr); 386 } 387 388 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 389 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr); 390 if (!isBAIJ) { 391 bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */ 392 Mat_MPIAIJ *aij=(Mat_MPIAIJ*)mat->data; 393 A=aij->A; B=aij->B; 394 Mat_SeqAIJ *spA=(Mat_SeqAIJ*)A->data,*spB=(Mat_SeqAIJ*)B->data; 395 A_val=spA->a; B_val=spB->a; 396 nz = spA->nz + spB->nz; /* total nonzero entries of mat */ 397 if (!aij->colmap) { 398 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 399 colmap = aij->colmap; 400 } 401 ierr = MatGetColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 402 ierr = MatGetColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 403 } else { 404 Mat_MPIBAIJ *aij=(Mat_MPIBAIJ*)mat->data; 405 A=aij->A; B=aij->B; 406 Mat_SeqBAIJ *spA=(Mat_SeqBAIJ*)A->data,*spB=(Mat_SeqBAIJ*)B->data; 407 A_val=spA->a; B_val=spB->a; 408 nz = spA->nz + spB->nz; /* total nonzero entries of mat */ 409 if (!aij->colmap) { 410 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 411 colmap = aij->colmap; 412 } 413 ierr = MatGetColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 414 ierr = MatGetColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 415 } 416 417 m = mat->rmap->n/bs; 418 cstart = mat->cmap->rstart/bs; 419 cend = mat->cmap->rend/bs; 420 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 421 c->N = mat->cmap->N/bs; 422 c->m = m; 423 c->rstart = mat->rmap->rstart/bs; 424 425 c->ncolors = nis; 426 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 427 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 428 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 429 ierr = PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));CHKERRQ(ierr); 430 431 //nz = spA->nz + spB->nz; /* total nonzero entries of mat */ 432 ierr = PetscMalloc(nz*sizeof(MatEntry),&Jentry);CHKERRQ(ierr); 433 ierr = PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry));CHKERRQ(ierr); 434 c->matentry = Jentry; 435 436 /* Allow access to data structures of local part of matrix 437 - creates aij->colmap which maps global column number to local number in part B */ 438 //if (!aij->colmap) { 439 // ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 440 //} 441 //ierr = MatGetColumnIJ_SeqAIJ_Color(aij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 442 //ierr = MatGetColumnIJ_SeqAIJ_Color(aij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 443 444 ierr = PetscMalloc2(m+1,PetscInt,&rowhit,m+1,PetscScalar*,&valaddrhit);CHKERRQ(ierr); 445 nz = 0; 446 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 447 for (i=0; i<nis; i++) { /* for each local color */ 448 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 449 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 450 451 c->ncolumns[i] = n; /* local number of columns of this color on this process */ 452 if (n) { 453 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 454 ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr); 455 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 456 /* convert global column indices to local ones! */ 457 458 } else { 459 c->columns[i] = 0; 460 } 461 462 if (ctype == IS_COLORING_GLOBAL) { 463 /* Determine nctot, the total (parallel) number of columns of this color */ 464 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 465 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 466 467 /* ncolsonproc[j]: local ncolumns on proc[j] of this color */ 468 ierr = PetscMPIIntCast(n,&nn);CHKERRQ(ierr); 469 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 470 nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j]; 471 if (!nctot) { 472 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 473 } 474 475 disp[0] = 0; 476 for (j=1; j<size; j++) { 477 disp[j] = disp[j-1] + ncolsonproc[j-1]; 478 } 479 480 /* Get cols, the complete list of columns for this color on each process */ 481 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 482 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 483 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 484 } else if (ctype == IS_COLORING_GHOSTED) { 485 /* Determine local number of columns of this color on this process, including ghost points */ 486 nctot = n; 487 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 488 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 489 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 490 491 /* Mark all rows affect by these columns */ 492 ierr = PetscMemzero(rowhit,m*sizeof(PetscInt));CHKERRQ(ierr); 493 bs2 = bs*bs; 494 nrows_i = 0; 495 for (j=0; j<nctot; j++) { /* loop over columns*/ 496 if (ctype == IS_COLORING_GHOSTED) { 497 col = ltog[cols[j]]; 498 } else { 499 col = cols[j]; 500 } 501 if (col >= cstart && col < cend) { /* column is in diagonal block of matrix A */ 502 row = A_cj + A_ci[col-cstart]; 503 nrows = A_ci[col-cstart+1] - A_ci[col-cstart]; 504 nrows_i += nrows; 505 /* loop over columns of A marking them in rowhit */ 506 for (k=0; k<nrows; k++) { 507 /* set valaddrhit for part A */ 508 spidx = spidxA[A_ci[col-cstart] + k]; 509 valaddrhit[*row] = &A_val[bs2*spidx]; 510 rowhit[*row++] = col - cstart + 1; /* local column index */ 511 } 512 } else { /* column is in off-diagonal block of matrix B */ 513 #if defined(PETSC_USE_CTABLE) 514 //ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr); 515 ierr = PetscTableFind(colmap,col+1,&colb);CHKERRQ(ierr); 516 colb--; 517 #else 518 //colb = aij->colmap[col] - 1; /* local column index */ 519 colb = colmap[col] - 1; /* local column index */ 520 #endif 521 if (colb == -1) { 522 nrows = 0; 523 } else { 524 colb = colb/bs; 525 row = B_cj + B_ci[colb]; 526 nrows = B_ci[colb+1] - B_ci[colb]; 527 } 528 nrows_i += nrows; 529 /* loop over columns of B marking them in rowhit */ 530 for (k=0; k<nrows; k++) { 531 /* set valaddrhit for part B */ 532 spidx = spidxB[B_ci[colb] + k]; 533 valaddrhit[*row] = &B_val[bs2*spidx]; 534 rowhit[*row++] = colb + 1 + cend - cstart; /* local column index */ 535 } 536 } 537 } 538 c->nrows[i] = nrows_i; 539 540 for (j=0; j<m; j++) { 541 if (rowhit[j]) { 542 Jentry[nz].row = j; /* local row index */ 543 Jentry[nz].col = rowhit[j] - 1; /* local column index */ 544 Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 545 nz++; 546 } 547 } 548 ierr = PetscFree(cols);CHKERRQ(ierr); 549 } 550 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 551 //if (nz != spA->nz + spB->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz,spA->nz+spB->nz); 552 553 ierr = PetscFree2(rowhit,valaddrhit);CHKERRQ(ierr); 554 if (isBAIJ) { 555 ierr = MatRestoreColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 556 ierr = MatRestoreColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 557 ierr = PetscMalloc(bs*mat->cmap->n*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr); 558 mat->ops->fdcoloringapply = MatFDColoringApply_BAIJ_new; 559 } else { 560 ierr = MatRestoreColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 561 ierr = MatRestoreColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 562 mat->ops->fdcoloringapply = MatFDColoringApply_AIJ_new; 563 } 564 565 if (ctype == IS_COLORING_GHOSTED) { 566 ierr = ISLocalToGlobalMappingRestoreIndices(map,<og);CHKERRQ(ierr); 567 } 568 PetscFunctionReturn(0); 569 } 570 571 /*------------------------------------------------------*/ 572 #undef __FUNCT__ 573 #define __FUNCT__ "MatFDColoringCreate_MPIAIJ" 574 PetscErrorCode MatFDColoringCreate_MPIAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 575 { 576 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 577 PetscErrorCode ierr; 578 PetscMPIInt size,*ncolsonproc,*disp,nn; 579 PetscInt i,n,nrows,j,k,m,ncols,col; 580 const PetscInt *is,*A_ci,*A_cj,*B_ci,*B_cj,*rows = 0,*ltog; 581 PetscInt nis = iscoloring->n,nctot,*cols; 582 PetscInt *rowhit,M,cstart,cend,colb; 583 PetscInt *columnsforrow,l; 584 IS *isa; 585 PetscBool done,flg; 586 ISLocalToGlobalMapping map = mat->cmap->mapping; 587 PetscInt ctype=c->ctype; 588 PetscBool new_impl=PETSC_FALSE; 589 590 PetscFunctionBegin; 591 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 592 if (new_impl){ 593 ierr = MatFDColoringCreate_MPIAIJ_new(mat,iscoloring,c);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping"); 597 598 if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,<og);CHKERRQ(ierr);} 599 else ltog = NULL; 600 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 601 602 M = mat->rmap->n; 603 cstart = mat->cmap->rstart; 604 cend = mat->cmap->rend; 605 c->M = mat->rmap->N; /* set the global rows and columns and local rows */ 606 c->N = mat->cmap->N; 607 c->m = mat->rmap->n; 608 c->rstart = mat->rmap->rstart; 609 610 c->ncolors = nis; 611 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 612 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 613 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 614 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 615 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 616 ierr = PetscLogObjectMemory((PetscObject)c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 617 618 /* Allow access to data structures of local part of matrix */ 619 if (!aij->colmap) { 620 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 621 } 622 ierr = MatGetColumnIJ(aij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 623 ierr = MatGetColumnIJ(aij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 624 625 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 626 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 627 628 for (i=0; i<nis; i++) { 629 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 630 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 631 632 c->ncolumns[i] = n; /* local number of columns of this color on this process */ 633 if (n) { 634 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 635 ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr); 636 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 637 } else { 638 c->columns[i] = 0; 639 } 640 641 if (ctype == IS_COLORING_GLOBAL) { 642 /* Determine the total (parallel) number of columns of this color */ 643 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 644 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 645 646 ierr = PetscMPIIntCast(n,&nn);CHKERRQ(ierr); 647 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 648 nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j]; 649 if (!nctot) { 650 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 651 } 652 653 disp[0] = 0; 654 for (j=1; j<size; j++) { 655 disp[j] = disp[j-1] + ncolsonproc[j-1]; 656 } 657 658 /* Get complete list of columns for color on each processor */ 659 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 660 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 661 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 662 } else if (ctype == IS_COLORING_GHOSTED) { 663 /* Determine local number of columns of this color on this process, including ghost points */ 664 nctot = n; 665 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 666 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 667 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 668 669 /* 670 Mark all rows affect by these columns 671 */ 672 /* Temporary option to allow for debugging/testing */ 673 flg = PETSC_FALSE; 674 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 675 if (!flg) { /*-----------------------------------------------------------------------------*/ 676 /* crude, fast version */ 677 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 678 /* loop over columns*/ 679 for (j=0; j<nctot; j++) { 680 if (ctype == IS_COLORING_GHOSTED) { 681 col = ltog[cols[j]]; 682 } else { 683 col = cols[j]; 684 } 685 if (col >= cstart && col < cend) { 686 /* column is in diagonal block of matrix */ 687 rows = A_cj + A_ci[col-cstart]; 688 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 689 } else { 690 #if defined(PETSC_USE_CTABLE) 691 ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr); 692 colb--; 693 #else 694 colb = aij->colmap[col] - 1; 695 #endif 696 if (colb == -1) { 697 m = 0; 698 } else { 699 rows = B_cj + B_ci[colb]; 700 m = B_ci[colb+1] - B_ci[colb]; 701 } 702 } 703 /* loop over columns marking them in rowhit */ 704 for (k=0; k<m; k++) { 705 rowhit[*rows++] = col + 1; 706 } 707 } 708 709 /* count the number of hits */ 710 nrows = 0; 711 for (j=0; j<M; j++) { 712 if (rowhit[j]) nrows++; 713 } 714 c->nrows[i] = nrows; 715 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 716 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 717 ierr = PetscLogObjectMemory((PetscObject)c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 718 nrows = 0; 719 for (j=0; j<M; j++) { 720 if (rowhit[j]) { 721 c->rows[i][nrows] = j; /* local row index */ 722 c->columnsforrow[i][nrows] = rowhit[j] - 1; /* global column index */ 723 nrows++; 724 } 725 } 726 } else { /*-------------------------------------------------------------------------------*/ 727 /* slow version, using rowhit as a linked list */ 728 PetscInt currentcol,fm,mfm; 729 rowhit[M] = M; 730 nrows = 0; 731 /* loop over columns*/ 732 for (j=0; j<nctot; j++) { 733 if (ctype == IS_COLORING_GHOSTED) { 734 col = ltog[cols[j]]; 735 } else { 736 col = cols[j]; 737 } 738 if (col >= cstart && col < cend) { 739 /* column is in diagonal block of matrix */ 740 rows = A_cj + A_ci[col-cstart]; 741 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 742 } else { 743 #if defined(PETSC_USE_CTABLE) 744 ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr); 745 colb--; 746 #else 747 colb = aij->colmap[col] - 1; 748 #endif 749 if (colb == -1) { 750 m = 0; 751 } else { 752 rows = B_cj + B_ci[colb]; 753 m = B_ci[colb+1] - B_ci[colb]; 754 } 755 } 756 757 /* loop over columns marking them in rowhit */ 758 fm = M; /* fm points to first entry in linked list */ 759 for (k=0; k<m; k++) { 760 currentcol = *rows++; 761 /* is it already in the list? */ 762 do { 763 mfm = fm; 764 fm = rowhit[fm]; 765 } while (fm < currentcol); 766 /* not in list so add it */ 767 if (fm != currentcol) { 768 nrows++; 769 columnsforrow[currentcol] = col; 770 /* next three lines insert new entry into linked list */ 771 rowhit[mfm] = currentcol; 772 rowhit[currentcol] = fm; 773 fm = currentcol; 774 /* fm points to present position in list since we know the columns are sorted */ 775 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 776 } 777 } 778 c->nrows[i] = nrows; 779 780 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 781 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 782 ierr = PetscLogObjectMemory((PetscObject)c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 783 /* now store the linked list of rows into c->rows[i] */ 784 nrows = 0; 785 fm = rowhit[M]; 786 do { 787 c->rows[i][nrows] = fm; 788 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 789 fm = rowhit[fm]; 790 } while (fm < M); 791 } /* ---------------------------------------------------------------------------------------*/ 792 ierr = PetscFree(cols);CHKERRQ(ierr); 793 } 794 795 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 796 /* 797 vscale will contain the "diagonal" on processor scalings followed by the off processor 798 */ 799 if (ctype == IS_COLORING_GLOBAL) { 800 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),aij->A->rmap->n,PETSC_DETERMINE,aij->B->cmap->n,aij->garray,&c->vscale);CHKERRQ(ierr); 801 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 802 for (k=0; k<c->ncolors; k++) { 803 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 804 for (l=0; l<c->nrows[k]; l++) { 805 col = c->columnsforrow[k][l]; 806 if (col >= cstart && col < cend) { 807 /* column is in diagonal block of matrix */ 808 colb = col - cstart; 809 } else { 810 /* column is in "off-processor" part */ 811 #if defined(PETSC_USE_CTABLE) 812 ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr); 813 colb--; 814 #else 815 colb = aij->colmap[col] - 1; 816 #endif 817 colb += cend - cstart; 818 } 819 c->vscaleforrow[k][l] = colb; 820 } 821 } 822 } else if (ctype == IS_COLORING_GHOSTED) { 823 /* Get gtol mapping */ 824 PetscInt N = mat->cmap->N,nlocal,*gtol; 825 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 826 for (i=0; i<N; i++) gtol[i] = -1; 827 ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr); 828 for (i=0; i<nlocal; i++) gtol[ltog[i]] = i; 829 830 c->vscale = 0; /* will be created in MatFDColoringApply() */ 831 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 832 for (k=0; k<c->ncolors; k++) { 833 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 834 for (l=0; l<c->nrows[k]; l++) { 835 col = c->columnsforrow[k][l]; /* global column index */ 836 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 837 } 838 } 839 ierr = PetscFree(gtol);CHKERRQ(ierr); 840 } 841 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 842 843 ierr = PetscFree(rowhit);CHKERRQ(ierr); 844 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 845 ierr = MatRestoreColumnIJ(aij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 846 ierr = MatRestoreColumnIJ(aij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 847 if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,<og);CHKERRQ(ierr);} 848 PetscFunctionReturn(0); 849 } 850 851 852 853 854 855 856