1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3 #include <../src/mat/impls/baij/mpi/mpibaij.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "MatFDColoringApply_BAIJ_new" 7 PetscErrorCode MatFDColoringApply_BAIJ_new(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 8 { 9 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 10 PetscErrorCode ierr; 11 PetscInt k,cstart,cend,l,row,col,nz,spidx,i,j; 12 PetscScalar dx=0.0,*y,*xx,*w3_array,*dy_i,*dy=coloring->dy; 13 PetscScalar *vscale_array; 14 PetscReal epsilon=coloring->error_rel,umin=coloring->umin,unorm; 15 Vec w1=coloring->w1,w2=coloring->w2,w3,vscale=coloring->vscale; 16 void *fctx=coloring->fctx; 17 PetscBool flg=PETSC_FALSE; 18 PetscInt ctype=coloring->ctype,nxloc,nrows_k; 19 Mat_MPIBAIJ *aij=(Mat_MPIBAIJ*)J->data; 20 PetscScalar *valaddr; 21 MatEntry *Jentry=coloring->matentry; 22 const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 23 PetscInt bs=J->rmap->bs; 24 25 PetscFunctionBegin; 26 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 27 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 28 if (flg) { 29 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 30 } else { 31 PetscBool assembled; 32 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 33 if (assembled) { 34 ierr = MatZeroEntries(J);CHKERRQ(ierr); 35 } 36 } 37 38 /* create vscale for storing dx */ 39 if (!vscale) { 40 if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') { 41 ierr = VecCreateGhost(PetscObjectComm((PetscObject)J),J->cmap->n,PETSC_DETERMINE,aij->B->cmap->n,aij->garray,&vscale);CHKERRQ(ierr); 42 43 } else if (ctype == IS_COLORING_GHOSTED) { 44 ierr = VecDuplicate(x1,&vscale);CHKERRQ(ierr); 45 } 46 coloring->vscale = vscale; 47 } 48 49 /* (1) Set w1 = F(x1) */ 50 if (!coloring->fset) { 51 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 52 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 53 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 54 } else { 55 coloring->fset = PETSC_FALSE; 56 } 57 58 /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */ 59 ierr = VecGetLocalSize(x1,&nxloc);CHKERRQ(ierr); 60 //PetscMPIInt rank; 61 //ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 62 //printf("[%d] nxloc %d\n",rank,nxloc); 63 if (coloring->htype[0] == 'w') { 64 /* vscale = dx is a constant scalar */ 65 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 66 dx = 1.0/(PetscSqrtReal(1.0 + unorm)*epsilon); 67 } else { 68 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 69 ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr); 70 for (col=0; col<nxloc; col++) { 71 dx = xx[col]; 72 if (PetscAbsScalar(dx) < umin) { 73 if (PetscRealPart(dx) >= 0.0) dx = umin; 74 else if (PetscRealPart(dx) < 0.0 ) dx = -umin; 75 } 76 dx *= epsilon; 77 vscale_array[col] = 1.0/dx; 78 } 79 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 80 ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr); 81 } 82 if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] != 'w') { 83 ierr = VecGhostUpdateBegin(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 84 ierr = VecGhostUpdateEnd(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 85 } 86 87 /* (3) Loop over each color */ 88 if (!coloring->w3) { 89 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 90 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 91 } 92 w3 = coloring->w3; 93 94 ierr = VecGetOwnershipRange(x1,&cstart,&cend);CHKERRQ(ierr); /* used by ghosted vscale */ 95 if (vscale) { 96 ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr); 97 } 98 nz = 0; 99 for (k=0; k<ncolors; k++) { 100 coloring->currentcolor = k; 101 102 /* 103 (3-1) Loop over each column associated with color 104 adding the perturbation to the vector w3 = x1 + dx. 105 */ 106 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 107 dy_i = dy; 108 for (i=0; i<bs; i++) { /* Loop over a block of columns */ 109 //------------------------------------------------ 110 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 111 if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */ 112 if (coloring->htype[0] == 'w') { 113 for (l=0; l<ncolumns[k]; l++) { 114 col = i + bs*coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 115 w3_array[col] += 1.0/dx; 116 117 if (i) { 118 w3_array[col-1] -= 1.0/dx; /* resume original w3[col-1] */ 119 } 120 121 } 122 } else { /* htype == 'ds' */ 123 vscale_array -= cstart; /* shift pointer so global index can be used */ 124 for (l=0; l<ncolumns[k]; l++) { 125 col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 126 w3_array[col] += 1.0/vscale_array[col]; 127 128 if (i) { 129 w3_array[col-1] -= 1.0/vscale_array[col-1]; /* resume original w3[col-1] */ 130 } 131 132 } 133 vscale_array += cstart; 134 } 135 if (ctype == IS_COLORING_GLOBAL) w3_array += cstart; 136 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 137 138 /* 139 (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 140 w2 = F(x1 + dx) - F(x1) 141 */ 142 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 143 ierr = VecPlaceArray(w2,dy_i);CHKERRQ(ierr); /* place w2 to the array dy_i */ 144 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 145 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 146 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 147 //--------------------------------------------- 148 ierr = VecResetArray(w2);CHKERRQ(ierr); 149 dy_i += nxloc; /* points to dy+i*nxloc */ 150 } 151 152 /* 153 (3-3) Loop over rows of vector, putting results into Jacobian matrix 154 */ 155 nrows_k = nrows[k]; 156 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 157 if (coloring->htype[0] == 'w') { 158 for (l=0; l<nrows_k; l++) { 159 row = bs*Jentry[nz].row; /* local row index */ 160 valaddr = Jentry[nz].valaddr; 161 nz++; 162 spidx = 0; 163 dy_i = dy; 164 for (i=0; i<bs; i++) { /* column of the block */ 165 for (j=0; j<bs; j++) { /* row of the block */ 166 valaddr[spidx++] = dy_i[row+j]*dx; 167 } 168 dy_i += nxloc; /* points to dy+i*N */ 169 } 170 } 171 } else { /* htype == 'ds' */ 172 for (l=0; l<nrows_k; l++) { 173 row = bs*Jentry[nz].row; /* local row index */ 174 col = bs*Jentry[nz].col; /* local column index */ 175 //*(Jentry[nz].valaddr) = y[row]*vscale_array[col]; 176 nz++; 177 } 178 } 179 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 180 } 181 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 182 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 183 if (vscale) { 184 ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr); 185 } 186 187 coloring->currentcolor = -1; 188 PetscFunctionReturn(0); 189 } 190 191 extern PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat); 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatFDColoringApply_AIJ_new" 194 PetscErrorCode MatFDColoringApply_AIJ_new(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 195 { 196 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 197 PetscErrorCode ierr; 198 PetscInt k,cstart,cend,l,row,col,nz; 199 PetscScalar dx=0.0,*y,*xx,*w3_array; 200 PetscScalar *vscale_array; 201 PetscReal epsilon=coloring->error_rel,umin=coloring->umin,unorm; 202 Vec w1=coloring->w1,w2=coloring->w2,w3,vscale=coloring->vscale; 203 void *fctx=coloring->fctx; 204 PetscBool flg=PETSC_FALSE; 205 PetscInt ctype=coloring->ctype,nxloc,nrows_k; 206 Mat_MPIAIJ *aij=(Mat_MPIAIJ*)J->data; 207 MatEntry *Jentry=coloring->matentry; 208 const PetscInt ncolors=coloring->ncolors,*ncolumns=coloring->ncolumns,*nrows=coloring->nrows; 209 210 PetscFunctionBegin; 211 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 212 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 213 if (flg) { 214 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 215 } else { 216 PetscBool assembled; 217 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 218 if (assembled) { 219 ierr = MatZeroEntries(J);CHKERRQ(ierr); 220 } 221 } 222 223 /* create vscale for storing dx */ 224 if (!vscale) { 225 if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') { 226 ierr = VecCreateGhost(PetscObjectComm((PetscObject)J),J->cmap->n,PETSC_DETERMINE,aij->B->cmap->n,aij->garray,&vscale);CHKERRQ(ierr); 227 } else if (ctype == IS_COLORING_GHOSTED) { 228 ierr = VecDuplicate(x1,&vscale);CHKERRQ(ierr); 229 } 230 coloring->vscale = vscale; 231 } 232 233 /* (1) Set w1 = F(x1) */ 234 if (!coloring->fset) { 235 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 236 ierr = (*f)(sctx,x1,w1,fctx);CHKERRQ(ierr); 237 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 238 } else { 239 coloring->fset = PETSC_FALSE; 240 } 241 242 /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */ 243 if (coloring->htype[0] == 'w') { 244 /* vscale = dx is a constant scalar */ 245 ierr = VecNorm(x1,NORM_2,&unorm);CHKERRQ(ierr); 246 dx = 1.0/(PetscSqrtReal(1.0 + unorm)*epsilon); 247 } else { 248 ierr = VecGetLocalSize(x1,&nxloc);CHKERRQ(ierr); 249 ierr = VecGetArray(x1,&xx);CHKERRQ(ierr); 250 ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr); 251 for (col=0; col<nxloc; col++) { 252 dx = xx[col]; 253 if (PetscAbsScalar(dx) < umin) { 254 if (PetscRealPart(dx) >= 0.0) dx = umin; 255 else if (PetscRealPart(dx) < 0.0 ) dx = -umin; 256 } 257 dx *= epsilon; 258 vscale_array[col] = 1.0/dx; 259 } 260 ierr = VecRestoreArray(x1,&xx);CHKERRQ(ierr); 261 ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr); 262 } 263 if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] != 'w') { 264 ierr = VecGhostUpdateBegin(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 265 ierr = VecGhostUpdateEnd(vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 266 } 267 268 /* (3) Loop over each color */ 269 if (!coloring->w3) { 270 ierr = VecDuplicate(x1,&coloring->w3);CHKERRQ(ierr); 271 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 272 } 273 w3 = coloring->w3; 274 275 ierr = VecGetOwnershipRange(x1,&cstart,&cend);CHKERRQ(ierr); /* used by ghosted vscale */ 276 if (vscale) { 277 ierr = VecGetArray(vscale,&vscale_array);CHKERRQ(ierr); 278 } 279 nz = 0; 280 for (k=0; k<ncolors; k++) { 281 coloring->currentcolor = k; 282 283 /* 284 (3-1) Loop over each column associated with color 285 adding the perturbation to the vector w3 = x1 + dx. 286 */ 287 ierr = VecCopy(x1,w3);CHKERRQ(ierr); 288 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 289 if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */ 290 if (coloring->htype[0] == 'w') { 291 for (l=0; l<ncolumns[k]; l++) { 292 col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 293 w3_array[col] += 1.0/dx; 294 } 295 } else { /* htype == 'ds' */ 296 vscale_array -= cstart; /* shift pointer so global index can be used */ 297 for (l=0; l<ncolumns[k]; l++) { 298 col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 299 w3_array[col] += 1.0/vscale_array[col]; 300 } 301 vscale_array += cstart; 302 } 303 if (ctype == IS_COLORING_GLOBAL) w3_array += cstart; 304 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 305 306 /* 307 (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 308 w2 = F(x1 + dx) - F(x1) 309 */ 310 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 311 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 312 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 313 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 314 315 /* 316 (3-3) Loop over rows of vector, putting results into Jacobian matrix 317 */ 318 nrows_k = nrows[k]; 319 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 320 if (coloring->htype[0] == 'w') { 321 for (l=0; l<nrows_k; l++) { 322 row = Jentry[nz].row; /* local row index */ 323 *(Jentry[nz++].valaddr) = y[row]*dx; 324 } 325 } else { /* htype == 'ds' */ 326 for (l=0; l<nrows_k; l++) { 327 row = Jentry[nz].row; /* local row index */ 328 *(Jentry[nz].valaddr) = y[row]*vscale_array[Jentry[nz].col]; 329 nz++; 330 } 331 } 332 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 333 } 334 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 335 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 336 if (vscale) { 337 ierr = VecRestoreArray(vscale,&vscale_array);CHKERRQ(ierr); 338 } 339 340 coloring->currentcolor = -1; 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNCT__ 345 #define __FUNCT__ "MatFDColoringCreate_MPIAIJ_new" 346 PetscErrorCode MatFDColoringCreate_MPIAIJ_new(Mat mat,ISColoring iscoloring,MatFDColoring c) 347 { 348 //Mat_MPIAIJ *aij=(Mat_MPIAIJ*)mat->data; 349 PetscErrorCode ierr; 350 PetscMPIInt size,*ncolsonproc,*disp,nn; 351 PetscInt i,n,nrows,nrows_i,j,k,m,ncols,col; 352 const PetscInt *is,*A_ci,*A_cj,*B_ci,*B_cj,*row = NULL,*ltog=NULL; 353 PetscInt nis=iscoloring->n,nctot,*cols; 354 PetscInt *rowhit,cstart,cend,colb; 355 IS *isa; 356 ISLocalToGlobalMapping map=mat->cmap->mapping; 357 PetscInt ctype=c->ctype; 358 Mat A,B; 359 //Mat A=aij->A,B=aij->B; 360 //Mat_SeqAIJ *spA=(Mat_SeqAIJ*)A->data,*spB=(Mat_SeqAIJ*)B->data; 361 //PetscScalar *A_val=spA->a,*B_val=spB->a; 362 PetscScalar *A_val,*B_val; 363 PetscInt spidx; 364 PetscInt *spidxA,*spidxB,nz,bs,bs2; 365 PetscScalar **valaddrhit; 366 MatEntry *Jentry; 367 PetscBool isBAIJ; 368 #if defined(PETSC_USE_CTABLE) 369 PetscTable colmap=NULL; 370 #else 371 PetscInt *colmap=NULL; /* local col number of off-diag col */ 372 #endif 373 374 PetscFunctionBegin; 375 if (ctype == IS_COLORING_GHOSTED) { 376 if (!map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping"); 377 ierr = ISLocalToGlobalMappingGetIndices(map,<og);CHKERRQ(ierr); 378 } 379 380 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 381 ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIBAIJ,&isBAIJ);CHKERRQ(ierr); 382 if (!isBAIJ) { 383 bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */ 384 Mat_MPIAIJ *aij=(Mat_MPIAIJ*)mat->data; 385 A=aij->A; B=aij->B; 386 Mat_SeqAIJ *spA=(Mat_SeqAIJ*)A->data,*spB=(Mat_SeqAIJ*)B->data; 387 A_val=spA->a; B_val=spB->a; 388 nz = spA->nz + spB->nz; /* total nonzero entries of mat */ 389 if (!aij->colmap) { 390 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 391 colmap = aij->colmap; 392 } 393 ierr = MatGetColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 394 ierr = MatGetColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 395 } else { 396 Mat_MPIBAIJ *aij=(Mat_MPIBAIJ*)mat->data; 397 A=aij->A; B=aij->B; 398 Mat_SeqBAIJ *spA=(Mat_SeqBAIJ*)A->data,*spB=(Mat_SeqBAIJ*)B->data; 399 A_val=spA->a; B_val=spB->a; 400 nz = spA->nz + spB->nz; /* total nonzero entries of mat */ 401 if (!aij->colmap) { 402 ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 403 colmap = aij->colmap; 404 } 405 ierr = MatGetColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 406 ierr = MatGetColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 407 } 408 409 m = mat->rmap->n/bs; 410 cstart = mat->cmap->rstart/bs; 411 cend = mat->cmap->rend/bs; 412 c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */ 413 c->N = mat->cmap->N/bs; 414 c->m = m; 415 c->rstart = mat->rmap->rstart/bs; 416 417 c->ncolors = nis; 418 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 419 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 420 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 421 ierr = PetscLogObjectMemory((PetscObject)c,3*nis*sizeof(PetscInt));CHKERRQ(ierr); 422 423 //nz = spA->nz + spB->nz; /* total nonzero entries of mat */ 424 ierr = PetscMalloc(nz*sizeof(MatEntry),&Jentry);CHKERRQ(ierr); 425 ierr = PetscLogObjectMemory((PetscObject)c,nz*sizeof(MatEntry));CHKERRQ(ierr); 426 c->matentry = Jentry; 427 428 /* Allow access to data structures of local part of matrix 429 - creates aij->colmap which maps global column number to local number in part B */ 430 //if (!aij->colmap) { 431 // ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 432 //} 433 //ierr = MatGetColumnIJ_SeqAIJ_Color(aij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 434 //ierr = MatGetColumnIJ_SeqAIJ_Color(aij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 435 436 ierr = PetscMalloc2(m+1,PetscInt,&rowhit,m+1,PetscScalar*,&valaddrhit);CHKERRQ(ierr); 437 nz = 0; 438 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 439 for (i=0; i<nis; i++) { /* for each local color */ 440 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 441 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 442 443 c->ncolumns[i] = n; /* local number of columns of this color on this process */ 444 if (n) { 445 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 446 ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr); 447 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 448 /* convert global column indices to local ones! */ 449 450 } else { 451 c->columns[i] = 0; 452 } 453 454 if (ctype == IS_COLORING_GLOBAL) { 455 /* Determine nctot, the total (parallel) number of columns of this color */ 456 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 457 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 458 459 /* ncolsonproc[j]: local ncolumns on proc[j] of this color */ 460 ierr = PetscMPIIntCast(n,&nn);CHKERRQ(ierr); 461 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 462 nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j]; 463 if (!nctot) { 464 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 465 } 466 467 disp[0] = 0; 468 for (j=1; j<size; j++) { 469 disp[j] = disp[j-1] + ncolsonproc[j-1]; 470 } 471 472 /* Get cols, the complete list of columns for this color on each process */ 473 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 474 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 475 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 476 } else if (ctype == IS_COLORING_GHOSTED) { 477 /* Determine local number of columns of this color on this process, including ghost points */ 478 nctot = n; 479 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 480 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 481 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 482 483 /* Mark all rows affect by these columns */ 484 ierr = PetscMemzero(rowhit,m*sizeof(PetscInt));CHKERRQ(ierr); 485 bs2 = bs*bs; 486 nrows_i = 0; 487 for (j=0; j<nctot; j++) { /* loop over columns*/ 488 if (ctype == IS_COLORING_GHOSTED) { 489 col = ltog[cols[j]]; 490 } else { 491 col = cols[j]; 492 } 493 if (col >= cstart && col < cend) { /* column is in diagonal block of matrix A */ 494 row = A_cj + A_ci[col-cstart]; 495 nrows = A_ci[col-cstart+1] - A_ci[col-cstart]; 496 nrows_i += nrows; 497 /* loop over columns of A marking them in rowhit */ 498 for (k=0; k<nrows; k++) { 499 /* set valaddrhit for part A */ 500 spidx = spidxA[A_ci[col-cstart] + k]; 501 valaddrhit[*row] = &A_val[bs2*spidx]; 502 rowhit[*row++] = col - cstart + 1; /* local column index */ 503 } 504 } else { /* column is in off-diagonal block of matrix B */ 505 #if defined(PETSC_USE_CTABLE) 506 //ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr); 507 ierr = PetscTableFind(colmap,col+1,&colb);CHKERRQ(ierr); 508 colb--; 509 #else 510 //colb = aij->colmap[col] - 1; /* local column index */ 511 colb = colmap[col] - 1; /* local column index */ 512 #endif 513 if (colb == -1) { 514 nrows = 0; 515 } else { 516 colb = colb/bs; 517 row = B_cj + B_ci[colb]; 518 nrows = B_ci[colb+1] - B_ci[colb]; 519 } 520 nrows_i += nrows; 521 /* loop over columns of B marking them in rowhit */ 522 for (k=0; k<nrows; k++) { 523 /* set valaddrhit for part B */ 524 spidx = spidxB[B_ci[colb] + k]; 525 valaddrhit[*row] = &B_val[bs2*spidx]; 526 rowhit[*row++] = colb + 1 + cend - cstart; /* local column index */ 527 } 528 } 529 } 530 c->nrows[i] = nrows_i; 531 532 for (j=0; j<m; j++) { 533 if (rowhit[j]) { 534 Jentry[nz].row = j; /* local row index */ 535 Jentry[nz].col = rowhit[j] - 1; /* local column index */ 536 Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 537 nz++; 538 } 539 } 540 ierr = PetscFree(cols);CHKERRQ(ierr); 541 } 542 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 543 //if (nz != spA->nz + spB->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz,spA->nz+spB->nz); 544 545 ierr = PetscFree2(rowhit,valaddrhit);CHKERRQ(ierr); 546 if (isBAIJ) { 547 ierr = MatRestoreColumnIJ_SeqBAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 548 ierr = MatRestoreColumnIJ_SeqBAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 549 ierr = PetscMalloc(bs*mat->cmap->n*sizeof(PetscScalar),&c->dy);CHKERRQ(ierr); 550 mat->ops->fdcoloringapply = MatFDColoringApply_BAIJ_new; 551 } else { 552 ierr = MatRestoreColumnIJ_SeqAIJ_Color(A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 553 ierr = MatRestoreColumnIJ_SeqAIJ_Color(B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 554 mat->ops->fdcoloringapply = MatFDColoringApply_AIJ_new; 555 } 556 557 if (ctype == IS_COLORING_GHOSTED) { 558 ierr = ISLocalToGlobalMappingRestoreIndices(map,<og);CHKERRQ(ierr); 559 } 560 PetscFunctionReturn(0); 561 } 562 563 /*------------------------------------------------------*/ 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatFDColoringCreate_MPIAIJ" 566 PetscErrorCode MatFDColoringCreate_MPIAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 567 { 568 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 569 PetscErrorCode ierr; 570 PetscMPIInt size,*ncolsonproc,*disp,nn; 571 PetscInt i,n,nrows,j,k,m,ncols,col; 572 const PetscInt *is,*A_ci,*A_cj,*B_ci,*B_cj,*rows = 0,*ltog; 573 PetscInt nis = iscoloring->n,nctot,*cols; 574 PetscInt *rowhit,M,cstart,cend,colb; 575 PetscInt *columnsforrow,l; 576 IS *isa; 577 PetscBool done,flg; 578 ISLocalToGlobalMapping map = mat->cmap->mapping; 579 PetscInt ctype=c->ctype; 580 PetscBool new_impl=PETSC_FALSE; 581 582 PetscFunctionBegin; 583 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 584 if (new_impl){ 585 ierr = MatFDColoringCreate_MPIAIJ_new(mat,iscoloring,c);CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping"); 589 590 if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,<og);CHKERRQ(ierr);} 591 else ltog = NULL; 592 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 593 594 M = mat->rmap->n; 595 cstart = mat->cmap->rstart; 596 cend = mat->cmap->rend; 597 c->M = mat->rmap->N; /* set the global rows and columns and local rows */ 598 c->N = mat->cmap->N; 599 c->m = mat->rmap->n; 600 c->rstart = mat->rmap->rstart; 601 602 c->ncolors = nis; 603 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 604 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 605 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 606 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 607 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 608 ierr = PetscLogObjectMemory((PetscObject)c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 609 610 /* Allow access to data structures of local part of matrix */ 611 if (!aij->colmap) { 612 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 613 } 614 ierr = MatGetColumnIJ(aij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 615 ierr = MatGetColumnIJ(aij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 616 617 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 618 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 619 620 for (i=0; i<nis; i++) { 621 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 622 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 623 624 c->ncolumns[i] = n; /* local number of columns of this color on this process */ 625 if (n) { 626 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 627 ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr); 628 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 629 } else { 630 c->columns[i] = 0; 631 } 632 633 if (ctype == IS_COLORING_GLOBAL) { 634 /* Determine the total (parallel) number of columns of this color */ 635 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 636 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 637 638 ierr = PetscMPIIntCast(n,&nn);CHKERRQ(ierr); 639 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 640 nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j]; 641 if (!nctot) { 642 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 643 } 644 645 disp[0] = 0; 646 for (j=1; j<size; j++) { 647 disp[j] = disp[j-1] + ncolsonproc[j-1]; 648 } 649 650 /* Get complete list of columns for color on each processor */ 651 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 652 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 653 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 654 } else if (ctype == IS_COLORING_GHOSTED) { 655 /* Determine local number of columns of this color on this process, including ghost points */ 656 nctot = n; 657 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 658 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 659 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 660 661 /* 662 Mark all rows affect by these columns 663 */ 664 /* Temporary option to allow for debugging/testing */ 665 flg = PETSC_FALSE; 666 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 667 if (!flg) { /*-----------------------------------------------------------------------------*/ 668 /* crude, fast version */ 669 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 670 /* loop over columns*/ 671 for (j=0; j<nctot; j++) { 672 if (ctype == IS_COLORING_GHOSTED) { 673 col = ltog[cols[j]]; 674 } else { 675 col = cols[j]; 676 } 677 if (col >= cstart && col < cend) { 678 /* column is in diagonal block of matrix */ 679 rows = A_cj + A_ci[col-cstart]; 680 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 681 } else { 682 #if defined(PETSC_USE_CTABLE) 683 ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr); 684 colb--; 685 #else 686 colb = aij->colmap[col] - 1; 687 #endif 688 if (colb == -1) { 689 m = 0; 690 } else { 691 rows = B_cj + B_ci[colb]; 692 m = B_ci[colb+1] - B_ci[colb]; 693 } 694 } 695 /* loop over columns marking them in rowhit */ 696 for (k=0; k<m; k++) { 697 rowhit[*rows++] = col + 1; 698 } 699 } 700 701 /* count the number of hits */ 702 nrows = 0; 703 for (j=0; j<M; j++) { 704 if (rowhit[j]) nrows++; 705 } 706 c->nrows[i] = nrows; 707 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 708 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 709 ierr = PetscLogObjectMemory((PetscObject)c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 710 nrows = 0; 711 for (j=0; j<M; j++) { 712 if (rowhit[j]) { 713 c->rows[i][nrows] = j; /* local row index */ 714 c->columnsforrow[i][nrows] = rowhit[j] - 1; /* global column index */ 715 nrows++; 716 } 717 } 718 } else { /*-------------------------------------------------------------------------------*/ 719 /* slow version, using rowhit as a linked list */ 720 PetscInt currentcol,fm,mfm; 721 rowhit[M] = M; 722 nrows = 0; 723 /* loop over columns*/ 724 for (j=0; j<nctot; j++) { 725 if (ctype == IS_COLORING_GHOSTED) { 726 col = ltog[cols[j]]; 727 } else { 728 col = cols[j]; 729 } 730 if (col >= cstart && col < cend) { 731 /* column is in diagonal block of matrix */ 732 rows = A_cj + A_ci[col-cstart]; 733 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 734 } else { 735 #if defined(PETSC_USE_CTABLE) 736 ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr); 737 colb--; 738 #else 739 colb = aij->colmap[col] - 1; 740 #endif 741 if (colb == -1) { 742 m = 0; 743 } else { 744 rows = B_cj + B_ci[colb]; 745 m = B_ci[colb+1] - B_ci[colb]; 746 } 747 } 748 749 /* loop over columns marking them in rowhit */ 750 fm = M; /* fm points to first entry in linked list */ 751 for (k=0; k<m; k++) { 752 currentcol = *rows++; 753 /* is it already in the list? */ 754 do { 755 mfm = fm; 756 fm = rowhit[fm]; 757 } while (fm < currentcol); 758 /* not in list so add it */ 759 if (fm != currentcol) { 760 nrows++; 761 columnsforrow[currentcol] = col; 762 /* next three lines insert new entry into linked list */ 763 rowhit[mfm] = currentcol; 764 rowhit[currentcol] = fm; 765 fm = currentcol; 766 /* fm points to present position in list since we know the columns are sorted */ 767 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 768 } 769 } 770 c->nrows[i] = nrows; 771 772 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 773 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 774 ierr = PetscLogObjectMemory((PetscObject)c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 775 /* now store the linked list of rows into c->rows[i] */ 776 nrows = 0; 777 fm = rowhit[M]; 778 do { 779 c->rows[i][nrows] = fm; 780 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 781 fm = rowhit[fm]; 782 } while (fm < M); 783 } /* ---------------------------------------------------------------------------------------*/ 784 ierr = PetscFree(cols);CHKERRQ(ierr); 785 } 786 787 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 788 /* 789 vscale will contain the "diagonal" on processor scalings followed by the off processor 790 */ 791 if (ctype == IS_COLORING_GLOBAL) { 792 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),aij->A->rmap->n,PETSC_DETERMINE,aij->B->cmap->n,aij->garray,&c->vscale);CHKERRQ(ierr); 793 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 794 for (k=0; k<c->ncolors; k++) { 795 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 796 for (l=0; l<c->nrows[k]; l++) { 797 col = c->columnsforrow[k][l]; 798 if (col >= cstart && col < cend) { 799 /* column is in diagonal block of matrix */ 800 colb = col - cstart; 801 } else { 802 /* column is in "off-processor" part */ 803 #if defined(PETSC_USE_CTABLE) 804 ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr); 805 colb--; 806 #else 807 colb = aij->colmap[col] - 1; 808 #endif 809 colb += cend - cstart; 810 } 811 c->vscaleforrow[k][l] = colb; 812 } 813 } 814 } else if (ctype == IS_COLORING_GHOSTED) { 815 /* Get gtol mapping */ 816 PetscInt N = mat->cmap->N,nlocal,*gtol; 817 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 818 for (i=0; i<N; i++) gtol[i] = -1; 819 ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr); 820 for (i=0; i<nlocal; i++) gtol[ltog[i]] = i; 821 822 c->vscale = 0; /* will be created in MatFDColoringApply() */ 823 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 824 for (k=0; k<c->ncolors; k++) { 825 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 826 for (l=0; l<c->nrows[k]; l++) { 827 col = c->columnsforrow[k][l]; /* global column index */ 828 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 829 } 830 } 831 ierr = PetscFree(gtol);CHKERRQ(ierr); 832 } 833 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 834 835 ierr = PetscFree(rowhit);CHKERRQ(ierr); 836 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 837 ierr = MatRestoreColumnIJ(aij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 838 ierr = MatRestoreColumnIJ(aij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 839 if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,<og);CHKERRQ(ierr);} 840 PetscFunctionReturn(0); 841 } 842 843 844 845 846 847 848