1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3 4 extern PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat); 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "MatFDColoringApply_MPIAIJ" 8 PetscErrorCode MatFDColoringApply_MPIAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 9 { 10 PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void*))coloring->f; 11 PetscErrorCode ierr; 12 PetscInt k,start,end,l,row,col,**vscaleforrow; 13 PetscScalar dx,*y,*xx,*w3_array; 14 PetscScalar *vscale_array; 15 PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 16 Vec w1 = coloring->w1,w2=coloring->w2,w3; 17 void *fctx = coloring->fctx; 18 PetscBool flg = PETSC_FALSE; 19 PetscInt ctype = coloring->ctype,N,col_start=0,col_end=0; 20 Vec x1_tmp; 21 PetscInt nz; 22 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)J->data; 23 Mat A=aij->A,B=aij->B; 24 Mat_SeqAIJ *cspA=(Mat_SeqAIJ*)A->data, *cspB=(Mat_SeqAIJ*)B->data; 25 26 PetscFunctionBegin; 27 ierr = MatSetUnfactored(J);CHKERRQ(ierr); 28 ierr = PetscOptionsGetBool(NULL,"-mat_fd_coloring_dont_rezero",&flg,NULL);CHKERRQ(ierr); 29 if (flg) { 30 ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 31 } else { 32 PetscBool assembled; 33 ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 34 if (assembled) { 35 ierr = MatZeroEntries(J);CHKERRQ(ierr); 36 } 37 } 38 39 x1_tmp = x1; 40 if (!coloring->vscale) { 41 ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 42 } 43 44 if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 45 ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 46 } 47 ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 48 49 /* Set w1 = F(x1) */ 50 if (!coloring->fset) { 51 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 52 ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 53 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 54 } else { 55 coloring->fset = PETSC_FALSE; 56 } 57 58 if (!coloring->w3) { 59 ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 60 ierr = PetscLogObjectParent((PetscObject)coloring,(PetscObject)coloring->w3);CHKERRQ(ierr); 61 } 62 w3 = coloring->w3; 63 64 /* Compute all the local scale factors, including ghost points */ 65 ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 66 ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 67 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 68 if (ctype == IS_COLORING_GHOSTED) { 69 col_start = 0; col_end = N; 70 } else if (ctype == IS_COLORING_GLOBAL) { 71 xx = xx - start; 72 vscale_array = vscale_array - start; 73 col_start = start; col_end = N + start; 74 } 75 for (col=col_start; col<col_end; col++) { 76 /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 77 if (coloring->htype[0] == 'w') { 78 dx = 1.0 + unorm; 79 } else { 80 dx = xx[col]; 81 } 82 if (dx == (PetscScalar)0.0) dx = 1.0; 83 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 84 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 85 dx *= epsilon; 86 vscale_array[col] = (PetscScalar)1.0/dx; 87 } 88 if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 89 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 90 if (ctype == IS_COLORING_GLOBAL) { 91 ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 92 ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 93 } 94 95 if (coloring->vscaleforrow) { 96 vscaleforrow = coloring->vscaleforrow; 97 } else SETERRQ(PetscObjectComm((PetscObject)J),PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 98 99 /* Loop over each color */ 100 nz = 0; 101 ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 102 for (k=0; k<coloring->ncolors; k++) { 103 coloring->currentcolor = k; 104 105 ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 106 ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 107 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 108 /* 109 Loop over each column associated with color 110 adding the perturbation to the vector w3. 111 */ 112 for (l=0; l<coloring->ncolumns[k]; l++) { 113 col = coloring->columns[k][l]; /* local column of the matrix we are probing for */ 114 if (coloring->htype[0] == 'w') { 115 dx = 1.0 + unorm; 116 } else { 117 dx = xx[col]; 118 } 119 if (dx == (PetscScalar)0.0) dx = 1.0; 120 if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 121 else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 122 dx *= epsilon; 123 if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 124 w3_array[col] += dx; 125 } 126 if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 127 ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 128 129 /* 130 Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 131 w2 = F(x1 + dx) - F(x1) 132 */ 133 ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 134 ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 135 ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 136 ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 137 138 /* Loop over rows of vector, putting results into Jacobian matrix */ 139 ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 140 for (l=0; l<coloring->nrows[k]; l++) { 141 row = coloring->rows[k][l]; /* local row index */ 142 *(coloring->spaddr[nz++]) = vscale_array[vscaleforrow[k][l]]*y[row]; 143 } 144 ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 145 } /* endof for each color */ 146 if (nz != cspA->nz + cspB->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz,cspA->nz+cspB->nz); 147 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 148 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 149 150 if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 151 ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 152 ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 153 154 coloring->currentcolor = -1; 155 PetscFunctionReturn(0); 156 } 157 158 #undef __FUNCT__ 159 #define __FUNCT__ "MatFDColoringCreate_MPIAIJ_new" 160 PetscErrorCode MatFDColoringCreate_MPIAIJ_new(Mat mat,ISColoring iscoloring,MatFDColoring c) 161 { 162 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 163 PetscErrorCode ierr; 164 PetscMPIInt size,*ncolsonproc,*disp,nn; 165 PetscInt i,n,nrows,j,k,m,ncols,col; 166 const PetscInt *is,*A_ci,*A_cj,*B_ci,*B_cj,*rows = 0,*ltog; 167 PetscInt nis = iscoloring->n,nctot,*cols; 168 PetscInt *rowhit,M,cstart,cend,colb; 169 PetscInt *columnsforrow,l; 170 IS *isa; 171 ISLocalToGlobalMapping map = mat->cmap->mapping; 172 PetscInt ctype=c->ctype; 173 Mat A=aij->A,B=aij->B; 174 Mat_SeqAIJ *cspA=(Mat_SeqAIJ*)A->data, *cspB=(Mat_SeqAIJ*)B->data; 175 PetscScalar *A_val=cspA->a,*B_val=cspB->a; 176 PetscInt spidx; 177 PetscInt *spidxA,*spidxB,nz; 178 PetscScalar **spaddrhit; 179 180 PetscFunctionBegin; 181 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping"); 182 183 if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,<og);CHKERRQ(ierr);} 184 else ltog = NULL; 185 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 186 187 M = mat->rmap->n; 188 cstart = mat->cmap->rstart; 189 cend = mat->cmap->rend; 190 c->M = mat->rmap->N; /* set the global rows and columns and local rows */ 191 c->N = mat->cmap->N; 192 c->m = mat->rmap->n; 193 c->rstart = mat->rmap->rstart; 194 195 c->ncolors = nis; 196 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 197 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 198 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 199 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 200 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 201 ierr = PetscLogObjectMemory((PetscObject)c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 202 203 /* Allow access to data structures of local part of matrix */ 204 if (!aij->colmap) { 205 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 206 } 207 ierr = MatGetColumnIJ_SeqAIJ_Color(aij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 208 ierr = MatGetColumnIJ_SeqAIJ_Color(aij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 209 210 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 211 ierr = PetscMalloc((M+1)*sizeof(PetscScalar*),&spaddrhit);CHKERRQ(ierr); 212 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 213 nz = cspA->nz + cspB->nz; /* total nonzero entries of mat */ 214 ierr = PetscMalloc(nz*sizeof(PetscScalar*),&c->spaddr);CHKERRQ(ierr); 215 216 nz = 0; 217 for (i=0; i<nis; i++) { 218 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 219 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 220 221 c->ncolumns[i] = n; /* local number of columns of this color on this process */ 222 if (n) { 223 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 224 ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr); 225 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 226 } else { 227 c->columns[i] = 0; 228 } 229 230 if (ctype == IS_COLORING_GLOBAL) { 231 /* Determine the total (parallel) number of columns of this color */ 232 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 233 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 234 235 ierr = PetscMPIIntCast(n,&nn);CHKERRQ(ierr); 236 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 237 nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j]; 238 if (!nctot) { 239 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 240 } 241 242 disp[0] = 0; 243 for (j=1; j<size; j++) { 244 disp[j] = disp[j-1] + ncolsonproc[j-1]; 245 } 246 247 /* Get complete list of columns for color on each processor */ 248 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 249 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 250 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 251 } else if (ctype == IS_COLORING_GHOSTED) { 252 /* Determine local number of columns of this color on this process, including ghost points */ 253 nctot = n; 254 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 255 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 256 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 257 258 /* Mark all rows affect by these columns */ 259 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 260 for (j=0; j<nctot; j++) { /* loop over columns*/ 261 if (ctype == IS_COLORING_GHOSTED) { 262 col = ltog[cols[j]]; 263 } else { 264 col = cols[j]; 265 } 266 if (col >= cstart && col < cend) { /* column is in diagonal block of matrix A */ 267 rows = A_cj + A_ci[col-cstart]; 268 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 269 /* loop over columns of A marking them in rowhit */ 270 for (k=0; k<m; k++) { 271 /* set spaddrhit for part A */ 272 spidx = spidxA[A_ci[col-cstart] + k]; 273 spaddrhit[*rows] = &A_val[spidx]; 274 rowhit[*rows++] = col + 1; 275 } 276 } else { /* column is in off-diagonal block of matrix B */ 277 #if defined(PETSC_USE_CTABLE) 278 ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr); 279 colb--; 280 #else 281 colb = aij->colmap[col] - 1; /* local column index */ 282 #endif 283 if (colb == -1) { 284 m = 0; 285 } else { 286 rows = B_cj + B_ci[colb]; 287 m = B_ci[colb+1] - B_ci[colb]; 288 } 289 /* loop over columns of B marking them in rowhit */ 290 for (k=0; k<m; k++) { 291 /* set spaddrhit for part B */ 292 spidx = spidxB[B_ci[colb] + k]; 293 spaddrhit[*rows] = &B_val[spidx]; 294 rowhit[*rows++] = col + 1; 295 } 296 } 297 } 298 299 /* count the number of hits */ 300 nrows = 0; 301 for (j=0; j<M; j++) { 302 if (rowhit[j]) nrows++; 303 } 304 c->nrows[i] = nrows; 305 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 306 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 307 ierr = PetscLogObjectMemory((PetscObject)c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 308 nrows = 0; 309 for (j=0; j<M; j++) { 310 if (rowhit[j]) { 311 c->rows[i][nrows] = j; /* local row index */ 312 c->columnsforrow[i][nrows] = rowhit[j] - 1; /* global column index */ 313 c->spaddr[nz++] = spaddrhit[j]; /* address of mat value for this entry */ 314 nrows++; 315 } 316 } 317 ierr = PetscFree(cols);CHKERRQ(ierr); 318 } 319 if (nz != cspA->nz + cspB->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"nz %d != mat->nz %d\n",nz,cspA->nz+cspB->nz); 320 321 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 322 /* 323 vscale will contain the "diagonal" on processor scalings followed by the off processor 324 */ 325 if (ctype == IS_COLORING_GLOBAL) { 326 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),aij->A->rmap->n,PETSC_DETERMINE,aij->B->cmap->n,aij->garray,&c->vscale);CHKERRQ(ierr); 327 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 328 for (k=0; k<c->ncolors; k++) { 329 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 330 for (l=0; l<c->nrows[k]; l++) { 331 col = c->columnsforrow[k][l]; 332 if (col >= cstart && col < cend) { 333 /* column is in diagonal block of matrix */ 334 colb = col - cstart; 335 } else { 336 /* column is in "off-processor" part */ 337 #if defined(PETSC_USE_CTABLE) 338 ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr); 339 colb--; 340 #else 341 colb = aij->colmap[col] - 1; 342 #endif 343 colb += cend - cstart; 344 } 345 c->vscaleforrow[k][l] = colb; 346 } 347 } 348 } else if (ctype == IS_COLORING_GHOSTED) { 349 /* Get gtol mapping */ 350 PetscInt N = mat->cmap->N,nlocal,*gtol; 351 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 352 for (i=0; i<N; i++) gtol[i] = -1; 353 ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr); 354 for (i=0; i<nlocal; i++) gtol[ltog[i]] = i; 355 356 c->vscale = 0; /* will be created in MatFDColoringApply() */ 357 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 358 for (k=0; k<c->ncolors; k++) { 359 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 360 for (l=0; l<c->nrows[k]; l++) { 361 col = c->columnsforrow[k][l]; /* global column index */ 362 363 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 364 } 365 } 366 ierr = PetscFree(gtol);CHKERRQ(ierr); 367 } 368 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 369 370 ierr = PetscFree(rowhit);CHKERRQ(ierr); 371 ierr = PetscFree(spaddrhit);CHKERRQ(ierr); 372 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 373 ierr = MatRestoreColumnIJ_SeqAIJ_Color(aij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&spidxA,NULL);CHKERRQ(ierr); 374 ierr = MatRestoreColumnIJ_SeqAIJ_Color(aij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&spidxB,NULL);CHKERRQ(ierr); 375 if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,<og);CHKERRQ(ierr);} 376 377 mat->ops->fdcoloringapply = MatFDColoringApply_MPIAIJ; 378 PetscFunctionReturn(0); 379 } 380 381 /*------------------------------------------------------*/ 382 #undef __FUNCT__ 383 #define __FUNCT__ "MatFDColoringCreate_MPIAIJ" 384 PetscErrorCode MatFDColoringCreate_MPIAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c) 385 { 386 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 387 PetscErrorCode ierr; 388 PetscMPIInt size,*ncolsonproc,*disp,nn; 389 PetscInt i,n,nrows,j,k,m,ncols,col; 390 const PetscInt *is,*A_ci,*A_cj,*B_ci,*B_cj,*rows = 0,*ltog; 391 PetscInt nis = iscoloring->n,nctot,*cols; 392 PetscInt *rowhit,M,cstart,cend,colb; 393 PetscInt *columnsforrow,l; 394 IS *isa; 395 PetscBool done,flg; 396 ISLocalToGlobalMapping map = mat->cmap->mapping; 397 PetscInt ctype=c->ctype; 398 PetscBool new_impl=PETSC_FALSE; 399 400 PetscFunctionBegin; 401 ierr = PetscOptionsName("-new","using new impls","",&new_impl);CHKERRQ(ierr); 402 if (new_impl){ 403 ierr = MatFDColoringCreate_MPIAIJ_new(mat,iscoloring,c);CHKERRQ(ierr); 404 PetscFunctionReturn(0); 405 } 406 if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping"); 407 408 if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,<og);CHKERRQ(ierr);} 409 else ltog = NULL; 410 ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr); 411 412 M = mat->rmap->n; 413 cstart = mat->cmap->rstart; 414 cend = mat->cmap->rend; 415 c->M = mat->rmap->N; /* set the global rows and columns and local rows */ 416 c->N = mat->cmap->N; 417 c->m = mat->rmap->n; 418 c->rstart = mat->rmap->rstart; 419 420 c->ncolors = nis; 421 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr); 422 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr); 423 ierr = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr); 424 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr); 425 ierr = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr); 426 ierr = PetscLogObjectMemory((PetscObject)c,5*nis*sizeof(PetscInt));CHKERRQ(ierr); 427 428 /* Allow access to data structures of local part of matrix */ 429 if (!aij->colmap) { 430 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 431 } 432 ierr = MatGetColumnIJ(aij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 433 ierr = MatGetColumnIJ(aij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 434 435 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr); 436 ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr); 437 438 for (i=0; i<nis; i++) { 439 ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr); 440 ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr); 441 442 c->ncolumns[i] = n; /* local number of columns of this color on this process */ 443 if (n) { 444 ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr); 445 ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr); 446 ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr); 447 } else { 448 c->columns[i] = 0; 449 } 450 451 if (ctype == IS_COLORING_GLOBAL) { 452 /* Determine the total (parallel) number of columns of this color */ 453 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 454 ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr); 455 456 ierr = PetscMPIIntCast(n,&nn);CHKERRQ(ierr); 457 ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 458 nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j]; 459 if (!nctot) { 460 ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr); 461 } 462 463 disp[0] = 0; 464 for (j=1; j<size; j++) { 465 disp[j] = disp[j-1] + ncolsonproc[j-1]; 466 } 467 468 /* Get complete list of columns for color on each processor */ 469 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 470 ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 471 ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr); 472 } else if (ctype == IS_COLORING_GHOSTED) { 473 /* Determine local number of columns of this color on this process, including ghost points */ 474 nctot = n; 475 ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 476 ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr); 477 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type"); 478 479 /* 480 Mark all rows affect by these columns 481 */ 482 /* Temporary option to allow for debugging/testing */ 483 flg = PETSC_FALSE; 484 ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr); 485 if (!flg) { /*-----------------------------------------------------------------------------*/ 486 /* crude, fast version */ 487 ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr); 488 /* loop over columns*/ 489 for (j=0; j<nctot; j++) { 490 if (ctype == IS_COLORING_GHOSTED) { 491 col = ltog[cols[j]]; 492 } else { 493 col = cols[j]; 494 } 495 if (col >= cstart && col < cend) { 496 /* column is in diagonal block of matrix */ 497 rows = A_cj + A_ci[col-cstart]; 498 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 499 } else { 500 #if defined(PETSC_USE_CTABLE) 501 ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr); 502 colb--; 503 #else 504 colb = aij->colmap[col] - 1; 505 #endif 506 if (colb == -1) { 507 m = 0; 508 } else { 509 rows = B_cj + B_ci[colb]; 510 m = B_ci[colb+1] - B_ci[colb]; 511 } 512 } 513 /* loop over columns marking them in rowhit */ 514 for (k=0; k<m; k++) { 515 rowhit[*rows++] = col + 1; 516 } 517 } 518 519 /* count the number of hits */ 520 nrows = 0; 521 for (j=0; j<M; j++) { 522 if (rowhit[j]) nrows++; 523 } 524 c->nrows[i] = nrows; 525 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 526 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 527 ierr = PetscLogObjectMemory((PetscObject)c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 528 nrows = 0; 529 for (j=0; j<M; j++) { 530 if (rowhit[j]) { 531 c->rows[i][nrows] = j; /* local row index */ 532 c->columnsforrow[i][nrows] = rowhit[j] - 1; /* global column index */ 533 nrows++; 534 } 535 } 536 } else { /*-------------------------------------------------------------------------------*/ 537 /* slow version, using rowhit as a linked list */ 538 PetscInt currentcol,fm,mfm; 539 rowhit[M] = M; 540 nrows = 0; 541 /* loop over columns*/ 542 for (j=0; j<nctot; j++) { 543 if (ctype == IS_COLORING_GHOSTED) { 544 col = ltog[cols[j]]; 545 } else { 546 col = cols[j]; 547 } 548 if (col >= cstart && col < cend) { 549 /* column is in diagonal block of matrix */ 550 rows = A_cj + A_ci[col-cstart]; 551 m = A_ci[col-cstart+1] - A_ci[col-cstart]; 552 } else { 553 #if defined(PETSC_USE_CTABLE) 554 ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr); 555 colb--; 556 #else 557 colb = aij->colmap[col] - 1; 558 #endif 559 if (colb == -1) { 560 m = 0; 561 } else { 562 rows = B_cj + B_ci[colb]; 563 m = B_ci[colb+1] - B_ci[colb]; 564 } 565 } 566 567 /* loop over columns marking them in rowhit */ 568 fm = M; /* fm points to first entry in linked list */ 569 for (k=0; k<m; k++) { 570 currentcol = *rows++; 571 /* is it already in the list? */ 572 do { 573 mfm = fm; 574 fm = rowhit[fm]; 575 } while (fm < currentcol); 576 /* not in list so add it */ 577 if (fm != currentcol) { 578 nrows++; 579 columnsforrow[currentcol] = col; 580 /* next three lines insert new entry into linked list */ 581 rowhit[mfm] = currentcol; 582 rowhit[currentcol] = fm; 583 fm = currentcol; 584 /* fm points to present position in list since we know the columns are sorted */ 585 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected"); 586 } 587 } 588 c->nrows[i] = nrows; 589 590 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr); 591 ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr); 592 ierr = PetscLogObjectMemory((PetscObject)c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr); 593 /* now store the linked list of rows into c->rows[i] */ 594 nrows = 0; 595 fm = rowhit[M]; 596 do { 597 c->rows[i][nrows] = fm; 598 c->columnsforrow[i][nrows++] = columnsforrow[fm]; 599 fm = rowhit[fm]; 600 } while (fm < M); 601 } /* ---------------------------------------------------------------------------------------*/ 602 ierr = PetscFree(cols);CHKERRQ(ierr); 603 } 604 605 /* Optimize by adding the vscale, and scaleforrow[][] fields */ 606 /* 607 vscale will contain the "diagonal" on processor scalings followed by the off processor 608 */ 609 if (ctype == IS_COLORING_GLOBAL) { 610 ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),aij->A->rmap->n,PETSC_DETERMINE,aij->B->cmap->n,aij->garray,&c->vscale);CHKERRQ(ierr); 611 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 612 for (k=0; k<c->ncolors; k++) { 613 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 614 for (l=0; l<c->nrows[k]; l++) { 615 col = c->columnsforrow[k][l]; 616 if (col >= cstart && col < cend) { 617 /* column is in diagonal block of matrix */ 618 colb = col - cstart; 619 } else { 620 /* column is in "off-processor" part */ 621 #if defined(PETSC_USE_CTABLE) 622 ierr = PetscTableFind(aij->colmap,col+1,&colb);CHKERRQ(ierr); 623 colb--; 624 #else 625 colb = aij->colmap[col] - 1; 626 #endif 627 colb += cend - cstart; 628 } 629 c->vscaleforrow[k][l] = colb; 630 } 631 } 632 } else if (ctype == IS_COLORING_GHOSTED) { 633 /* Get gtol mapping */ 634 PetscInt N = mat->cmap->N,nlocal,*gtol; 635 ierr = PetscMalloc((N+1)*sizeof(PetscInt),>ol);CHKERRQ(ierr); 636 for (i=0; i<N; i++) gtol[i] = -1; 637 ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr); 638 for (i=0; i<nlocal; i++) gtol[ltog[i]] = i; 639 640 c->vscale = 0; /* will be created in MatFDColoringApply() */ 641 ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr); 642 for (k=0; k<c->ncolors; k++) { 643 ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr); 644 for (l=0; l<c->nrows[k]; l++) { 645 col = c->columnsforrow[k][l]; /* global column index */ 646 647 c->vscaleforrow[k][l] = gtol[col]; /* local column index */ 648 } 649 } 650 ierr = PetscFree(gtol);CHKERRQ(ierr); 651 } 652 ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr); 653 654 ierr = PetscFree(rowhit);CHKERRQ(ierr); 655 ierr = PetscFree(columnsforrow);CHKERRQ(ierr); 656 ierr = MatRestoreColumnIJ(aij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr); 657 ierr = MatRestoreColumnIJ(aij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr); 658 if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,<og);CHKERRQ(ierr);} 659 PetscFunctionReturn(0); 660 } 661 662 663 664 665 666 667