1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 48 { 49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50 51 PetscFunctionBegin; 52 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 53 A->boundtocpu = flg; 54 #endif 55 if (a->A) { 56 PetscCall(MatBindToCPU(a->A,flg)); 57 } 58 if (a->B) { 59 PetscCall(MatBindToCPU(a->B,flg)); 60 } 61 62 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 63 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 64 * to differ from the parent matrix. */ 65 if (a->lvec) { 66 PetscCall(VecBindToCPU(a->lvec,flg)); 67 } 68 if (a->diag) { 69 PetscCall(VecBindToCPU(a->diag,flg)); 70 } 71 72 PetscFunctionReturn(0); 73 } 74 75 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 76 { 77 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 78 79 PetscFunctionBegin; 80 if (mat->A) { 81 PetscCall(MatSetBlockSizes(mat->A,rbs,cbs)); 82 PetscCall(MatSetBlockSizes(mat->B,rbs,1)); 83 } 84 PetscFunctionReturn(0); 85 } 86 87 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 88 { 89 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 90 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 91 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 92 const PetscInt *ia,*ib; 93 const MatScalar *aa,*bb,*aav,*bav; 94 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 95 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 96 97 PetscFunctionBegin; 98 *keptrows = NULL; 99 100 ia = a->i; 101 ib = b->i; 102 PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav)); 103 PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav)); 104 for (i=0; i<m; i++) { 105 na = ia[i+1] - ia[i]; 106 nb = ib[i+1] - ib[i]; 107 if (!na && !nb) { 108 cnt++; 109 goto ok1; 110 } 
111 aa = aav + ia[i]; 112 for (j=0; j<na; j++) { 113 if (aa[j] != 0.0) goto ok1; 114 } 115 bb = bav + ib[i]; 116 for (j=0; j <nb; j++) { 117 if (bb[j] != 0.0) goto ok1; 118 } 119 cnt++; 120 ok1:; 121 } 122 PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M))); 123 if (!n0rows) { 124 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 125 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 126 PetscFunctionReturn(0); 127 } 128 PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows)); 129 cnt = 0; 130 for (i=0; i<m; i++) { 131 na = ia[i+1] - ia[i]; 132 nb = ib[i+1] - ib[i]; 133 if (!na && !nb) continue; 134 aa = aav + ia[i]; 135 for (j=0; j<na;j++) { 136 if (aa[j] != 0.0) { 137 rows[cnt++] = rstart + i; 138 goto ok2; 139 } 140 } 141 bb = bav + ib[i]; 142 for (j=0; j<nb; j++) { 143 if (bb[j] != 0.0) { 144 rows[cnt++] = rstart + i; 145 goto ok2; 146 } 147 } 148 ok2:; 149 } 150 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows)); 151 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 152 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 159 PetscBool cong; 160 161 PetscFunctionBegin; 162 PetscCall(MatHasCongruentLayouts(Y,&cong)); 163 if (Y->assembled && cong) { 164 PetscCall(MatDiagonalSet(aij->A,D,is)); 165 } else { 166 PetscCall(MatDiagonalSet_Default(Y,D,is)); 167 } 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 172 { 173 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 174 PetscInt i,rstart,nrows,*rows; 175 176 PetscFunctionBegin; 177 *zrows = NULL; 178 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows)); 179 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 180 for (i=0; i<nrows; i++) rows[i] += rstart; 181 
PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows)); 182 PetscFunctionReturn(0); 183 } 184 185 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 188 PetscInt i,m,n,*garray = aij->garray; 189 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 190 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 191 PetscReal *work; 192 const PetscScalar *dummy; 193 194 PetscFunctionBegin; 195 PetscCall(MatGetSize(A,&m,&n)); 196 PetscCall(PetscCalloc1(n,&work)); 197 PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy)); 198 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy)); 199 PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy)); 200 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy)); 201 if (type == NORM_2) { 202 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 203 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 204 } 205 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 206 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 207 } 208 } else if (type == NORM_1) { 209 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 210 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 211 } 212 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 213 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 214 } 215 } else if (type == NORM_INFINITY) { 216 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 217 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 218 } 219 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 220 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 221 } 222 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 223 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 224 work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 225 } 226 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 
227 work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 228 } 229 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 230 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 231 work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 232 } 233 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 234 work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 235 } 236 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 237 if (type == NORM_INFINITY) { 238 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 239 } else { 240 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 241 } 242 PetscCall(PetscFree(work)); 243 if (type == NORM_2) { 244 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 245 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 246 for (i=0; i<n; i++) reductions[i] /= m; 247 } 248 PetscFunctionReturn(0); 249 } 250 251 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 252 { 253 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 254 IS sis,gis; 255 const PetscInt *isis,*igis; 256 PetscInt n,*iis,nsis,ngis,rstart,i; 257 258 PetscFunctionBegin; 259 PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis)); 260 PetscCall(MatFindNonzeroRows(a->B,&gis)); 261 PetscCall(ISGetSize(gis,&ngis)); 262 PetscCall(ISGetSize(sis,&nsis)); 263 PetscCall(ISGetIndices(sis,&isis)); 264 PetscCall(ISGetIndices(gis,&igis)); 265 266 PetscCall(PetscMalloc1(ngis+nsis,&iis)); 267 PetscCall(PetscArraycpy(iis,igis,ngis)); 268 PetscCall(PetscArraycpy(iis+ngis,isis,nsis)); 269 n = ngis + nsis; 270 PetscCall(PetscSortRemoveDupsInt(&n,iis)); 271 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 272 for (i=0; i<n; i++) iis[i] += rstart; 273 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is)); 274 275 
PetscCall(ISRestoreIndices(sis,&isis)); 276 PetscCall(ISRestoreIndices(gis,&igis)); 277 PetscCall(ISDestroy(&sis)); 278 PetscCall(ISDestroy(&gis)); 279 PetscFunctionReturn(0); 280 } 281 282 /* 283 Local utility routine that creates a mapping from the global column 284 number to the local number in the off-diagonal part of the local 285 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 286 a slightly higher hash table cost; without it it is not scalable (each processor 287 has an order N integer array but is fast to access. 288 */ 289 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 290 { 291 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 292 PetscInt n = aij->B->cmap->n,i; 293 294 PetscFunctionBegin; 295 PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 296 #if defined(PETSC_USE_CTABLE) 297 PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap)); 298 for (i=0; i<n; i++) { 299 PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES)); 300 } 301 #else 302 PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap)); 303 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt))); 304 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 305 #endif 306 PetscFunctionReturn(0); 307 } 308 309 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 310 { \ 311 if (col <= lastcol1) low1 = 0; \ 312 else high1 = nrow1; \ 313 lastcol1 = col;\ 314 while (high1-low1 > 5) { \ 315 t = (low1+high1)/2; \ 316 if (rp1[t] > col) high1 = t; \ 317 else low1 = t; \ 318 } \ 319 for (_i=low1; _i<high1; _i++) { \ 320 if (rp1[_i] > col) break; \ 321 if (rp1[_i] == col) { \ 322 if (addv == ADD_VALUES) { \ 323 ap1[_i] += value; \ 324 /* Not sure LogFlops will slow dow the code or not */ \ 325 (void)PetscLogFlops(1.0); \ 326 } \ 327 else ap1[_i] = value; \ 328 goto a_noinsert; \ 329 } \ 330 } \ 331 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 
0; high1 = nrow1;goto a_noinsert;} \ 332 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 333 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 334 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 335 N = nrow1++ - 1; a->nz++; high1++; \ 336 /* shift up all the later entries in this row */ \ 337 PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\ 338 PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\ 339 rp1[_i] = col; \ 340 ap1[_i] = value; \ 341 A->nonzerostate++;\ 342 a_noinsert: ; \ 343 ailen[row] = nrow1; \ 344 } 345 346 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 347 { \ 348 if (col <= lastcol2) low2 = 0; \ 349 else high2 = nrow2; \ 350 lastcol2 = col; \ 351 while (high2-low2 > 5) { \ 352 t = (low2+high2)/2; \ 353 if (rp2[t] > col) high2 = t; \ 354 else low2 = t; \ 355 } \ 356 for (_i=low2; _i<high2; _i++) { \ 357 if (rp2[_i] > col) break; \ 358 if (rp2[_i] == col) { \ 359 if (addv == ADD_VALUES) { \ 360 ap2[_i] += value; \ 361 (void)PetscLogFlops(1.0); \ 362 } \ 363 else ap2[_i] = value; \ 364 goto b_noinsert; \ 365 } \ 366 } \ 367 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 368 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 369 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 370 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 371 N = nrow2++ - 1; b->nz++; high2++; \ 372 /* shift up all the later entries in this row */ \ 373 PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\ 374 PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\ 375 rp2[_i] = col; \ 376 ap2[_i] = value; \ 377 B->nonzerostate++; \ 378 b_noinsert: ; \ 379 
bilen[row] = nrow2; \ 380 } 381 382 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 383 { 384 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 385 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 386 PetscInt l,*garray = mat->garray,diag; 387 PetscScalar *aa,*ba; 388 389 PetscFunctionBegin; 390 /* code only works for square matrices A */ 391 392 /* find size of row to the left of the diagonal part */ 393 PetscCall(MatGetOwnershipRange(A,&diag,NULL)); 394 row = row - diag; 395 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 396 if (garray[b->j[b->i[row]+l]] > diag) break; 397 } 398 if (l) { 399 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 400 PetscCall(PetscArraycpy(ba+b->i[row],v,l)); 401 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 402 } 403 404 /* diagonal part */ 405 if (a->i[row+1]-a->i[row]) { 406 PetscCall(MatSeqAIJGetArray(mat->A,&aa)); 407 PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]))); 408 PetscCall(MatSeqAIJRestoreArray(mat->A,&aa)); 409 } 410 411 /* right of diagonal part */ 412 if (b->i[row+1]-b->i[row]-l) { 413 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 414 PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l)); 415 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 416 } 417 PetscFunctionReturn(0); 418 } 419 420 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 421 { 422 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 423 PetscScalar value = 0.0; 424 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 425 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 426 PetscBool roworiented = aij->roworiented; 427 428 /* Some Variables required in the macro */ 429 Mat A = aij->A; 430 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 431 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 432 PetscBool ignorezeroentries = a->ignorezeroentries; 433 Mat B = 
aij->B; 434 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 435 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 436 MatScalar *aa,*ba; 437 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 438 PetscInt nonew; 439 MatScalar *ap1,*ap2; 440 441 PetscFunctionBegin; 442 PetscCall(MatSeqAIJGetArray(A,&aa)); 443 PetscCall(MatSeqAIJGetArray(B,&ba)); 444 for (i=0; i<m; i++) { 445 if (im[i] < 0) continue; 446 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 447 if (im[i] >= rstart && im[i] < rend) { 448 row = im[i] - rstart; 449 lastcol1 = -1; 450 rp1 = aj + ai[row]; 451 ap1 = aa + ai[row]; 452 rmax1 = aimax[row]; 453 nrow1 = ailen[row]; 454 low1 = 0; 455 high1 = nrow1; 456 lastcol2 = -1; 457 rp2 = bj + bi[row]; 458 ap2 = ba + bi[row]; 459 rmax2 = bimax[row]; 460 nrow2 = bilen[row]; 461 low2 = 0; 462 high2 = nrow2; 463 464 for (j=0; j<n; j++) { 465 if (v) value = roworiented ? 
v[i*n+j] : v[i+j*m]; 466 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 467 if (in[j] >= cstart && in[j] < cend) { 468 col = in[j] - cstart; 469 nonew = a->nonew; 470 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 471 } else if (in[j] < 0) continue; 472 else PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 473 else { 474 if (mat->was_assembled) { 475 if (!aij->colmap) { 476 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 477 } 478 #if defined(PETSC_USE_CTABLE) 479 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */ 480 col--; 481 #else 482 col = aij->colmap[in[j]] - 1; 483 #endif 484 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 485 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 486 col = in[j]; 487 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 488 B = aij->B; 489 b = (Mat_SeqAIJ*)B->data; 490 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 491 rp2 = bj + bi[row]; 492 ap2 = ba + bi[row]; 493 rmax2 = bimax[row]; 494 nrow2 = bilen[row]; 495 low2 = 0; 496 high2 = nrow2; 497 bm = aij->B->rmap->n; 498 ba = b->a; 499 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 500 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 501 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j])); 502 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 503 } 504 } else col = in[j]; 505 nonew = b->nonew; 506 
MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 507 } 508 } 509 } else { 510 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 511 if (!aij->donotstash) { 512 mat->assembled = PETSC_FALSE; 513 if (roworiented) { 514 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 515 } else { 516 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 517 } 518 } 519 } 520 } 521 PetscCall(MatSeqAIJRestoreArray(A,&aa)); 522 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 523 PetscFunctionReturn(0); 524 } 525 526 /* 527 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 528 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 529 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
530 */ 531 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 532 { 533 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 534 Mat A = aij->A; /* diagonal part of the matrix */ 535 Mat B = aij->B; /* offdiagonal part of the matrix */ 536 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 537 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 538 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 539 PetscInt *ailen = a->ilen,*aj = a->j; 540 PetscInt *bilen = b->ilen,*bj = b->j; 541 PetscInt am = aij->A->rmap->n,j; 542 PetscInt diag_so_far = 0,dnz; 543 PetscInt offd_so_far = 0,onz; 544 545 PetscFunctionBegin; 546 /* Iterate over all rows of the matrix */ 547 for (j=0; j<am; j++) { 548 dnz = onz = 0; 549 /* Iterate over all non-zero columns of the current row */ 550 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 551 /* If column is in the diagonal */ 552 if (mat_j[col] >= cstart && mat_j[col] < cend) { 553 aj[diag_so_far++] = mat_j[col] - cstart; 554 dnz++; 555 } else { /* off-diagonal entries */ 556 bj[offd_so_far++] = mat_j[col]; 557 onz++; 558 } 559 } 560 ailen[j] = dnz; 561 bilen[j] = onz; 562 } 563 PetscFunctionReturn(0); 564 } 565 566 /* 567 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 568 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 569 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 570 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 571 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
572 */ 573 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 574 { 575 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 576 Mat A = aij->A; /* diagonal part of the matrix */ 577 Mat B = aij->B; /* offdiagonal part of the matrix */ 578 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 579 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 580 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 581 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 582 PetscInt *ailen = a->ilen,*aj = a->j; 583 PetscInt *bilen = b->ilen,*bj = b->j; 584 PetscInt am = aij->A->rmap->n,j; 585 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 586 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 587 PetscScalar *aa = a->a,*ba = b->a; 588 589 PetscFunctionBegin; 590 /* Iterate over all rows of the matrix */ 591 for (j=0; j<am; j++) { 592 dnz_row = onz_row = 0; 593 rowstart_offd = full_offd_i[j]; 594 rowstart_diag = full_diag_i[j]; 595 /* Iterate over all non-zero columns of the current row */ 596 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 597 /* If column is in the diagonal */ 598 if (mat_j[col] >= cstart && mat_j[col] < cend) { 599 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 600 aa[rowstart_diag+dnz_row] = mat_a[col]; 601 dnz_row++; 602 } else { /* off-diagonal entries */ 603 bj[rowstart_offd+onz_row] = mat_j[col]; 604 ba[rowstart_offd+onz_row] = mat_a[col]; 605 onz_row++; 606 } 607 } 608 ailen[j] = dnz_row; 609 bilen[j] = onz_row; 610 } 611 PetscFunctionReturn(0); 612 } 613 614 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 615 { 616 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 617 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 618 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 619 
620 PetscFunctionBegin; 621 for (i=0; i<m; i++) { 622 if (idxm[i] < 0) continue; /* negative row */ 623 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 624 if (idxm[i] >= rstart && idxm[i] < rend) { 625 row = idxm[i] - rstart; 626 for (j=0; j<n; j++) { 627 if (idxn[j] < 0) continue; /* negative column */ 628 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 629 if (idxn[j] >= cstart && idxn[j] < cend) { 630 col = idxn[j] - cstart; 631 PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j)); 632 } else { 633 if (!aij->colmap) { 634 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 635 } 636 #if defined(PETSC_USE_CTABLE) 637 PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col)); 638 col--; 639 #else 640 col = aij->colmap[idxn[j]] - 1; 641 #endif 642 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 643 else { 644 PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j)); 645 } 646 } 647 } 648 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 649 } 650 PetscFunctionReturn(0); 651 } 652 653 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 654 { 655 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 656 PetscInt nstash,reallocs; 657 658 PetscFunctionBegin; 659 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 660 661 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 662 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 663 PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 664 PetscFunctionReturn(0); 665 } 666 667 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 668 { 669 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 670 PetscMPIInt n; 671 PetscInt 
i,j,rstart,ncols,flg; 672 PetscInt *row,*col; 673 PetscBool other_disassembled; 674 PetscScalar *val; 675 676 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 677 678 PetscFunctionBegin; 679 if (!aij->donotstash && !mat->nooffprocentries) { 680 while (1) { 681 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 682 if (!flg) break; 683 684 for (i=0; i<n;) { 685 /* Now identify the consecutive vals belonging to the same row */ 686 for (j=i,rstart=row[j]; j<n; j++) { 687 if (row[j] != rstart) break; 688 } 689 if (j < n) ncols = j-i; 690 else ncols = n-i; 691 /* Now assemble all these values with a single function call */ 692 PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 693 i = j; 694 } 695 } 696 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 697 } 698 #if defined(PETSC_HAVE_DEVICE) 699 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 700 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 701 if (mat->boundtocpu) { 702 PetscCall(MatBindToCPU(aij->A,PETSC_TRUE)); 703 PetscCall(MatBindToCPU(aij->B,PETSC_TRUE)); 704 } 705 #endif 706 PetscCall(MatAssemblyBegin(aij->A,mode)); 707 PetscCall(MatAssemblyEnd(aij->A,mode)); 708 709 /* determine if any processor has disassembled, if so we must 710 also disassemble ourself, in order that we may reassemble. 
*/ 711 /* 712 if nonzero structure of submatrix B cannot change then we know that 713 no processor disassembled thus we can skip this stuff 714 */ 715 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 716 PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat))); 717 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */ 718 PetscCall(MatDisAssemble_MPIAIJ(mat)); 719 } 720 } 721 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 722 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 723 } 724 PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE)); 725 #if defined(PETSC_HAVE_DEVICE) 726 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 727 #endif 728 PetscCall(MatAssemblyBegin(aij->B,mode)); 729 PetscCall(MatAssemblyEnd(aij->B,mode)); 730 731 PetscCall(PetscFree2(aij->rowvalues,aij->rowindices)); 732 733 aij->rowvalues = NULL; 734 735 PetscCall(VecDestroy(&aij->diag)); 736 737 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 738 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 739 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 740 PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat))); 741 } 742 #if defined(PETSC_HAVE_DEVICE) 743 mat->offloadmask = PETSC_OFFLOAD_BOTH; 744 #endif 745 PetscFunctionReturn(0); 746 } 747 748 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 749 { 750 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 751 752 PetscFunctionBegin; 753 PetscCall(MatZeroEntries(l->A)); 754 PetscCall(MatZeroEntries(l->B)); 755 PetscFunctionReturn(0); 756 } 757 758 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 759 { 
760 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 761 PetscObjectState sA, sB; 762 PetscInt *lrows; 763 PetscInt r, len; 764 PetscBool cong, lch, gch; 765 766 PetscFunctionBegin; 767 /* get locally owned rows */ 768 PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows)); 769 PetscCall(MatHasCongruentLayouts(A,&cong)); 770 /* fix right hand side if needed */ 771 if (x && b) { 772 const PetscScalar *xx; 773 PetscScalar *bb; 774 775 PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 776 PetscCall(VecGetArrayRead(x, &xx)); 777 PetscCall(VecGetArray(b, &bb)); 778 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 779 PetscCall(VecRestoreArrayRead(x, &xx)); 780 PetscCall(VecRestoreArray(b, &bb)); 781 } 782 783 sA = mat->A->nonzerostate; 784 sB = mat->B->nonzerostate; 785 786 if (diag != 0.0 && cong) { 787 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 788 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 789 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 790 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 791 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 792 PetscInt nnwA, nnwB; 793 PetscBool nnzA, nnzB; 794 795 nnwA = aijA->nonew; 796 nnwB = aijB->nonew; 797 nnzA = aijA->keepnonzeropattern; 798 nnzB = aijB->keepnonzeropattern; 799 if (!nnzA) { 800 PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 801 aijA->nonew = 0; 802 } 803 if (!nnzB) { 804 PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 805 aijB->nonew = 0; 806 } 807 /* Must zero here before the next loop */ 808 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 809 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 810 for (r = 0; r < len; ++r) { 
811 const PetscInt row = lrows[r] + A->rmap->rstart; 812 if (row >= A->cmap->N) continue; 813 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 814 } 815 aijA->nonew = nnwA; 816 aijB->nonew = nnwB; 817 } else { 818 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 819 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 820 } 821 PetscCall(PetscFree(lrows)); 822 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 823 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 824 825 /* reduce nonzerostate */ 826 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 827 PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A))); 828 if (gch) A->nonzerostate++; 829 PetscFunctionReturn(0); 830 } 831 832 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 833 { 834 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 835 PetscMPIInt n = A->rmap->n; 836 PetscInt i,j,r,m,len = 0; 837 PetscInt *lrows,*owners = A->rmap->range; 838 PetscMPIInt p = 0; 839 PetscSFNode *rrows; 840 PetscSF sf; 841 const PetscScalar *xx; 842 PetscScalar *bb,*mask,*aij_a; 843 Vec xmask,lmask; 844 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 845 const PetscInt *aj, *ii,*ridx; 846 PetscScalar *aa; 847 848 PetscFunctionBegin; 849 /* Create SF where leaves are input rows and roots are owned rows */ 850 PetscCall(PetscMalloc1(n, &lrows)); 851 for (r = 0; r < n; ++r) lrows[r] = -1; 852 PetscCall(PetscMalloc1(N, &rrows)); 853 for (r = 0; r < N; ++r) { 854 const PetscInt idx = rows[r]; 855 PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 856 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 857 PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p)); 858 } 859 rrows[r].rank = p; 860 rrows[r].index = rows[r] - owners[p]; 861 } 
862 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf)); 863 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 864 /* Collect flags for rows to be zeroed */ 865 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 866 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 867 PetscCall(PetscSFDestroy(&sf)); 868 /* Compress and put in row numbers */ 869 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 870 /* zero diagonal part of matrix */ 871 PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b)); 872 /* handle off diagonal part of matrix */ 873 PetscCall(MatCreateVecs(A,&xmask,NULL)); 874 PetscCall(VecDuplicate(l->lvec,&lmask)); 875 PetscCall(VecGetArray(xmask,&bb)); 876 for (i=0; i<len; i++) bb[lrows[i]] = 1; 877 PetscCall(VecRestoreArray(xmask,&bb)); 878 PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 879 PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 880 PetscCall(VecDestroy(&xmask)); 881 if (x && b) { /* this code is buggy when the row and column layout don't match */ 882 PetscBool cong; 883 884 PetscCall(MatHasCongruentLayouts(A,&cong)); 885 PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 886 PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 887 PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 888 PetscCall(VecGetArrayRead(l->lvec,&xx)); 889 PetscCall(VecGetArray(b,&bb)); 890 } 891 PetscCall(VecGetArray(lmask,&mask)); 892 /* remove zeroed rows of off diagonal matrix */ 893 PetscCall(MatSeqAIJGetArray(l->B,&aij_a)); 894 ii = aij->i; 895 for (i=0; i<len; i++) { 896 PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]])); 897 } 898 /* loop over all elements of off process part of matrix zeroing removed columns*/ 899 if (aij->compressedrow.use) { 900 m = 
aij->compressedrow.nrows; 901 ii = aij->compressedrow.i; 902 ridx = aij->compressedrow.rindex; 903 for (i=0; i<m; i++) { 904 n = ii[i+1] - ii[i]; 905 aj = aij->j + ii[i]; 906 aa = aij_a + ii[i]; 907 908 for (j=0; j<n; j++) { 909 if (PetscAbsScalar(mask[*aj])) { 910 if (b) bb[*ridx] -= *aa*xx[*aj]; 911 *aa = 0.0; 912 } 913 aa++; 914 aj++; 915 } 916 ridx++; 917 } 918 } else { /* do not use compressed row format */ 919 m = l->B->rmap->n; 920 for (i=0; i<m; i++) { 921 n = ii[i+1] - ii[i]; 922 aj = aij->j + ii[i]; 923 aa = aij_a + ii[i]; 924 for (j=0; j<n; j++) { 925 if (PetscAbsScalar(mask[*aj])) { 926 if (b) bb[i] -= *aa*xx[*aj]; 927 *aa = 0.0; 928 } 929 aa++; 930 aj++; 931 } 932 } 933 } 934 if (x && b) { 935 PetscCall(VecRestoreArray(b,&bb)); 936 PetscCall(VecRestoreArrayRead(l->lvec,&xx)); 937 } 938 PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a)); 939 PetscCall(VecRestoreArray(lmask,&mask)); 940 PetscCall(VecDestroy(&lmask)); 941 PetscCall(PetscFree(lrows)); 942 943 /* only change matrix nonzero state if pattern was allowed to be changed */ 944 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 945 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 946 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 947 } 948 PetscFunctionReturn(0); 949 } 950 951 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 952 { 953 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 954 PetscInt nt; 955 VecScatter Mvctx = a->Mvctx; 956 957 PetscFunctionBegin; 958 PetscCall(VecGetLocalSize(xx,&nt)); 959 PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt); 960 PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 961 PetscCall((*a->A->ops->mult)(a->A,xx,yy)); 962 PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 963 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy)); 964 
PetscFunctionReturn(0); 965 } 966 967 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 968 { 969 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 970 971 PetscFunctionBegin; 972 PetscCall(MatMultDiagonalBlock(a->A,bb,xx)); 973 PetscFunctionReturn(0); 974 } 975 976 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 977 { 978 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 979 VecScatter Mvctx = a->Mvctx; 980 981 PetscFunctionBegin; 982 PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 983 PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz)); 984 PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 985 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz)); 986 PetscFunctionReturn(0); 987 } 988 989 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 990 { 991 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 992 993 PetscFunctionBegin; 994 /* do nondiagonal part */ 995 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 996 /* do local part */ 997 PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy)); 998 /* add partial results together */ 999 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1000 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1001 PetscFunctionReturn(0); 1002 } 1003 1004 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1005 { 1006 MPI_Comm comm; 1007 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1008 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1009 IS Me,Notme; 1010 PetscInt M,N,first,last,*notme,i; 1011 PetscBool lf; 1012 PetscMPIInt size; 1013 1014 PetscFunctionBegin; 1015 /* Easy test: symmetric diagonal block */ 1016 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1017 PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf)); 1018 PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat))); 1019 if (!*f) PetscFunctionReturn(0); 1020 PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm)); 1021 
PetscCallMPI(MPI_Comm_size(comm,&size)); 1022 if (size == 1) PetscFunctionReturn(0); 1023 1024 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1025 PetscCall(MatGetSize(Amat,&M,&N)); 1026 PetscCall(MatGetOwnershipRange(Amat,&first,&last)); 1027 PetscCall(PetscMalloc1(N-last+first,¬me)); 1028 for (i=0; i<first; i++) notme[i] = i; 1029 for (i=last; i<M; i++) notme[i-last+first] = i; 1030 PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme)); 1031 PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me)); 1032 PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs)); 1033 Aoff = Aoffs[0]; 1034 PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs)); 1035 Boff = Boffs[0]; 1036 PetscCall(MatIsTranspose(Aoff,Boff,tol,f)); 1037 PetscCall(MatDestroyMatrices(1,&Aoffs)); 1038 PetscCall(MatDestroyMatrices(1,&Boffs)); 1039 PetscCall(ISDestroy(&Me)); 1040 PetscCall(ISDestroy(&Notme)); 1041 PetscCall(PetscFree(notme)); 1042 PetscFunctionReturn(0); 1043 } 1044 1045 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1046 { 1047 PetscFunctionBegin; 1048 PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f)); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1055 1056 PetscFunctionBegin; 1057 /* do nondiagonal part */ 1058 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1059 /* do local part */ 1060 PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz)); 1061 /* add partial results together */ 1062 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1063 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 /* 1068 This only works correctly for square matrices where the subblock A->A is the 1069 diagonal block 1070 */ 1071 PetscErrorCode 
MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1072 { 1073 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1074 1075 PetscFunctionBegin; 1076 PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1077 PetscCheckFalse(A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1078 PetscCall(MatGetDiagonal(a->A,v)); 1079 PetscFunctionReturn(0); 1080 } 1081 1082 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1083 { 1084 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1085 1086 PetscFunctionBegin; 1087 PetscCall(MatScale(a->A,aa)); 1088 PetscCall(MatScale(a->B,aa)); 1089 PetscFunctionReturn(0); 1090 } 1091 1092 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */ 1093 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) 1094 { 1095 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1096 1097 PetscFunctionBegin; 1098 PetscCall(PetscSFDestroy(&aij->coo_sf)); 1099 PetscCall(PetscFree(aij->Aperm1)); 1100 PetscCall(PetscFree(aij->Bperm1)); 1101 PetscCall(PetscFree(aij->Ajmap1)); 1102 PetscCall(PetscFree(aij->Bjmap1)); 1103 1104 PetscCall(PetscFree(aij->Aimap2)); 1105 PetscCall(PetscFree(aij->Bimap2)); 1106 PetscCall(PetscFree(aij->Aperm2)); 1107 PetscCall(PetscFree(aij->Bperm2)); 1108 PetscCall(PetscFree(aij->Ajmap2)); 1109 PetscCall(PetscFree(aij->Bjmap2)); 1110 1111 PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf)); 1112 PetscCall(PetscFree(aij->Cperm1)); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1117 { 1118 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1119 1120 PetscFunctionBegin; 1121 #if defined(PETSC_USE_LOG) 1122 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 1123 #endif 1124 PetscCall(MatStashDestroy_Private(&mat->stash)); 1125 PetscCall(VecDestroy(&aij->diag)); 1126 
PetscCall(MatDestroy(&aij->A)); 1127 PetscCall(MatDestroy(&aij->B)); 1128 #if defined(PETSC_USE_CTABLE) 1129 PetscCall(PetscTableDestroy(&aij->colmap)); 1130 #else 1131 PetscCall(PetscFree(aij->colmap)); 1132 #endif 1133 PetscCall(PetscFree(aij->garray)); 1134 PetscCall(VecDestroy(&aij->lvec)); 1135 PetscCall(VecScatterDestroy(&aij->Mvctx)); 1136 PetscCall(PetscFree2(aij->rowvalues,aij->rowindices)); 1137 PetscCall(PetscFree(aij->ld)); 1138 1139 /* Free COO */ 1140 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 1141 1142 PetscCall(PetscFree(mat->data)); 1143 1144 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1145 PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL)); 1146 1147 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 1148 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL)); 1149 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL)); 1150 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL)); 1151 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL)); 1152 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL)); 1153 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL)); 1154 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL)); 1155 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL)); 1156 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL)); 1157 #if defined(PETSC_HAVE_CUDA) 1158 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL)); 1159 #endif 1160 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1161 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL)); 1162 #endif 1163 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL)); 1164 #if defined(PETSC_HAVE_ELEMENTAL) 1165 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL)); 1166 #endif 1167 #if defined(PETSC_HAVE_SCALAPACK) 1168 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL)); 1169 #endif 1170 #if defined(PETSC_HAVE_HYPRE) 1171 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL)); 1172 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL)); 1173 #endif 1174 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 1175 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL)); 1176 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL)); 1177 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL)); 1178 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL)); 1179 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL)); 1180 #if defined(PETSC_HAVE_MKL_SPARSE) 1181 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL)); 1182 #endif 1183 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL)); 1184 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 1185 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL)); 1186 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL)); 1187 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL)); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer 
viewer) 1192 { 1193 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1194 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1195 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1196 const PetscInt *garray = aij->garray; 1197 const PetscScalar *aa,*ba; 1198 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1199 PetscInt *rowlens; 1200 PetscInt *colidxs; 1201 PetscScalar *matvals; 1202 1203 PetscFunctionBegin; 1204 PetscCall(PetscViewerSetUp(viewer)); 1205 1206 M = mat->rmap->N; 1207 N = mat->cmap->N; 1208 m = mat->rmap->n; 1209 rs = mat->rmap->rstart; 1210 cs = mat->cmap->rstart; 1211 nz = A->nz + B->nz; 1212 1213 /* write matrix header */ 1214 header[0] = MAT_FILE_CLASSID; 1215 header[1] = M; header[2] = N; header[3] = nz; 1216 PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat))); 1217 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1218 1219 /* fill in and store row lengths */ 1220 PetscCall(PetscMalloc1(m,&rowlens)); 1221 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1222 PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT)); 1223 PetscCall(PetscFree(rowlens)); 1224 1225 /* fill in and store column indices */ 1226 PetscCall(PetscMalloc1(nz,&colidxs)); 1227 for (cnt=0, i=0; i<m; i++) { 1228 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1229 if (garray[B->j[jb]] > cs) break; 1230 colidxs[cnt++] = garray[B->j[jb]]; 1231 } 1232 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1233 colidxs[cnt++] = A->j[ja] + cs; 1234 for (; jb<B->i[i+1]; jb++) 1235 colidxs[cnt++] = garray[B->j[jb]]; 1236 } 1237 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1238 PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 1239 PetscCall(PetscFree(colidxs)); 1240 1241 /* fill in and store nonzero values */ 1242 PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa)); 1243 PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba)); 
1244 PetscCall(PetscMalloc1(nz,&matvals)); 1245 for (cnt=0, i=0; i<m; i++) { 1246 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1247 if (garray[B->j[jb]] > cs) break; 1248 matvals[cnt++] = ba[jb]; 1249 } 1250 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1251 matvals[cnt++] = aa[ja]; 1252 for (; jb<B->i[i+1]; jb++) 1253 matvals[cnt++] = ba[jb]; 1254 } 1255 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa)); 1256 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba)); 1257 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1258 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 1259 PetscCall(PetscFree(matvals)); 1260 1261 /* write block size option to the viewer's .info file */ 1262 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #include <petscdraw.h> 1267 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1268 { 1269 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1270 PetscMPIInt rank = aij->rank,size = aij->size; 1271 PetscBool isdraw,iascii,isbinary; 1272 PetscViewer sviewer; 1273 PetscViewerFormat format; 1274 1275 PetscFunctionBegin; 1276 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1277 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1278 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1279 if (iascii) { 1280 PetscCall(PetscViewerGetFormat(viewer,&format)); 1281 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1282 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1283 PetscCall(PetscMalloc1(size,&nz)); 1284 PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 1285 for (i=0; i<(PetscInt)size; i++) { 1286 nmax = PetscMax(nmax,nz[i]); 1287 nmin = 
PetscMin(nmin,nz[i]); 1288 navg += nz[i]; 1289 } 1290 PetscCall(PetscFree(nz)); 1291 navg = navg/size; 1292 PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax)); 1293 PetscFunctionReturn(0); 1294 } 1295 PetscCall(PetscViewerGetFormat(viewer,&format)); 1296 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1297 MatInfo info; 1298 PetscInt *inodes=NULL; 1299 1300 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 1301 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 1302 PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL)); 1303 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1304 if (!inodes) { 1305 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", 1306 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1307 } else { 1308 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", 1309 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1310 } 1311 PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info)); 1312 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1313 PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info)); 1314 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1315 PetscCall(PetscViewerFlush(viewer)); 1316 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1317 PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 1318 PetscCall(VecScatterView(aij->Mvctx,viewer)); 1319 PetscFunctionReturn(0); 1320 } else if (format == 
PETSC_VIEWER_ASCII_INFO) { 1321 PetscInt inodecount,inodelimit,*inodes; 1322 PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit)); 1323 if (inodes) { 1324 PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit)); 1325 } else { 1326 PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n")); 1327 } 1328 PetscFunctionReturn(0); 1329 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1330 PetscFunctionReturn(0); 1331 } 1332 } else if (isbinary) { 1333 if (size == 1) { 1334 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1335 PetscCall(MatView(aij->A,viewer)); 1336 } else { 1337 PetscCall(MatView_MPIAIJ_Binary(mat,viewer)); 1338 } 1339 PetscFunctionReturn(0); 1340 } else if (iascii && size == 1) { 1341 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1342 PetscCall(MatView(aij->A,viewer)); 1343 PetscFunctionReturn(0); 1344 } else if (isdraw) { 1345 PetscDraw draw; 1346 PetscBool isnull; 1347 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1348 PetscCall(PetscDrawIsNull(draw,&isnull)); 1349 if (isnull) PetscFunctionReturn(0); 1350 } 1351 1352 { /* assemble the entire matrix onto first processor */ 1353 Mat A = NULL, Av; 1354 IS isrow,iscol; 1355 1356 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1357 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1358 PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A)); 1359 PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL)); 1360 /* The commented code uses MatCreateSubMatrices instead */ 1361 /* 1362 Mat *AA, A = NULL, Av; 1363 IS isrow,iscol; 1364 1365 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->rmap->N : 0,0,1,&isrow)); 1366 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1367 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1368 if (rank == 0) { 1369 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1370 A = AA[0]; 1371 Av = AA[0]; 1372 } 1373 PetscCall(MatDestroySubMatrices(1,&AA)); 1374 */ 1375 PetscCall(ISDestroy(&iscol)); 1376 PetscCall(ISDestroy(&isrow)); 1377 /* 1378 Everyone has to call to draw the matrix since the graphics waits are 1379 synchronized across all processors that share the PetscDraw object 1380 */ 1381 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1382 if (rank == 0) { 1383 if (((PetscObject)mat)->name) { 1384 PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name)); 1385 } 1386 PetscCall(MatView_SeqAIJ(Av,sviewer)); 1387 } 1388 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1389 PetscCall(PetscViewerFlush(viewer)); 1390 PetscCall(MatDestroy(&A)); 1391 } 1392 PetscFunctionReturn(0); 1393 } 1394 1395 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1396 { 1397 PetscBool iascii,isdraw,issocket,isbinary; 1398 1399 PetscFunctionBegin; 1400 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1401 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1402 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1403 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 1404 if (iascii || isdraw || isbinary || issocket) { 1405 PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer)); 1406 } 1407 PetscFunctionReturn(0); 1408 } 1409 1410 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1411 { 1412 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1413 Vec bb1 = NULL; 1414 PetscBool hasop; 1415 
1416 PetscFunctionBegin; 1417 if (flag == SOR_APPLY_UPPER) { 1418 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1419 PetscFunctionReturn(0); 1420 } 1421 1422 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1423 PetscCall(VecDuplicate(bb,&bb1)); 1424 } 1425 1426 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1427 if (flag & SOR_ZERO_INITIAL_GUESS) { 1428 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1429 its--; 1430 } 1431 1432 while (its--) { 1433 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1434 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1435 1436 /* update rhs: bb1 = bb - B*x */ 1437 PetscCall(VecScale(mat->lvec,-1.0)); 1438 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1439 1440 /* local sweep */ 1441 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 1442 } 1443 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1444 if (flag & SOR_ZERO_INITIAL_GUESS) { 1445 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1446 its--; 1447 } 1448 while (its--) { 1449 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1450 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1451 1452 /* update rhs: bb1 = bb - B*x */ 1453 PetscCall(VecScale(mat->lvec,-1.0)); 1454 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1455 1456 /* local sweep */ 1457 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 1458 } 1459 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1460 if (flag & SOR_ZERO_INITIAL_GUESS) { 1461 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1462 its--; 1463 } 1464 while (its--) { 1465 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1466 
PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1467 1468 /* update rhs: bb1 = bb - B*x */ 1469 PetscCall(VecScale(mat->lvec,-1.0)); 1470 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1471 1472 /* local sweep */ 1473 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 1474 } 1475 } else if (flag & SOR_EISENSTAT) { 1476 Vec xx1; 1477 1478 PetscCall(VecDuplicate(bb,&xx1)); 1479 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx)); 1480 1481 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1482 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1483 if (!mat->diag) { 1484 PetscCall(MatCreateVecs(matin,&mat->diag,NULL)); 1485 PetscCall(MatGetDiagonal(matin,mat->diag)); 1486 } 1487 PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop)); 1488 if (hasop) { 1489 PetscCall(MatMultDiagonalBlock(matin,xx,bb1)); 1490 } else { 1491 PetscCall(VecPointwiseMult(bb1,mat->diag,xx)); 1492 } 1493 PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb)); 1494 1495 PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1)); 1496 1497 /* local sweep */ 1498 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1)); 1499 PetscCall(VecAXPY(xx,1.0,xx1)); 1500 PetscCall(VecDestroy(&xx1)); 1501 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1502 1503 PetscCall(VecDestroy(&bb1)); 1504 1505 matin->factorerrortype = mat->A->factorerrortype; 1506 PetscFunctionReturn(0); 1507 } 1508 1509 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1510 { 1511 Mat aA,aB,Aperm; 1512 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1513 PetscScalar *aa,*ba; 1514 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1515 PetscSF rowsf,sf; 
1516 IS parcolp = NULL; 1517 PetscBool done; 1518 1519 PetscFunctionBegin; 1520 PetscCall(MatGetLocalSize(A,&m,&n)); 1521 PetscCall(ISGetIndices(rowp,&rwant)); 1522 PetscCall(ISGetIndices(colp,&cwant)); 1523 PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest)); 1524 1525 /* Invert row permutation to find out where my rows should go */ 1526 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf)); 1527 PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant)); 1528 PetscCall(PetscSFSetFromOptions(rowsf)); 1529 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1530 PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1531 PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1532 1533 /* Invert column permutation to find out where my columns should go */ 1534 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1535 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant)); 1536 PetscCall(PetscSFSetFromOptions(sf)); 1537 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1538 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1539 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1540 PetscCall(PetscSFDestroy(&sf)); 1541 1542 PetscCall(ISRestoreIndices(rowp,&rwant)); 1543 PetscCall(ISRestoreIndices(colp,&cwant)); 1544 PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols)); 1545 1546 /* Find out where my gcols should go */ 1547 PetscCall(MatGetSize(aB,NULL,&ng)); 1548 PetscCall(PetscMalloc1(ng,&gcdest)); 1549 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1550 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols)); 1551 PetscCall(PetscSFSetFromOptions(sf)); 1552 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1553 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1554 PetscCall(PetscSFDestroy(&sf)); 1555 1556 
PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz)); 1557 PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1558 PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1559 for (i=0; i<m; i++) { 1560 PetscInt row = rdest[i]; 1561 PetscMPIInt rowner; 1562 PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner)); 1563 for (j=ai[i]; j<ai[i+1]; j++) { 1564 PetscInt col = cdest[aj[j]]; 1565 PetscMPIInt cowner; 1566 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */ 1567 if (rowner == cowner) dnnz[i]++; 1568 else onnz[i]++; 1569 } 1570 for (j=bi[i]; j<bi[i+1]; j++) { 1571 PetscInt col = gcdest[bj[j]]; 1572 PetscMPIInt cowner; 1573 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); 1574 if (rowner == cowner) dnnz[i]++; 1575 else onnz[i]++; 1576 } 1577 } 1578 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1579 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1580 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1581 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1582 PetscCall(PetscSFDestroy(&rowsf)); 1583 1584 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm)); 1585 PetscCall(MatSeqAIJGetArray(aA,&aa)); 1586 PetscCall(MatSeqAIJGetArray(aB,&ba)); 1587 for (i=0; i<m; i++) { 1588 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1589 PetscInt j0,rowlen; 1590 rowlen = ai[i+1] - ai[i]; 1591 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1592 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1593 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES)); 1594 } 1595 rowlen = bi[i+1] - bi[i]; 1596 for (j0=j=0; j<rowlen; j0=j) { 1597 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = 
gcdest[bj[bi[i]+j]]; 1598 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES)); 1599 } 1600 } 1601 PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY)); 1602 PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY)); 1603 PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1604 PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1605 PetscCall(MatSeqAIJRestoreArray(aA,&aa)); 1606 PetscCall(MatSeqAIJRestoreArray(aB,&ba)); 1607 PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz)); 1608 PetscCall(PetscFree3(work,rdest,cdest)); 1609 PetscCall(PetscFree(gcdest)); 1610 if (parcolp) PetscCall(ISDestroy(&colp)); 1611 *B = Aperm; 1612 PetscFunctionReturn(0); 1613 } 1614 1615 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1616 { 1617 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1618 1619 PetscFunctionBegin; 1620 PetscCall(MatGetSize(aij->B,NULL,nghosts)); 1621 if (ghosts) *ghosts = aij->garray; 1622 PetscFunctionReturn(0); 1623 } 1624 1625 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1626 { 1627 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1628 Mat A = mat->A,B = mat->B; 1629 PetscLogDouble isend[5],irecv[5]; 1630 1631 PetscFunctionBegin; 1632 info->block_size = 1.0; 1633 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1634 1635 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1636 isend[3] = info->memory; isend[4] = info->mallocs; 1637 1638 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1639 1640 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1641 isend[3] += info->memory; isend[4] += info->mallocs; 1642 if (flag == MAT_LOCAL) { 1643 info->nz_used = isend[0]; 1644 info->nz_allocated = isend[1]; 1645 info->nz_unneeded = isend[2]; 1646 info->memory = isend[3]; 1647 info->mallocs = isend[4]; 1648 } else if (flag == MAT_GLOBAL_MAX) { 1649 
PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 1650 1651 info->nz_used = irecv[0]; 1652 info->nz_allocated = irecv[1]; 1653 info->nz_unneeded = irecv[2]; 1654 info->memory = irecv[3]; 1655 info->mallocs = irecv[4]; 1656 } else if (flag == MAT_GLOBAL_SUM) { 1657 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 1658 1659 info->nz_used = irecv[0]; 1660 info->nz_allocated = irecv[1]; 1661 info->nz_unneeded = irecv[2]; 1662 info->memory = irecv[3]; 1663 info->mallocs = irecv[4]; 1664 } 1665 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1666 info->fill_ratio_needed = 0; 1667 info->factor_mallocs = 0; 1668 PetscFunctionReturn(0); 1669 } 1670 1671 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1672 { 1673 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1674 1675 PetscFunctionBegin; 1676 switch (op) { 1677 case MAT_NEW_NONZERO_LOCATIONS: 1678 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1679 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1680 case MAT_KEEP_NONZERO_PATTERN: 1681 case MAT_NEW_NONZERO_LOCATION_ERR: 1682 case MAT_USE_INODES: 1683 case MAT_IGNORE_ZERO_ENTRIES: 1684 case MAT_FORM_EXPLICIT_TRANSPOSE: 1685 MatCheckPreallocated(A,1); 1686 PetscCall(MatSetOption(a->A,op,flg)); 1687 PetscCall(MatSetOption(a->B,op,flg)); 1688 break; 1689 case MAT_ROW_ORIENTED: 1690 MatCheckPreallocated(A,1); 1691 a->roworiented = flg; 1692 1693 PetscCall(MatSetOption(a->A,op,flg)); 1694 PetscCall(MatSetOption(a->B,op,flg)); 1695 break; 1696 case MAT_FORCE_DIAGONAL_ENTRIES: 1697 case MAT_SORTED_FULL: 1698 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1699 break; 1700 case MAT_IGNORE_OFF_PROC_ENTRIES: 1701 a->donotstash = flg; 1702 break; 1703 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1704 case MAT_SPD: 1705 case MAT_SYMMETRIC: 1706 case MAT_STRUCTURALLY_SYMMETRIC: 1707 case MAT_HERMITIAN: 1708 
/*
   MatGetRow_MPIAIJ - Returns one locally owned row of a parallel AIJ matrix with
   global column indices.

   The row is assembled from the corresponding rows of the diagonal block (mat->A)
   and the off-diagonal block (mat->B) into the work arrays mat->rowvalues and
   mat->rowindices, which are allocated on first use and sized for the longest
   local row.  The returned pointers refer to those work arrays.

   Input Parameters:
+  matin - the MPIAIJ matrix
-  row   - global row number; must lie in [rmap->rstart, rmap->rend)

   Output Parameters:
+  nz  - number of entries in the row (diagonal plus off-diagonal block)
.  idx - global column indices, or NULL if not wanted; set to NULL for an empty row
-  v   - values, or NULL if not wanted; set to NULL for an empty row

   Errors:
   PETSC_ERR_ARG_WRONGSTATE if a row is already active, PETSC_ERR_ARG_OUTOFRANGE if
   the row is not local.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt    i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt    nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt    *cmap,*idx_p;

  PetscFunctionBegin;
  /* only one row may be checked out at a time; the flag is cleared by MatRestoreRow_MPIAIJ() */
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  /* NOTE(review): the flag is raised before the row range check below, so it stays set if that check fails */
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      /* row length = entries in the diagonal block + entries in the off-diagonal block */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
  }

  PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart; /* row number local to this process */

  /* request from the blocks only what the caller asked for; the column indices of B
     are still requested when only values are wanted because they are needed below
     to split the B entries around the diagonal block */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray; /* maps a column of B to its global column number */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of leading B entries whose global column is below cstart;
         the output is laid out as B[0,imark) | A | B[imark,nzB) */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying the values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* values were not requested, so locate the split point here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* columns of A are local to the diagonal block, shift them to global numbering */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      /* empty row */
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz = nztot;
  /* the data has been copied into the work arrays, so the block rows can be given back */
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}
mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1800 { 1801 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1802 1803 PetscFunctionBegin; 1804 PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1805 aij->getrowactive = PETSC_FALSE; 1806 PetscFunctionReturn(0); 1807 } 1808 1809 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1810 { 1811 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1812 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1813 PetscInt i,j,cstart = mat->cmap->rstart; 1814 PetscReal sum = 0.0; 1815 const MatScalar *v,*amata,*bmata; 1816 1817 PetscFunctionBegin; 1818 if (aij->size == 1) { 1819 PetscCall(MatNorm(aij->A,type,norm)); 1820 } else { 1821 PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata)); 1822 PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata)); 1823 if (type == NORM_FROBENIUS) { 1824 v = amata; 1825 for (i=0; i<amat->nz; i++) { 1826 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1827 } 1828 v = bmata; 1829 for (i=0; i<bmat->nz; i++) { 1830 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1831 } 1832 PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1833 *norm = PetscSqrtReal(*norm); 1834 PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz)); 1835 } else if (type == NORM_1) { /* max column norm */ 1836 PetscReal *tmp,*tmp2; 1837 PetscInt *jj,*garray = aij->garray; 1838 PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp)); 1839 PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2)); 1840 *norm = 0.0; 1841 v = amata; jj = amat->j; 1842 for (j=0; j<amat->nz; j++) { 1843 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1844 } 1845 v = bmata; jj = bmat->j; 1846 for (j=0; j<bmat->nz; j++) { 1847 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1848 } 1849 PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1850 for (j=0; j<mat->cmap->N; j++) { 1851 if (tmp2[j] > *norm) *norm = 
tmp2[j]; 1852 } 1853 PetscCall(PetscFree(tmp)); 1854 PetscCall(PetscFree(tmp2)); 1855 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1856 } else if (type == NORM_INFINITY) { /* max row norm */ 1857 PetscReal ntemp = 0.0; 1858 for (j=0; j<aij->A->rmap->n; j++) { 1859 v = amata + amat->i[j]; 1860 sum = 0.0; 1861 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1862 sum += PetscAbsScalar(*v); v++; 1863 } 1864 v = bmata + bmat->i[j]; 1865 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1866 sum += PetscAbsScalar(*v); v++; 1867 } 1868 if (sum > ntemp) ntemp = sum; 1869 } 1870 PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 1871 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1872 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1873 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata)); 1874 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata)); 1875 } 1876 PetscFunctionReturn(0); 1877 } 1878 1879 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1880 { 1881 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1882 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1883 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1884 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1885 Mat B,A_diag,*B_diag; 1886 const MatScalar *pbv,*bv; 1887 1888 PetscFunctionBegin; 1889 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1890 ai = Aloc->i; aj = Aloc->j; 1891 bi = Bloc->i; bj = Bloc->j; 1892 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1893 PetscInt *d_nnz,*g_nnz,*o_nnz; 1894 PetscSFNode *oloc; 1895 PETSC_UNUSED PetscSF sf; 1896 1897 PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc)); 1898 /* compute d_nnz for preallocation */ 1899 PetscCall(PetscArrayzero(d_nnz,na)); 1900 for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++; 1901 /* compute local off-diagonal 
contributions */ 1902 PetscCall(PetscArrayzero(g_nnz,nb)); 1903 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1904 /* map those to global */ 1905 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1906 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray)); 1907 PetscCall(PetscSFSetFromOptions(sf)); 1908 PetscCall(PetscArrayzero(o_nnz,na)); 1909 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1910 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1911 PetscCall(PetscSFDestroy(&sf)); 1912 1913 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 1914 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 1915 PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs))); 1916 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 1917 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 1918 PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc)); 1919 } else { 1920 B = *matout; 1921 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 1922 } 1923 1924 b = (Mat_MPIAIJ*)B->data; 1925 A_diag = a->A; 1926 B_diag = &b->A; 1927 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1928 A_diag_ncol = A_diag->cmap->N; 1929 B_diag_ilen = sub_B_diag->ilen; 1930 B_diag_i = sub_B_diag->i; 1931 1932 /* Set ilen for diagonal of B */ 1933 for (i=0; i<A_diag_ncol; i++) { 1934 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1935 } 1936 1937 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1938 very quickly (=without using MatSetValues), because all writes are local. 
*/ 1939 PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag)); 1940 1941 /* copy over the B part */ 1942 PetscCall(PetscMalloc1(bi[mb],&cols)); 1943 PetscCall(MatSeqAIJGetArrayRead(a->B,&bv)); 1944 pbv = bv; 1945 row = A->rmap->rstart; 1946 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1947 cols_tmp = cols; 1948 for (i=0; i<mb; i++) { 1949 ncol = bi[i+1]-bi[i]; 1950 PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES)); 1951 row++; 1952 pbv += ncol; cols_tmp += ncol; 1953 } 1954 PetscCall(PetscFree(cols)); 1955 PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv)); 1956 1957 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1958 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1959 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1960 *matout = B; 1961 } else { 1962 PetscCall(MatHeaderMerge(A,&B)); 1963 } 1964 PetscFunctionReturn(0); 1965 } 1966 1967 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1968 { 1969 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1970 Mat a = aij->A,b = aij->B; 1971 PetscInt s1,s2,s3; 1972 1973 PetscFunctionBegin; 1974 PetscCall(MatGetLocalSize(mat,&s2,&s3)); 1975 if (rr) { 1976 PetscCall(VecGetLocalSize(rr,&s1)); 1977 PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1978 /* Overlap communication with computation. 
*/ 1979 PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1980 } 1981 if (ll) { 1982 PetscCall(VecGetLocalSize(ll,&s1)); 1983 PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1984 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 1985 } 1986 /* scale the diagonal block */ 1987 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 1988 1989 if (rr) { 1990 /* Do a scatter end and then right scale the off-diagonal block */ 1991 PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1992 PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec)); 1993 } 1994 PetscFunctionReturn(0); 1995 } 1996 1997 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1998 { 1999 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2000 2001 PetscFunctionBegin; 2002 PetscCall(MatSetUnfactored(a->A)); 2003 PetscFunctionReturn(0); 2004 } 2005 2006 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2007 { 2008 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2009 Mat a,b,c,d; 2010 PetscBool flg; 2011 2012 PetscFunctionBegin; 2013 a = matA->A; b = matA->B; 2014 c = matB->A; d = matB->B; 2015 2016 PetscCall(MatEqual(a,c,&flg)); 2017 if (flg) { 2018 PetscCall(MatEqual(b,d,&flg)); 2019 } 2020 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 2021 PetscFunctionReturn(0); 2022 } 2023 2024 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2025 { 2026 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2027 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2028 2029 PetscFunctionBegin; 2030 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2031 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2032 /* because of the column compression in the off-processor part of the matrix a->B, 2033 the number of columns in a->B and b->B may be different, hence we cannot call 2034 the MatCopy() directly on the two parts. If need be, we can provide a more 2035 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2036 then copying the submatrices */ 2037 PetscCall(MatCopy_Basic(A,B,str)); 2038 } else { 2039 PetscCall(MatCopy(a->A,b->A,str)); 2040 PetscCall(MatCopy(a->B,b->B,str)); 2041 } 2042 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2043 PetscFunctionReturn(0); 2044 } 2045 2046 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2047 { 2048 PetscFunctionBegin; 2049 PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 2050 PetscFunctionReturn(0); 2051 } 2052 2053 /* 2054 Computes the number of nonzeros per row needed for preallocation when X and Y 2055 have different nonzero structure. 
2056 */ 2057 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2058 { 2059 PetscInt i,j,k,nzx,nzy; 2060 2061 PetscFunctionBegin; 2062 /* Set the number of nonzeros in the new matrix */ 2063 for (i=0; i<m; i++) { 2064 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2065 nzx = xi[i+1] - xi[i]; 2066 nzy = yi[i+1] - yi[i]; 2067 nnz[i] = 0; 2068 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2069 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2070 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2071 nnz[i]++; 2072 } 2073 for (; k<nzy; k++) nnz[i]++; 2074 } 2075 PetscFunctionReturn(0); 2076 } 2077 2078 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2079 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2080 { 2081 PetscInt m = Y->rmap->N; 2082 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2083 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2084 2085 PetscFunctionBegin; 2086 PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 2087 PetscFunctionReturn(0); 2088 } 2089 2090 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2091 { 2092 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2093 2094 PetscFunctionBegin; 2095 if (str == SAME_NONZERO_PATTERN) { 2096 PetscCall(MatAXPY(yy->A,a,xx->A,str)); 2097 PetscCall(MatAXPY(yy->B,a,xx->B,str)); 2098 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2099 PetscCall(MatAXPY_Basic(Y,a,X,str)); 2100 } else { 2101 Mat B; 2102 PetscInt *nnz_d,*nnz_o; 2103 2104 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 2105 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 2106 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 
2107 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2108 PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap)); 2109 PetscCall(MatSetType(B,((PetscObject)Y)->type_name)); 2110 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d)); 2111 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 2112 PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o)); 2113 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2114 PetscCall(MatHeaderMerge(Y,&B)); 2115 PetscCall(PetscFree(nnz_d)); 2116 PetscCall(PetscFree(nnz_o)); 2117 } 2118 PetscFunctionReturn(0); 2119 } 2120 2121 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2122 2123 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2124 { 2125 PetscFunctionBegin; 2126 if (PetscDefined(USE_COMPLEX)) { 2127 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2128 2129 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2130 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2131 } 2132 PetscFunctionReturn(0); 2133 } 2134 2135 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2136 { 2137 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2138 2139 PetscFunctionBegin; 2140 PetscCall(MatRealPart(a->A)); 2141 PetscCall(MatRealPart(a->B)); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2146 { 2147 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2148 2149 PetscFunctionBegin; 2150 PetscCall(MatImaginaryPart(a->A)); 2151 PetscCall(MatImaginaryPart(a->B)); 2152 PetscFunctionReturn(0); 2153 } 2154 2155 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2156 { 2157 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2158 PetscInt i,*idxb = NULL,m = A->rmap->n; 2159 PetscScalar *va,*vv; 2160 Vec vB,vA; 2161 const PetscScalar *vb; 2162 2163 PetscFunctionBegin; 2164 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 2165 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 2166 2167 PetscCall(VecGetArrayWrite(vA,&va)); 2168 if (idx) { 2169 for (i=0; i<m; i++) { 2170 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2171 } 
2172 } 2173 2174 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 2175 PetscCall(PetscMalloc1(m,&idxb)); 2176 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 2177 2178 PetscCall(VecGetArrayWrite(v,&vv)); 2179 PetscCall(VecGetArrayRead(vB,&vb)); 2180 for (i=0; i<m; i++) { 2181 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2182 vv[i] = vb[i]; 2183 if (idx) idx[i] = a->garray[idxb[i]]; 2184 } else { 2185 vv[i] = va[i]; 2186 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2187 idx[i] = a->garray[idxb[i]]; 2188 } 2189 } 2190 PetscCall(VecRestoreArrayWrite(vA,&vv)); 2191 PetscCall(VecRestoreArrayWrite(vA,&va)); 2192 PetscCall(VecRestoreArrayRead(vB,&vb)); 2193 PetscCall(PetscFree(idxb)); 2194 PetscCall(VecDestroy(&vA)); 2195 PetscCall(VecDestroy(&vB)); 2196 PetscFunctionReturn(0); 2197 } 2198 2199 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2200 { 2201 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2202 PetscInt m = A->rmap->n,n = A->cmap->n; 2203 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2204 PetscInt *cmap = mat->garray; 2205 PetscInt *diagIdx, *offdiagIdx; 2206 Vec diagV, offdiagV; 2207 PetscScalar *a, *diagA, *offdiagA; 2208 const PetscScalar *ba,*bav; 2209 PetscInt r,j,col,ncols,*bi,*bj; 2210 Mat B = mat->B; 2211 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2212 2213 PetscFunctionBegin; 2214 /* When a process holds entire A and other processes have no entry */ 2215 if (A->cmap->N == n) { 2216 PetscCall(VecGetArrayWrite(v,&diagA)); 2217 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2218 PetscCall(MatGetRowMinAbs(mat->A,diagV,idx)); 2219 PetscCall(VecDestroy(&diagV)); 2220 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2221 PetscFunctionReturn(0); 2222 } else if (n == 0) { 2223 if (m) { 2224 PetscCall(VecGetArrayWrite(v,&a)); 2225 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2226 PetscCall(VecRestoreArrayWrite(v,&a)); 2227 } 2228 PetscFunctionReturn(0); 
2229 } 2230 2231 PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2232 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2233 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2234 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2235 2236 /* Get offdiagIdx[] for implicit 0.0 */ 2237 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2238 ba = bav; 2239 bi = b->i; 2240 bj = b->j; 2241 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2242 for (r = 0; r < m; r++) { 2243 ncols = bi[r+1] - bi[r]; 2244 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2245 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2246 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2247 offdiagA[r] = 0.0; 2248 2249 /* Find first hole in the cmap */ 2250 for (j=0; j<ncols; j++) { 2251 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2252 if (col > j && j < cstart) { 2253 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2254 break; 2255 } else if (col > j + n && j >= cstart) { 2256 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2257 break; 2258 } 2259 } 2260 if (j == ncols && ncols < A->cmap->N - n) { 2261 /* a hole is outside compressed Bcols */ 2262 if (ncols == 0) { 2263 if (cstart) { 2264 offdiagIdx[r] = 0; 2265 } else offdiagIdx[r] = cend; 2266 } else { /* ncols > 0 */ 2267 offdiagIdx[r] = cmap[ncols-1] + 1; 2268 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2269 } 2270 } 2271 } 2272 2273 for (j=0; j<ncols; j++) { 2274 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2275 ba++; bj++; 2276 } 2277 } 2278 2279 PetscCall(VecGetArrayWrite(v, &a)); 2280 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2281 for (r = 0; r < m; ++r) { 2282 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2283 a[r] = diagA[r]; 2284 if (idx) idx[r] = cstart + diagIdx[r]; 2285 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 
/*
   MatGetRowMin_MPIAIJ - For every local row, finds the entry with the smallest real part.

   Input Parameter:
.  A - the MPIAIJ matrix

   Output Parameters:
+  v   - for each local row, the minimum entry
-  idx - if not NULL, the global column of that entry

   Three cases are handled:
   - this process owns all columns (A->cmap->N == n): the work is delegated to the
     diagonal block, writing directly into the storage of v;
   - this process owns no columns (n == 0): every local row gets PETSC_MAX_REAL and index -1;
   - otherwise the minima of the diagonal block and of the off-diagonal block are
     computed separately and merged, the smaller global column winning on ties.

   For the off-diagonal block, a row that does not fill all A->cmap->N - n columns
   contains implicit zeros; the candidate minimum then starts at 0.0 located at the
   first column without a stored entry.
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* wrap the storage of v in a sequential vector so the diagonal block writes straight into it */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMin(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  /* minima of the diagonal block; diagIdx holds columns local to that block */
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  /* ba and bj are advanced entry by entry below, so at the top of each iteration they point at row r of B */
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: it has an implicit 0.0, so the row minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]];  /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty row: the first off-process column is 0, or cend when this process owns column 0 */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          /* skip over the columns owned by this process */
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* compare the candidate against every stored entry of the row, consuming ba and bj */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge the two candidates of each row */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* tie: report the smaller global column */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r]   = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2458 for (r = 0; r < m; r++) { 2459 ncols = bi[r+1] - bi[r]; 2460 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2461 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2462 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2463 offdiagA[r] = 0.0; 2464 2465 /* Find first hole in the cmap */ 2466 for (j=0; j<ncols; j++) { 2467 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2468 if (col > j && j < cstart) { 2469 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2470 break; 2471 } else if (col > j + n && j >= cstart) { 2472 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2473 break; 2474 } 2475 } 2476 if (j == ncols && ncols < A->cmap->N - n) { 2477 /* a hole is outside compressed Bcols */ 2478 if (ncols == 0) { 2479 if (cstart) { 2480 offdiagIdx[r] = 0; 2481 } else offdiagIdx[r] = cend; 2482 } else { /* ncols > 0 */ 2483 offdiagIdx[r] = cmap[ncols-1] + 1; 2484 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2485 } 2486 } 2487 } 2488 2489 for (j=0; j<ncols; j++) { 2490 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2491 ba++; bj++; 2492 } 2493 } 2494 2495 PetscCall(VecGetArrayWrite(v, &a)); 2496 PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA)); 2497 for (r = 0; r < m; ++r) { 2498 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2499 a[r] = diagA[r]; 2500 if (idx) idx[r] = cstart + diagIdx[r]; 2501 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2502 a[r] = diagA[r]; 2503 if (idx) { 2504 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2505 idx[r] = cstart + diagIdx[r]; 2506 } else idx[r] = offdiagIdx[r]; 2507 } 2508 } else { 2509 a[r] = offdiagA[r]; 2510 if (idx) idx[r] = offdiagIdx[r]; 2511 } 2512 } 2513 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2514 PetscCall(VecRestoreArrayWrite(v, &a)); 2515 PetscCall(VecRestoreArrayRead(diagV, (const 
PetscScalar**)&diagA)); 2516 PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA)); 2517 PetscCall(VecDestroy(&diagV)); 2518 PetscCall(VecDestroy(&offdiagV)); 2519 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2520 PetscFunctionReturn(0); 2521 } 2522 2523 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2524 { 2525 Mat *dummy; 2526 2527 PetscFunctionBegin; 2528 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy)); 2529 *newmat = *dummy; 2530 PetscCall(PetscFree(dummy)); 2531 PetscFunctionReturn(0); 2532 } 2533 2534 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2535 { 2536 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2537 2538 PetscFunctionBegin; 2539 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2540 A->factorerrortype = a->A->factorerrortype; 2541 PetscFunctionReturn(0); 2542 } 2543 2544 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2545 { 2546 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2547 2548 PetscFunctionBegin; 2549 PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2550 PetscCall(MatSetRandom(aij->A,rctx)); 2551 if (x->assembled) { 2552 PetscCall(MatSetRandom(aij->B,rctx)); 2553 } else { 2554 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx)); 2555 } 2556 PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY)); 2557 PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY)); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2562 { 2563 PetscFunctionBegin; 2564 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2565 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2566 PetscFunctionReturn(0); 2567 } 2568 2569 /*@ 2570 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable 
algorithm to compute the overlap 2571 2572 Collective on Mat 2573 2574 Input Parameters: 2575 + A - the matrix 2576 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2577 2578 Level: advanced 2579 2580 @*/ 2581 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2582 { 2583 PetscFunctionBegin; 2584 PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc)); 2585 PetscFunctionReturn(0); 2586 } 2587 2588 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2589 { 2590 PetscBool sc = PETSC_FALSE,flg; 2591 2592 PetscFunctionBegin; 2593 PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options"); 2594 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2595 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg)); 2596 if (flg) { 2597 PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc)); 2598 } 2599 PetscOptionsHeadEnd(); 2600 PetscFunctionReturn(0); 2601 } 2602 2603 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2604 { 2605 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2606 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2607 2608 PetscFunctionBegin; 2609 if (!Y->preallocated) { 2610 PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL)); 2611 } else if (!aij->nz) { 2612 PetscInt nonew = aij->nonew; 2613 PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL)); 2614 aij->nonew = nonew; 2615 } 2616 PetscCall(MatShift_Basic(Y,a)); 2617 PetscFunctionReturn(0); 2618 } 2619 2620 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2621 { 2622 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2623 2624 PetscFunctionBegin; 2625 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2626 PetscCall(MatMissingDiagonal(a->A,missing,d)); 2627 if (d) { 2628 PetscInt rstart; 2629 
PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 2630 *d += rstart; 2631 2632 } 2633 PetscFunctionReturn(0); 2634 } 2635 2636 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2637 { 2638 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2639 2640 PetscFunctionBegin; 2641 PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag)); 2642 PetscFunctionReturn(0); 2643 } 2644 2645 /* -------------------------------------------------------------------*/ 2646 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2647 MatGetRow_MPIAIJ, 2648 MatRestoreRow_MPIAIJ, 2649 MatMult_MPIAIJ, 2650 /* 4*/ MatMultAdd_MPIAIJ, 2651 MatMultTranspose_MPIAIJ, 2652 MatMultTransposeAdd_MPIAIJ, 2653 NULL, 2654 NULL, 2655 NULL, 2656 /*10*/ NULL, 2657 NULL, 2658 NULL, 2659 MatSOR_MPIAIJ, 2660 MatTranspose_MPIAIJ, 2661 /*15*/ MatGetInfo_MPIAIJ, 2662 MatEqual_MPIAIJ, 2663 MatGetDiagonal_MPIAIJ, 2664 MatDiagonalScale_MPIAIJ, 2665 MatNorm_MPIAIJ, 2666 /*20*/ MatAssemblyBegin_MPIAIJ, 2667 MatAssemblyEnd_MPIAIJ, 2668 MatSetOption_MPIAIJ, 2669 MatZeroEntries_MPIAIJ, 2670 /*24*/ MatZeroRows_MPIAIJ, 2671 NULL, 2672 NULL, 2673 NULL, 2674 NULL, 2675 /*29*/ MatSetUp_MPIAIJ, 2676 NULL, 2677 NULL, 2678 MatGetDiagonalBlock_MPIAIJ, 2679 NULL, 2680 /*34*/ MatDuplicate_MPIAIJ, 2681 NULL, 2682 NULL, 2683 NULL, 2684 NULL, 2685 /*39*/ MatAXPY_MPIAIJ, 2686 MatCreateSubMatrices_MPIAIJ, 2687 MatIncreaseOverlap_MPIAIJ, 2688 MatGetValues_MPIAIJ, 2689 MatCopy_MPIAIJ, 2690 /*44*/ MatGetRowMax_MPIAIJ, 2691 MatScale_MPIAIJ, 2692 MatShift_MPIAIJ, 2693 MatDiagonalSet_MPIAIJ, 2694 MatZeroRowsColumns_MPIAIJ, 2695 /*49*/ MatSetRandom_MPIAIJ, 2696 NULL, 2697 NULL, 2698 NULL, 2699 NULL, 2700 /*54*/ MatFDColoringCreate_MPIXAIJ, 2701 NULL, 2702 MatSetUnfactored_MPIAIJ, 2703 MatPermute_MPIAIJ, 2704 NULL, 2705 /*59*/ MatCreateSubMatrix_MPIAIJ, 2706 MatDestroy_MPIAIJ, 2707 MatView_MPIAIJ, 2708 NULL, 2709 NULL, 2710 /*64*/ NULL, 2711 
MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2712 NULL, 2713 NULL, 2714 NULL, 2715 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2716 MatGetRowMinAbs_MPIAIJ, 2717 NULL, 2718 NULL, 2719 NULL, 2720 NULL, 2721 /*75*/ MatFDColoringApply_AIJ, 2722 MatSetFromOptions_MPIAIJ, 2723 NULL, 2724 NULL, 2725 MatFindZeroDiagonals_MPIAIJ, 2726 /*80*/ NULL, 2727 NULL, 2728 NULL, 2729 /*83*/ MatLoad_MPIAIJ, 2730 MatIsSymmetric_MPIAIJ, 2731 NULL, 2732 NULL, 2733 NULL, 2734 NULL, 2735 /*89*/ NULL, 2736 NULL, 2737 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2738 NULL, 2739 NULL, 2740 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2741 NULL, 2742 NULL, 2743 NULL, 2744 MatBindToCPU_MPIAIJ, 2745 /*99*/ MatProductSetFromOptions_MPIAIJ, 2746 NULL, 2747 NULL, 2748 MatConjugate_MPIAIJ, 2749 NULL, 2750 /*104*/MatSetValuesRow_MPIAIJ, 2751 MatRealPart_MPIAIJ, 2752 MatImaginaryPart_MPIAIJ, 2753 NULL, 2754 NULL, 2755 /*109*/NULL, 2756 NULL, 2757 MatGetRowMin_MPIAIJ, 2758 NULL, 2759 MatMissingDiagonal_MPIAIJ, 2760 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2761 NULL, 2762 MatGetGhosts_MPIAIJ, 2763 NULL, 2764 NULL, 2765 /*119*/MatMultDiagonalBlock_MPIAIJ, 2766 NULL, 2767 NULL, 2768 NULL, 2769 MatGetMultiProcBlock_MPIAIJ, 2770 /*124*/MatFindNonzeroRows_MPIAIJ, 2771 MatGetColumnReductions_MPIAIJ, 2772 MatInvertBlockDiagonal_MPIAIJ, 2773 MatInvertVariableBlockDiagonal_MPIAIJ, 2774 MatCreateSubMatricesMPI_MPIAIJ, 2775 /*129*/NULL, 2776 NULL, 2777 NULL, 2778 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2779 NULL, 2780 /*134*/NULL, 2781 NULL, 2782 NULL, 2783 NULL, 2784 NULL, 2785 /*139*/MatSetBlockSizes_MPIAIJ, 2786 NULL, 2787 NULL, 2788 MatFDColoringSetUp_MPIXAIJ, 2789 MatFindOffBlockDiagonalEntries_MPIAIJ, 2790 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2791 /*145*/NULL, 2792 NULL, 2793 NULL 2794 }; 2795 2796 /* ----------------------------------------------------------------------------------------*/ 2797 2798 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2799 { 2800 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2801 2802 PetscFunctionBegin; 2803 
PetscCall(MatStoreValues(aij->A)); 2804 PetscCall(MatStoreValues(aij->B)); 2805 PetscFunctionReturn(0); 2806 } 2807 2808 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2809 { 2810 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2811 2812 PetscFunctionBegin; 2813 PetscCall(MatRetrieveValues(aij->A)); 2814 PetscCall(MatRetrieveValues(aij->B)); 2815 PetscFunctionReturn(0); 2816 } 2817 2818 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2819 { 2820 Mat_MPIAIJ *b; 2821 PetscMPIInt size; 2822 2823 PetscFunctionBegin; 2824 PetscCall(PetscLayoutSetUp(B->rmap)); 2825 PetscCall(PetscLayoutSetUp(B->cmap)); 2826 b = (Mat_MPIAIJ*)B->data; 2827 2828 #if defined(PETSC_USE_CTABLE) 2829 PetscCall(PetscTableDestroy(&b->colmap)); 2830 #else 2831 PetscCall(PetscFree(b->colmap)); 2832 #endif 2833 PetscCall(PetscFree(b->garray)); 2834 PetscCall(VecDestroy(&b->lvec)); 2835 PetscCall(VecScatterDestroy(&b->Mvctx)); 2836 2837 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2838 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 2839 PetscCall(MatDestroy(&b->B)); 2840 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 2841 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0)); 2842 PetscCall(MatSetBlockSizesFromMats(b->B,B,B)); 2843 PetscCall(MatSetType(b->B,MATSEQAIJ)); 2844 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 2845 2846 if (!B->preallocated) { 2847 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 2848 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 2849 PetscCall(MatSetBlockSizesFromMats(b->A,B,B)); 2850 PetscCall(MatSetType(b->A,MATSEQAIJ)); 2851 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 2852 } 2853 2854 PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz)); 2855 PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz)); 2856 B->preallocated = PETSC_TRUE; 2857 B->was_assembled = PETSC_FALSE; 2858 B->assembled = PETSC_FALSE; 2859 PetscFunctionReturn(0); 2860 } 2861 2862 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2863 { 2864 Mat_MPIAIJ *b; 2865 2866 PetscFunctionBegin; 2867 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2868 PetscCall(PetscLayoutSetUp(B->rmap)); 2869 PetscCall(PetscLayoutSetUp(B->cmap)); 2870 b = (Mat_MPIAIJ*)B->data; 2871 2872 #if defined(PETSC_USE_CTABLE) 2873 PetscCall(PetscTableDestroy(&b->colmap)); 2874 #else 2875 PetscCall(PetscFree(b->colmap)); 2876 #endif 2877 PetscCall(PetscFree(b->garray)); 2878 PetscCall(VecDestroy(&b->lvec)); 2879 PetscCall(VecScatterDestroy(&b->Mvctx)); 2880 2881 PetscCall(MatResetPreallocation(b->A)); 2882 PetscCall(MatResetPreallocation(b->B)); 2883 B->preallocated = PETSC_TRUE; 2884 B->was_assembled = PETSC_FALSE; 2885 B->assembled = PETSC_FALSE; 2886 PetscFunctionReturn(0); 2887 } 2888 2889 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2890 { 2891 Mat mat; 2892 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2893 2894 PetscFunctionBegin; 2895 *newmat = NULL; 2896 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 2897 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 2898 
PetscCall(MatSetBlockSizesFromMats(mat,matin,matin)); 2899 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 2900 a = (Mat_MPIAIJ*)mat->data; 2901 2902 mat->factortype = matin->factortype; 2903 mat->assembled = matin->assembled; 2904 mat->insertmode = NOT_SET_VALUES; 2905 mat->preallocated = matin->preallocated; 2906 2907 a->size = oldmat->size; 2908 a->rank = oldmat->rank; 2909 a->donotstash = oldmat->donotstash; 2910 a->roworiented = oldmat->roworiented; 2911 a->rowindices = NULL; 2912 a->rowvalues = NULL; 2913 a->getrowactive = PETSC_FALSE; 2914 2915 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 2916 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 2917 2918 if (oldmat->colmap) { 2919 #if defined(PETSC_USE_CTABLE) 2920 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 2921 #else 2922 PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap)); 2923 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt))); 2924 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N)); 2925 #endif 2926 } else a->colmap = NULL; 2927 if (oldmat->garray) { 2928 PetscInt len; 2929 len = oldmat->B->cmap->n; 2930 PetscCall(PetscMalloc1(len+1,&a->garray)); 2931 PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 2932 if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 2933 } else a->garray = NULL; 2934 2935 /* It may happen MatDuplicate is called with a non-assembled matrix 2936 In fact, MatDuplicate only requires the matrix to be preallocated 2937 This may happen inside a DMCreateMatrix_Shell */ 2938 if (oldmat->lvec) { 2939 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 2940 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 2941 } 2942 if (oldmat->Mvctx) { 2943 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 2944 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 2945 } 2946 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2947 
PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2948 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 2949 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 2950 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 2951 *newmat = mat; 2952 PetscFunctionReturn(0); 2953 } 2954 2955 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2956 { 2957 PetscBool isbinary, ishdf5; 2958 2959 PetscFunctionBegin; 2960 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2961 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2962 /* force binary viewer to load .info file if it has not yet done so */ 2963 PetscCall(PetscViewerSetUp(viewer)); 2964 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 2965 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5)); 2966 if (isbinary) { 2967 PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer)); 2968 } else if (ishdf5) { 2969 #if defined(PETSC_HAVE_HDF5) 2970 PetscCall(MatLoad_AIJ_HDF5(newMat,viewer)); 2971 #else 2972 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2973 #endif 2974 } else { 2975 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2976 } 2977 PetscFunctionReturn(0); 2978 } 2979 2980 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 2981 { 2982 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 2983 PetscInt *rowidxs,*colidxs; 2984 PetscScalar *matvals; 2985 2986 PetscFunctionBegin; 2987 PetscCall(PetscViewerSetUp(viewer)); 2988 2989 /* read in matrix header */ 2990 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 2991 PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a 
matrix object in file"); 2992 M = header[1]; N = header[2]; nz = header[3]; 2993 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 2994 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 2995 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 2996 2997 /* set block sizes from the viewer's .info file */ 2998 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 2999 /* set global sizes if not set already */ 3000 if (mat->rmap->N < 0) mat->rmap->N = M; 3001 if (mat->cmap->N < 0) mat->cmap->N = N; 3002 PetscCall(PetscLayoutSetUp(mat->rmap)); 3003 PetscCall(PetscLayoutSetUp(mat->cmap)); 3004 3005 /* check if the matrix sizes are correct */ 3006 PetscCall(MatGetSize(mat,&rows,&cols)); 3007 PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3008 3009 /* read in row lengths and build row indices */ 3010 PetscCall(MatGetLocalSize(mat,&m,NULL)); 3011 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3012 PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT)); 3013 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3014 PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer))); 3015 PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3016 /* read in column indices and matrix values */ 3017 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals)); 3018 
PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 3019 PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 3020 /* store matrix indices and values */ 3021 PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals)); 3022 PetscCall(PetscFree(rowidxs)); 3023 PetscCall(PetscFree2(colidxs,matvals)); 3024 PetscFunctionReturn(0); 3025 } 3026 3027 /* Not scalable because of ISAllGather() unless getting all columns. */ 3028 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3029 { 3030 IS iscol_local; 3031 PetscBool isstride; 3032 PetscMPIInt lisstride=0,gisstride; 3033 3034 PetscFunctionBegin; 3035 /* check if we are grabbing all columns*/ 3036 PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride)); 3037 3038 if (isstride) { 3039 PetscInt start,len,mstart,mlen; 3040 PetscCall(ISStrideGetInfo(iscol,&start,NULL)); 3041 PetscCall(ISGetLocalSize(iscol,&len)); 3042 PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen)); 3043 if (mstart == start && mlen-mstart == len) lisstride = 1; 3044 } 3045 3046 PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat))); 3047 if (gisstride) { 3048 PetscInt N; 3049 PetscCall(MatGetSize(mat,NULL,&N)); 3050 PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local)); 3051 PetscCall(ISSetIdentity(iscol_local)); 3052 PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3053 } else { 3054 PetscInt cbs; 3055 PetscCall(ISGetBlockSize(iscol,&cbs)); 3056 PetscCall(ISAllGather(iscol,&iscol_local)); 3057 PetscCall(ISSetBlockSize(iscol_local,cbs)); 3058 } 3059 3060 *isseq = iscol_local; 3061 PetscFunctionReturn(0); 3062 } 3063 3064 /* 3065 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3066 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3067 3068 Input 
Parameters: 3069 mat - matrix 3070 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3071 i.e., mat->rstart <= isrow[i] < mat->rend 3072 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3073 i.e., mat->cstart <= iscol[i] < mat->cend 3074 Output Parameter: 3075 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3076 iscol_o - sequential column index set for retrieving mat->B 3077 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3078 */ 3079 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3080 { 3081 Vec x,cmap; 3082 const PetscInt *is_idx; 3083 PetscScalar *xarray,*cmaparray; 3084 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3085 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3086 Mat B=a->B; 3087 Vec lvec=a->lvec,lcmap; 3088 PetscInt i,cstart,cend,Bn=B->cmap->N; 3089 MPI_Comm comm; 3090 VecScatter Mvctx=a->Mvctx; 3091 3092 PetscFunctionBegin; 3093 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3094 PetscCall(ISGetLocalSize(iscol,&ncols)); 3095 3096 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3097 PetscCall(MatCreateVecs(mat,&x,NULL)); 3098 PetscCall(VecSet(x,-1.0)); 3099 PetscCall(VecDuplicate(x,&cmap)); 3100 PetscCall(VecSet(cmap,-1.0)); 3101 3102 /* Get start indices */ 3103 PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm)); 3104 isstart -= ncols; 3105 PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend)); 3106 3107 PetscCall(ISGetIndices(iscol,&is_idx)); 3108 PetscCall(VecGetArray(x,&xarray)); 3109 PetscCall(VecGetArray(cmap,&cmaparray)); 3110 PetscCall(PetscMalloc1(ncols,&idx)); 3111 for (i=0; i<ncols; i++) { 3112 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3113 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3114 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3115 } 3116 PetscCall(VecRestoreArray(x,&xarray)); 3117 PetscCall(VecRestoreArray(cmap,&cmaparray)); 3118 PetscCall(ISRestoreIndices(iscol,&is_idx)); 3119 3120 /* Get iscol_d */ 3121 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d)); 3122 PetscCall(ISGetBlockSize(iscol,&i)); 3123 PetscCall(ISSetBlockSize(*iscol_d,i)); 3124 3125 /* Get isrow_d */ 3126 PetscCall(ISGetLocalSize(isrow,&m)); 3127 rstart = mat->rmap->rstart; 3128 PetscCall(PetscMalloc1(m,&idx)); 3129 PetscCall(ISGetIndices(isrow,&is_idx)); 3130 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3131 PetscCall(ISRestoreIndices(isrow,&is_idx)); 3132 3133 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d)); 3134 PetscCall(ISGetBlockSize(isrow,&i)); 3135 PetscCall(ISSetBlockSize(*isrow_d,i)); 3136 3137 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3138 PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 3139 PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 3140 3141 PetscCall(VecDuplicate(lvec,&lcmap)); 3142 3143 PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 3144 
PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 3145 3146 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3147 /* off-process column indices */ 3148 count = 0; 3149 PetscCall(PetscMalloc1(Bn,&idx)); 3150 PetscCall(PetscMalloc1(Bn,&cmap1)); 3151 3152 PetscCall(VecGetArray(lvec,&xarray)); 3153 PetscCall(VecGetArray(lcmap,&cmaparray)); 3154 for (i=0; i<Bn; i++) { 3155 if (PetscRealPart(xarray[i]) > -1.0) { 3156 idx[count] = i; /* local column index in off-diagonal part B */ 3157 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3158 count++; 3159 } 3160 } 3161 PetscCall(VecRestoreArray(lvec,&xarray)); 3162 PetscCall(VecRestoreArray(lcmap,&cmaparray)); 3163 3164 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o)); 3165 /* cannot ensure iscol_o has same blocksize as iscol! */ 3166 3167 PetscCall(PetscFree(idx)); 3168 *garray = cmap1; 3169 3170 PetscCall(VecDestroy(&x)); 3171 PetscCall(VecDestroy(&cmap)); 3172 PetscCall(VecDestroy(&lcmap)); 3173 PetscFunctionReturn(0); 3174 } 3175 3176 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3177 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3178 { 3179 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3180 Mat M = NULL; 3181 MPI_Comm comm; 3182 IS iscol_d,isrow_d,iscol_o; 3183 Mat Asub = NULL,Bsub = NULL; 3184 PetscInt n; 3185 3186 PetscFunctionBegin; 3187 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3188 3189 if (call == MAT_REUSE_MATRIX) { 3190 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3191 PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d)); 3192 PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3193 3194 
PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d)); 3195 PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3196 3197 PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o)); 3198 PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3199 3200 /* Update diagonal and off-diagonal portions of submat */ 3201 asub = (Mat_MPIAIJ*)(*submat)->data; 3202 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A)); 3203 PetscCall(ISGetLocalSize(iscol_o,&n)); 3204 if (n) { 3205 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B)); 3206 } 3207 PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY)); 3208 PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY)); 3209 3210 } else { /* call == MAT_INITIAL_MATRIX) */ 3211 const PetscInt *garray; 3212 PetscInt BsubN; 3213 3214 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3215 PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray)); 3216 3217 /* Create local submatrices Asub and Bsub */ 3218 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub)); 3219 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub)); 3220 3221 /* Create submatrix M */ 3222 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M)); 3223 3224 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3225 asub = (Mat_MPIAIJ*)M->data; 3226 3227 PetscCall(ISGetLocalSize(iscol_o,&BsubN)); 3228 n = asub->B->cmap->N; 3229 if (BsubN > n) { 3230 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3231 const PetscInt *idx; 3232 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3233 PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN)); 3234 3235 PetscCall(PetscMalloc1(n,&idx_new)); 3236 j = 0; 3237 PetscCall(ISGetIndices(iscol_o,&idx)); 3238 for (i=0; i<n; i++) { 3239 if (j >= BsubN) break; 3240 while (subgarray[i] > garray[j]) j++; 3241 3242 if (subgarray[i] == garray[j]) { 3243 idx_new[i] = idx[j++]; 3244 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]); 3245 } 3246 PetscCall(ISRestoreIndices(iscol_o,&idx)); 3247 3248 PetscCall(ISDestroy(&iscol_o)); 3249 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o)); 3250 3251 } else if (BsubN < n) { 3252 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N); 3253 } 3254 3255 PetscCall(PetscFree(garray)); 3256 *submat = M; 3257 3258 /* Save isrow_d, iscol_d and iscol_o used in processor for next request 
*/ 3259 PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d)); 3260 PetscCall(ISDestroy(&isrow_d)); 3261 3262 PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d)); 3263 PetscCall(ISDestroy(&iscol_d)); 3264 3265 PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o)); 3266 PetscCall(ISDestroy(&iscol_o)); 3267 } 3268 PetscFunctionReturn(0); 3269 } 3270 3271 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3272 { 3273 IS iscol_local=NULL,isrow_d; 3274 PetscInt csize; 3275 PetscInt n,i,j,start,end; 3276 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3277 MPI_Comm comm; 3278 3279 PetscFunctionBegin; 3280 /* If isrow has same processor distribution as mat, 3281 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3282 if (call == MAT_REUSE_MATRIX) { 3283 PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d)); 3284 if (isrow_d) { 3285 sameRowDist = PETSC_TRUE; 3286 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3287 } else { 3288 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local)); 3289 if (iscol_local) { 3290 sameRowDist = PETSC_TRUE; 3291 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3292 } 3293 } 3294 } else { 3295 /* Check if isrow has same processor distribution as mat */ 3296 sameDist[0] = PETSC_FALSE; 3297 PetscCall(ISGetLocalSize(isrow,&n)); 3298 if (!n) { 3299 sameDist[0] = PETSC_TRUE; 3300 } else { 3301 PetscCall(ISGetMinMax(isrow,&i,&j)); 3302 PetscCall(MatGetOwnershipRange(mat,&start,&end)); 3303 if (i >= start && j < end) { 3304 sameDist[0] = PETSC_TRUE; 3305 } 3306 } 3307 3308 /* Check if iscol has same processor distribution as mat */ 3309 sameDist[1] = PETSC_FALSE; 3310 PetscCall(ISGetLocalSize(iscol,&n)); 3311 if (!n) { 3312 sameDist[1] = PETSC_TRUE; 3313 } else { 3314 PetscCall(ISGetMinMax(iscol,&i,&j)); 3315 
PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end)); 3316 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3317 } 3318 3319 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3320 PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm)); 3321 sameRowDist = tsameDist[0]; 3322 } 3323 3324 if (sameRowDist) { 3325 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3326 /* isrow and iscol have same processor distribution as mat */ 3327 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat)); 3328 PetscFunctionReturn(0); 3329 } else { /* sameRowDist */ 3330 /* isrow has same processor distribution as mat */ 3331 if (call == MAT_INITIAL_MATRIX) { 3332 PetscBool sorted; 3333 PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 3334 PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */ 3335 PetscCall(ISGetSize(iscol,&i)); 3336 PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i); 3337 3338 PetscCall(ISSorted(iscol_local,&sorted)); 3339 if (sorted) { 3340 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3341 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat)); 3342 PetscFunctionReturn(0); 3343 } 3344 } else { /* call == MAT_REUSE_MATRIX */ 3345 IS iscol_sub; 3346 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3347 if (iscol_sub) { 3348 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat)); 3349 PetscFunctionReturn(0); 3350 } 3351 } 3352 } 3353 } 3354 3355 /* General case: iscol -> iscol_local which has global size of iscol */ 3356 if (call == MAT_REUSE_MATRIX) { 3357 PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local)); 3358 PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix 
passed in was not used before, cannot reuse"); 3359 } else { 3360 if (!iscol_local) { 3361 PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 3362 } 3363 } 3364 3365 PetscCall(ISGetLocalSize(iscol,&csize)); 3366 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat)); 3367 3368 if (call == MAT_INITIAL_MATRIX) { 3369 PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local)); 3370 PetscCall(ISDestroy(&iscol_local)); 3371 } 3372 PetscFunctionReturn(0); 3373 } 3374 3375 /*@C 3376 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3377 and "off-diagonal" part of the matrix in CSR format. 3378 3379 Collective 3380 3381 Input Parameters: 3382 + comm - MPI communicator 3383 . A - "diagonal" portion of matrix 3384 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3385 - garray - global index of B columns 3386 3387 Output Parameter: 3388 . mat - the matrix, with input A as its local diagonal matrix 3389 Level: advanced 3390 3391 Notes: 3392 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3393 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 
3394 3395 .seealso: MatCreateMPIAIJWithSplitArrays() 3396 @*/ 3397 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3398 { 3399 Mat_MPIAIJ *maij; 3400 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3401 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3402 const PetscScalar *oa; 3403 Mat Bnew; 3404 PetscInt m,n,N; 3405 3406 PetscFunctionBegin; 3407 PetscCall(MatCreate(comm,mat)); 3408 PetscCall(MatGetSize(A,&m,&n)); 3409 PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N); 3410 PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs); 3411 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3412 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3413 3414 /* Get global columns of mat */ 3415 PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm)); 3416 3417 PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N)); 3418 PetscCall(MatSetType(*mat,MATMPIAIJ)); 3419 PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs)); 3420 maij = (Mat_MPIAIJ*)(*mat)->data; 3421 3422 (*mat)->preallocated = PETSC_TRUE; 3423 3424 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3425 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3426 3427 /* Set A as diagonal portion of *mat */ 3428 maij->A = A; 3429 3430 nz = oi[m]; 3431 for (i=0; i<nz; i++) { 3432 col = oj[i]; 3433 oj[i] = garray[col]; 3434 } 3435 3436 /* Set Bnew as off-diagonal portion of *mat */ 3437 PetscCall(MatSeqAIJGetArrayRead(B,&oa)); 3438 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew)); 3439 PetscCall(MatSeqAIJRestoreArrayRead(B,&oa)); 3440 bnew = (Mat_SeqAIJ*)Bnew->data; 3441 bnew->maxnz = b->maxnz; 
/* allocated nonzeros of B */ 3442 maij->B = Bnew; 3443 3444 PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N); 3445 3446 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3447 b->free_a = PETSC_FALSE; 3448 b->free_ij = PETSC_FALSE; 3449 PetscCall(MatDestroy(&B)); 3450 3451 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3452 bnew->free_a = PETSC_TRUE; 3453 bnew->free_ij = PETSC_TRUE; 3454 3455 /* condense columns of maij->B */ 3456 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 3457 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 3458 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 3459 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 3460 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3461 PetscFunctionReturn(0); 3462 } 3463 3464 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3465 3466 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3467 { 3468 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3469 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3470 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3471 Mat M,Msub,B=a->B; 3472 MatScalar *aa; 3473 Mat_SeqAIJ *aij; 3474 PetscInt *garray = a->garray,*colsub,Ncols; 3475 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3476 IS iscol_sub,iscmap; 3477 const PetscInt *is_idx,*cmap; 3478 PetscBool allcolumns=PETSC_FALSE; 3479 MPI_Comm comm; 3480 3481 PetscFunctionBegin; 3482 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3483 if (call == MAT_REUSE_MATRIX) { 3484 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3485 PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in 
was not used before, cannot reuse"); 3486 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3487 3488 PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap)); 3489 PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3490 3491 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub)); 3492 PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3493 3494 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub)); 3495 3496 } else { /* call == MAT_INITIAL_MATRIX) */ 3497 PetscBool flg; 3498 3499 PetscCall(ISGetLocalSize(iscol,&n)); 3500 PetscCall(ISGetSize(iscol,&Ncols)); 3501 3502 /* (1) iscol -> nonscalable iscol_local */ 3503 /* Check for special case: each processor gets entire matrix columns */ 3504 PetscCall(ISIdentity(iscol_local,&flg)); 3505 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3506 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3507 if (allcolumns) { 3508 iscol_sub = iscol_local; 3509 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3510 PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap)); 3511 3512 } else { 3513 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3514 PetscInt *idx,*cmap1,k; 3515 PetscCall(PetscMalloc1(Ncols,&idx)); 3516 PetscCall(PetscMalloc1(Ncols,&cmap1)); 3517 PetscCall(ISGetIndices(iscol_local,&is_idx)); 3518 count = 0; 3519 k = 0; 3520 for (i=0; i<Ncols; i++) { 3521 j = is_idx[i]; 3522 if (j >= cstart && j < cend) { 3523 /* diagonal part of mat */ 3524 idx[count] = j; 3525 cmap1[count++] = i; /* column index in submat */ 3526 } else if (Bn) { 3527 /* off-diagonal part of mat */ 3528 if (j == garray[k]) { 3529 idx[count] = j; 3530 cmap1[count++] = i; /* column index in submat */ 3531 } else if (j > garray[k]) { 3532 while (j > garray[k] && k < Bn-1) k++; 3533 if (j == garray[k]) { 3534 idx[count] = j; 3535 cmap1[count++] = i; /* column index in submat */ 3536 } 3537 } 3538 } 3539 } 3540 PetscCall(ISRestoreIndices(iscol_local,&is_idx)); 3541 3542 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub)); 3543 PetscCall(ISGetBlockSize(iscol,&cbs)); 3544 PetscCall(ISSetBlockSize(iscol_sub,cbs)); 3545 3546 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap)); 3547 } 3548 3549 /* (3) Create sequential Msub */ 3550 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub)); 3551 } 3552 3553 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3554 aij = (Mat_SeqAIJ*)(Msub)->data; 3555 ii = aij->i; 3556 PetscCall(ISGetIndices(iscmap,&cmap)); 3557 3558 /* 3559 m - number of local rows 3560 Ncols - number of columns (same on all processors) 3561 rstart - first row in new global matrix generated 3562 */ 3563 PetscCall(MatGetSize(Msub,&m,NULL)); 3564 3565 if (call == MAT_INITIAL_MATRIX) { 3566 /* (4) Create parallel newmat */ 3567 PetscMPIInt rank,size; 3568 PetscInt csize; 3569 3570 PetscCallMPI(MPI_Comm_size(comm,&size)); 3571 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3572 3573 /* 3574 Determine the 
number of non-zeros in the diagonal and off-diagonal 3575 portions of the matrix in order to do correct preallocation 3576 */ 3577 3578 /* first get start and end of "diagonal" columns */ 3579 PetscCall(ISGetLocalSize(iscol,&csize)); 3580 if (csize == PETSC_DECIDE) { 3581 PetscCall(ISGetSize(isrow,&mglobal)); 3582 if (mglobal == Ncols) { /* square matrix */ 3583 nlocal = m; 3584 } else { 3585 nlocal = Ncols/size + ((Ncols % size) > rank); 3586 } 3587 } else { 3588 nlocal = csize; 3589 } 3590 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3591 rstart = rend - nlocal; 3592 PetscCheckFalse(rank == size - 1 && rend != Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols); 3593 3594 /* next, compute all the lengths */ 3595 jj = aij->j; 3596 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3597 olens = dlens + m; 3598 for (i=0; i<m; i++) { 3599 jend = ii[i+1] - ii[i]; 3600 olen = 0; 3601 dlen = 0; 3602 for (j=0; j<jend; j++) { 3603 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3604 else dlen++; 3605 jj++; 3606 } 3607 olens[i] = olen; 3608 dlens[i] = dlen; 3609 } 3610 3611 PetscCall(ISGetBlockSize(isrow,&bs)); 3612 PetscCall(ISGetBlockSize(iscol,&cbs)); 3613 3614 PetscCall(MatCreate(comm,&M)); 3615 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols)); 3616 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3617 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3618 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3619 PetscCall(PetscFree(dlens)); 3620 3621 } else { /* call == MAT_REUSE_MATRIX */ 3622 M = *newmat; 3623 PetscCall(MatGetLocalSize(M,&i,NULL)); 3624 PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3625 PetscCall(MatZeroEntries(M)); 3626 /* 3627 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3628 rather than the slower MatSetValues(). 
3629 */ 3630 M->was_assembled = PETSC_TRUE; 3631 M->assembled = PETSC_FALSE; 3632 } 3633 3634 /* (5) Set values of Msub to *newmat */ 3635 PetscCall(PetscMalloc1(count,&colsub)); 3636 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 3637 3638 jj = aij->j; 3639 PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa)); 3640 for (i=0; i<m; i++) { 3641 row = rstart + i; 3642 nz = ii[i+1] - ii[i]; 3643 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3644 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES)); 3645 jj += nz; aa += nz; 3646 } 3647 PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa)); 3648 PetscCall(ISRestoreIndices(iscmap,&cmap)); 3649 3650 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3651 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3652 3653 PetscCall(PetscFree(colsub)); 3654 3655 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3656 if (call == MAT_INITIAL_MATRIX) { 3657 *newmat = M; 3658 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub)); 3659 PetscCall(MatDestroy(&Msub)); 3660 3661 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub)); 3662 PetscCall(ISDestroy(&iscol_sub)); 3663 3664 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap)); 3665 PetscCall(ISDestroy(&iscmap)); 3666 3667 if (iscol_local) { 3668 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local)); 3669 PetscCall(ISDestroy(&iscol_local)); 3670 } 3671 } 3672 PetscFunctionReturn(0); 3673 } 3674 3675 /* 3676 Not great since it makes two copies of the submatrix, first an SeqAIJ 3677 in local and then by concatenating the local matrices the end result. 3678 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3679 3680 Note: This requires a sequential iscol with all indices. 
3681 */ 3682 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3683 { 3684 PetscMPIInt rank,size; 3685 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3686 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3687 Mat M,Mreuse; 3688 MatScalar *aa,*vwork; 3689 MPI_Comm comm; 3690 Mat_SeqAIJ *aij; 3691 PetscBool colflag,allcolumns=PETSC_FALSE; 3692 3693 PetscFunctionBegin; 3694 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3695 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3696 PetscCallMPI(MPI_Comm_size(comm,&size)); 3697 3698 /* Check for special case: each processor gets entire matrix columns */ 3699 PetscCall(ISIdentity(iscol,&colflag)); 3700 PetscCall(ISGetLocalSize(iscol,&n)); 3701 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3702 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3703 3704 if (call == MAT_REUSE_MATRIX) { 3705 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 3706 PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3707 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse)); 3708 } else { 3709 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse)); 3710 } 3711 3712 /* 3713 m - number of local rows 3714 n - number of columns (same on all processors) 3715 rstart - first row in new global matrix generated 3716 */ 3717 PetscCall(MatGetSize(Mreuse,&m,&n)); 3718 PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs)); 3719 if (call == MAT_INITIAL_MATRIX) { 3720 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3721 ii = aij->i; 3722 jj = aij->j; 3723 3724 /* 3725 Determine the number of non-zeros in the diagonal and off-diagonal 3726 portions of the matrix in order to do correct preallocation 3727 */ 3728 3729 
/* first get start and end of "diagonal" columns */ 3730 if (csize == PETSC_DECIDE) { 3731 PetscCall(ISGetSize(isrow,&mglobal)); 3732 if (mglobal == n) { /* square matrix */ 3733 nlocal = m; 3734 } else { 3735 nlocal = n/size + ((n % size) > rank); 3736 } 3737 } else { 3738 nlocal = csize; 3739 } 3740 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3741 rstart = rend - nlocal; 3742 PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 3743 3744 /* next, compute all the lengths */ 3745 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3746 olens = dlens + m; 3747 for (i=0; i<m; i++) { 3748 jend = ii[i+1] - ii[i]; 3749 olen = 0; 3750 dlen = 0; 3751 for (j=0; j<jend; j++) { 3752 if (*jj < rstart || *jj >= rend) olen++; 3753 else dlen++; 3754 jj++; 3755 } 3756 olens[i] = olen; 3757 dlens[i] = dlen; 3758 } 3759 PetscCall(MatCreate(comm,&M)); 3760 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n)); 3761 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3762 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3763 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3764 PetscCall(PetscFree(dlens)); 3765 } else { 3766 PetscInt ml,nl; 3767 3768 M = *newmat; 3769 PetscCall(MatGetLocalSize(M,&ml,&nl)); 3770 PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3771 PetscCall(MatZeroEntries(M)); 3772 /* 3773 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3774 rather than the slower MatSetValues(). 
3775 */ 3776 M->was_assembled = PETSC_TRUE; 3777 M->assembled = PETSC_FALSE; 3778 } 3779 PetscCall(MatGetOwnershipRange(M,&rstart,&rend)); 3780 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3781 ii = aij->i; 3782 jj = aij->j; 3783 3784 /* trigger copy to CPU if needed */ 3785 PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa)); 3786 for (i=0; i<m; i++) { 3787 row = rstart + i; 3788 nz = ii[i+1] - ii[i]; 3789 cwork = jj; jj += nz; 3790 vwork = aa; aa += nz; 3791 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES)); 3792 } 3793 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa)); 3794 3795 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3796 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3797 *newmat = M; 3798 3799 /* save submatrix used in processor for next request */ 3800 if (call == MAT_INITIAL_MATRIX) { 3801 PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse)); 3802 PetscCall(MatDestroy(&Mreuse)); 3803 } 3804 PetscFunctionReturn(0); 3805 } 3806 3807 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3808 { 3809 PetscInt m,cstart, cend,j,nnz,i,d; 3810 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3811 const PetscInt *JJ; 3812 PetscBool nooffprocentries; 3813 3814 PetscFunctionBegin; 3815 PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]); 3816 3817 PetscCall(PetscLayoutSetUp(B->rmap)); 3818 PetscCall(PetscLayoutSetUp(B->cmap)); 3819 m = B->rmap->n; 3820 cstart = B->cmap->rstart; 3821 cend = B->cmap->rend; 3822 rstart = B->rmap->rstart; 3823 3824 PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz)); 3825 3826 if (PetscDefined(USE_DEBUG)) { 3827 for (i=0; i<m; i++) { 3828 nnz = Ii[i+1]- Ii[i]; 3829 JJ = J + Ii[i]; 3830 PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz); 3831 
PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]); 3832 PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N); 3833 } 3834 } 3835 3836 for (i=0; i<m; i++) { 3837 nnz = Ii[i+1]- Ii[i]; 3838 JJ = J + Ii[i]; 3839 nnz_max = PetscMax(nnz_max,nnz); 3840 d = 0; 3841 for (j=0; j<nnz; j++) { 3842 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3843 } 3844 d_nnz[i] = d; 3845 o_nnz[i] = nnz - d; 3846 } 3847 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 3848 PetscCall(PetscFree2(d_nnz,o_nnz)); 3849 3850 for (i=0; i<m; i++) { 3851 ii = i + rstart; 3852 PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES)); 3853 } 3854 nooffprocentries = B->nooffprocentries; 3855 B->nooffprocentries = PETSC_TRUE; 3856 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 3857 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3858 B->nooffprocentries = nooffprocentries; 3859 3860 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3861 PetscFunctionReturn(0); 3862 } 3863 3864 /*@ 3865 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3866 (the default parallel PETSc format). 3867 3868 Collective 3869 3870 Input Parameters: 3871 + B - the matrix 3872 . i - the indices into j for the start of each local row (starts with zero) 3873 . j - the column indices for each local row (starts with zero) 3874 - v - optional values in the matrix 3875 3876 Level: developer 3877 3878 Notes: 3879 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3880 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3881 called this routine. 
Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3882 3883 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3884 3885 The format which is used for the sparse matrix input, is equivalent to a 3886 row-major ordering.. i.e for the following matrix, the input data expected is 3887 as shown 3888 3889 $ 1 0 0 3890 $ 2 0 3 P0 3891 $ ------- 3892 $ 4 5 6 P1 3893 $ 3894 $ Process0 [P0]: rows_owned=[0,1] 3895 $ i = {0,1,3} [size = nrow+1 = 2+1] 3896 $ j = {0,0,2} [size = 3] 3897 $ v = {1,2,3} [size = 3] 3898 $ 3899 $ Process1 [P1]: rows_owned=[2] 3900 $ i = {0,3} [size = nrow+1 = 1+1] 3901 $ j = {0,1,2} [size = 3] 3902 $ v = {4,5,6} [size = 3] 3903 3904 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3905 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3906 @*/ 3907 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3908 { 3909 PetscFunctionBegin; 3910 PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v)); 3911 PetscFunctionReturn(0); 3912 } 3913 3914 /*@C 3915 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3916 (the default parallel PETSc format). For good matrix assembly performance 3917 the user should preallocate the matrix storage by setting the parameters 3918 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3919 performance can be increased by more than a factor of 50. 3920 3921 Collective 3922 3923 Input Parameters: 3924 + B - the matrix 3925 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3926 (same value is used for all local rows) 3927 . 
d_nnz - array containing the number of nonzeros in the various rows of the 3928 DIAGONAL portion of the local submatrix (possibly different for each row) 3929 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3930 The size of this array is equal to the number of local rows, i.e 'm'. 3931 For matrices that will be factored, you must leave room for (and set) 3932 the diagonal entry even if it is zero. 3933 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3934 submatrix (same value is used for all local rows). 3935 - o_nnz - array containing the number of nonzeros in the various rows of the 3936 OFF-DIAGONAL portion of the local submatrix (possibly different for 3937 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3938 structure. The size of this array is equal to the number 3939 of local rows, i.e 'm'. 3940 3941 If the *_nnz parameter is given then the *_nz parameter is ignored 3942 3943 The AIJ format (also called the Yale sparse matrix format or 3944 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3945 storage. The stored row and column indices begin with zero. 3946 See Users-Manual: ch_mat for details. 3947 3948 The parallel matrix is partitioned such that the first m0 rows belong to 3949 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3950 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3951 3952 The DIAGONAL portion of the local submatrix of a processor can be defined 3953 as the submatrix which is obtained by extracting the part corresponding to 3954 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3955 first row that belongs to the processor, r2 is the last row belonging to 3956 this processor, and c1-c2 is the range of indices of the local part of a 3957 vector suitable for applying the matrix to. This is an mxn matrix. 
In the 3958 common case of a square matrix, the row and column ranges are the same and 3959 the DIAGONAL part is also square. The remaining portion of the local 3960 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3961 3962 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3963 3964 You can call MatGetInfo() to get information on how effective the preallocation was; 3965 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3966 You can also run with the option -info and look for messages with the string 3967 malloc in them to see if additional memory allocation was needed. 3968 3969 Example usage: 3970 3971 Consider the following 8x8 matrix with 34 non-zero values, that is 3972 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3973 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3974 as follows: 3975 3976 .vb 3977 1 2 0 | 0 3 0 | 0 4 3978 Proc0 0 5 6 | 7 0 0 | 8 0 3979 9 0 10 | 11 0 0 | 12 0 3980 ------------------------------------- 3981 13 0 14 | 15 16 17 | 0 0 3982 Proc1 0 18 0 | 19 20 21 | 0 0 3983 0 0 0 | 22 23 0 | 24 0 3984 ------------------------------------- 3985 Proc2 25 26 27 | 0 0 28 | 29 0 3986 30 0 0 | 31 32 33 | 0 34 3987 .ve 3988 3989 This can be represented as a collection of submatrices as: 3990 3991 .vb 3992 A B C 3993 D E F 3994 G H I 3995 .ve 3996 3997 Where the submatrices A,B,C are owned by proc0, D,E,F are 3998 owned by proc1, G,H,I are owned by proc2. 3999 4000 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4001 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4002 The 'M','N' parameters are 8,8, and have the same values on all procs. 4003 4004 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4005 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4006 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
4007 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4008 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4009 matrix, and [DF] as another SeqAIJ matrix. 4010 4011 When d_nz, o_nz parameters are specified, d_nz storage elements are 4012 allocated for every row of the local diagonal submatrix, and o_nz 4013 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4014 One way to choose d_nz and o_nz is to use the max nonzeros per local 4015 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4016 In this case, the values of d_nz,o_nz are: 4017 .vb 4018 proc0 : d_nz = 2, o_nz = 2 4019 proc1 : d_nz = 3, o_nz = 2 4020 proc2 : d_nz = 1, o_nz = 4 4021 .ve 4022 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4023 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4024 for proc2. i.e we are using 12+15+10=37 storage locations to store 4025 34 values. 4026 4027 When d_nnz, o_nnz parameters are specified, the storage is specified 4028 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4029 In the above case the values for d_nnz,o_nnz are: 4030 .vb 4031 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4032 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4033 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4034 .ve 4035 Here the space allocated is the sum of all the above values i.e 34, and 4036 hence pre-allocation is perfect. 
4037 4038 Level: intermediate 4039 4040 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4041 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4042 @*/ 4043 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4044 { 4045 PetscFunctionBegin; 4046 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4047 PetscValidType(B,1); 4048 PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz)); 4049 PetscFunctionReturn(0); 4050 } 4051 4052 /*@ 4053 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4054 CSR format for the local rows. 4055 4056 Collective 4057 4058 Input Parameters: 4059 + comm - MPI communicator 4060 . m - number of local rows (Cannot be PETSC_DECIDE) 4061 . n - This value should be the same as the local size used in creating the 4062 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4063 calculated if N is given) For square matrices n is almost always m. 4064 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4065 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4066 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4067 . j - column indices 4068 - a - matrix values 4069 4070 Output Parameter: 4071 . mat - the matrix 4072 4073 Level: intermediate 4074 4075 Notes: 4076 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4077 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4078 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4079 4080 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
4081 4082 The format which is used for the sparse matrix input, is equivalent to a 4083 row-major ordering.. i.e for the following matrix, the input data expected is 4084 as shown 4085 4086 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4087 4088 $ 1 0 0 4089 $ 2 0 3 P0 4090 $ ------- 4091 $ 4 5 6 P1 4092 $ 4093 $ Process0 [P0]: rows_owned=[0,1] 4094 $ i = {0,1,3} [size = nrow+1 = 2+1] 4095 $ j = {0,0,2} [size = 3] 4096 $ v = {1,2,3} [size = 3] 4097 $ 4098 $ Process1 [P1]: rows_owned=[2] 4099 $ i = {0,3} [size = nrow+1 = 1+1] 4100 $ j = {0,1,2} [size = 3] 4101 $ v = {4,5,6} [size = 3] 4102 4103 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4104 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4105 @*/ 4106 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4107 { 4108 PetscFunctionBegin; 4109 PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4110 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4111 PetscCall(MatCreate(comm,mat)); 4112 PetscCall(MatSetSizes(*mat,m,n,M,N)); 4113 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4114 PetscCall(MatSetType(*mat,MATMPIAIJ)); 4115 PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a)); 4116 PetscFunctionReturn(0); 4117 } 4118 4119 /*@ 4120 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4121 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4122 4123 Collective 4124 4125 Input Parameters: 4126 + mat - the matrix 4127 . m - number of local rows (Cannot be PETSC_DECIDE) 4128 . 
n - This value should be the same as the local size used in creating the 4129 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4130 calculated if N is given) For square matrices n is almost always m. 4131 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4132 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4133 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4134 . J - column indices 4135 - v - matrix values 4136 4137 Level: intermediate 4138 4139 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4140 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4141 @*/ 4142 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4143 { 4144 PetscInt cstart,nnz,i,j; 4145 PetscInt *ld; 4146 PetscBool nooffprocentries; 4147 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4148 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4149 PetscScalar *ad,*ao; 4150 const PetscInt *Adi = Ad->i; 4151 PetscInt ldi,Iii,md; 4152 4153 PetscFunctionBegin; 4154 PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4155 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4156 PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4157 PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4158 4159 PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4160 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4161 cstart = mat->cmap->rstart; 4162 if (!Aij->ld) { 4163 /* count 
number of entries below block diagonal */ 4164 PetscCall(PetscCalloc1(m,&ld)); 4165 Aij->ld = ld; 4166 for (i=0; i<m; i++) { 4167 nnz = Ii[i+1]- Ii[i]; 4168 j = 0; 4169 while (J[j] < cstart && j < nnz) {j++;} 4170 J += nnz; 4171 ld[i] = j; 4172 } 4173 } else { 4174 ld = Aij->ld; 4175 } 4176 4177 for (i=0; i<m; i++) { 4178 nnz = Ii[i+1]- Ii[i]; 4179 Iii = Ii[i]; 4180 ldi = ld[i]; 4181 md = Adi[i+1]-Adi[i]; 4182 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4183 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4184 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4185 ad += md; 4186 ao += nnz - md; 4187 } 4188 nooffprocentries = mat->nooffprocentries; 4189 mat->nooffprocentries = PETSC_TRUE; 4190 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4191 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4192 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4193 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4194 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4195 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4196 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4197 mat->nooffprocentries = nooffprocentries; 4198 PetscFunctionReturn(0); 4199 } 4200 4201 /*@C 4202 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4203 (the default parallel PETSc format). For good matrix assembly performance 4204 the user should preallocate the matrix storage by setting the parameters 4205 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4206 performance can be increased by more than a factor of 50. 4207 4208 Collective 4209 4210 Input Parameters: 4211 + comm - MPI communicator 4212 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4213 This value should be the same as the local size used in creating the 4214 y vector for the matrix-vector product y = Ax. 4215 . 
n - This value should be the same as the local size used in creating the 4216 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4217 calculated if N is given) For square matrices n is almost always m. 4218 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4219 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4220 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4221 (same value is used for all local rows) 4222 . d_nnz - array containing the number of nonzeros in the various rows of the 4223 DIAGONAL portion of the local submatrix (possibly different for each row) 4224 or NULL, if d_nz is used to specify the nonzero structure. 4225 The size of this array is equal to the number of local rows, i.e 'm'. 4226 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4227 submatrix (same value is used for all local rows). 4228 - o_nnz - array containing the number of nonzeros in the various rows of the 4229 OFF-DIAGONAL portion of the local submatrix (possibly different for 4230 each row) or NULL, if o_nz is used to specify the nonzero 4231 structure. The size of this array is equal to the number 4232 of local rows, i.e 'm'. 4233 4234 Output Parameter: 4235 . A - the matrix 4236 4237 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4238 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4239 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4240 4241 Notes: 4242 If the *_nnz parameter is given then the *_nz parameter is ignored 4243 4244 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4245 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4246 storage requirements for this matrix. 

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.
   If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
.ve

$     MatCreate(...,&A);
$     MatSetType(A,MATMPIAIJ);
$     MatSetSizes(A, m,n,M,N);
$     MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
        See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
        Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
4371 4372 Level: intermediate 4373 4374 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4375 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4376 @*/ 4377 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4378 { 4379 PetscMPIInt size; 4380 4381 PetscFunctionBegin; 4382 PetscCall(MatCreate(comm,A)); 4383 PetscCall(MatSetSizes(*A,m,n,M,N)); 4384 PetscCallMPI(MPI_Comm_size(comm,&size)); 4385 if (size > 1) { 4386 PetscCall(MatSetType(*A,MATMPIAIJ)); 4387 PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz)); 4388 } else { 4389 PetscCall(MatSetType(*A,MATSEQAIJ)); 4390 PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz)); 4391 } 4392 PetscFunctionReturn(0); 4393 } 4394 4395 /*@C 4396 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4397 4398 Not collective 4399 4400 Input Parameter: 4401 . A - The MPIAIJ matrix 4402 4403 Output Parameters: 4404 + Ad - The local diagonal block as a SeqAIJ matrix 4405 . Ao - The local off-diagonal block as a SeqAIJ matrix 4406 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4407 4408 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4409 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4410 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4411 local column numbers to global column numbers in the original matrix. 
4412 4413 Level: intermediate 4414 4415 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4416 @*/ 4417 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4418 { 4419 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4420 PetscBool flg; 4421 4422 PetscFunctionBegin; 4423 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg)); 4424 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4425 if (Ad) *Ad = a->A; 4426 if (Ao) *Ao = a->B; 4427 if (colmap) *colmap = a->garray; 4428 PetscFunctionReturn(0); 4429 } 4430 4431 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4432 { 4433 PetscInt m,N,i,rstart,nnz,Ii; 4434 PetscInt *indx; 4435 PetscScalar *values; 4436 MatType rootType; 4437 4438 PetscFunctionBegin; 4439 PetscCall(MatGetSize(inmat,&m,&N)); 4440 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4441 PetscInt *dnz,*onz,sum,bs,cbs; 4442 4443 if (n == PETSC_DECIDE) { 4444 PetscCall(PetscSplitOwnership(comm,&n,&N)); 4445 } 4446 /* Check sum(n) = N */ 4447 PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm)); 4448 PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 4449 4450 PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm)); 4451 rstart -= m; 4452 4453 MatPreallocateBegin(comm,m,n,dnz,onz); 4454 for (i=0; i<m; i++) { 4455 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4456 PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz)); 4457 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4458 } 4459 4460 PetscCall(MatCreate(comm,outmat)); 4461 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4462 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 4463 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 4464 
PetscCall(MatGetRootType_Private(inmat,&rootType)); 4465 PetscCall(MatSetType(*outmat,rootType)); 4466 PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz)); 4467 PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz)); 4468 MatPreallocateEnd(dnz,onz); 4469 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 4470 } 4471 4472 /* numeric phase */ 4473 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 4474 for (i=0; i<m; i++) { 4475 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4476 Ii = i + rstart; 4477 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 4478 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4479 } 4480 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 4481 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 4482 PetscFunctionReturn(0); 4483 } 4484 4485 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4486 { 4487 PetscMPIInt rank; 4488 PetscInt m,N,i,rstart,nnz; 4489 size_t len; 4490 const PetscInt *indx; 4491 PetscViewer out; 4492 char *name; 4493 Mat B; 4494 const PetscScalar *values; 4495 4496 PetscFunctionBegin; 4497 PetscCall(MatGetLocalSize(A,&m,NULL)); 4498 PetscCall(MatGetSize(A,NULL,&N)); 4499 /* Should this be the type of the diagonal block of A? 
*/ 4500 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 4501 PetscCall(MatSetSizes(B,m,N,m,N)); 4502 PetscCall(MatSetBlockSizesFromMats(B,A,A)); 4503 PetscCall(MatSetType(B,MATSEQAIJ)); 4504 PetscCall(MatSeqAIJSetPreallocation(B,0,NULL)); 4505 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 4506 for (i=0; i<m; i++) { 4507 PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values)); 4508 PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES)); 4509 PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values)); 4510 } 4511 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 4512 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 4513 4514 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 4515 PetscCall(PetscStrlen(outfile,&len)); 4516 PetscCall(PetscMalloc1(len+6,&name)); 4517 PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank)); 4518 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out)); 4519 PetscCall(PetscFree(name)); 4520 PetscCall(MatView(B,out)); 4521 PetscCall(PetscViewerDestroy(&out)); 4522 PetscCall(MatDestroy(&B)); 4523 PetscFunctionReturn(0); 4524 } 4525 4526 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4527 { 4528 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4529 4530 PetscFunctionBegin; 4531 if (!merge) PetscFunctionReturn(0); 4532 PetscCall(PetscFree(merge->id_r)); 4533 PetscCall(PetscFree(merge->len_s)); 4534 PetscCall(PetscFree(merge->len_r)); 4535 PetscCall(PetscFree(merge->bi)); 4536 PetscCall(PetscFree(merge->bj)); 4537 PetscCall(PetscFree(merge->buf_ri[0])); 4538 PetscCall(PetscFree(merge->buf_ri)); 4539 PetscCall(PetscFree(merge->buf_rj[0])); 4540 PetscCall(PetscFree(merge->buf_rj)); 4541 PetscCall(PetscFree(merge->coi)); 4542 PetscCall(PetscFree(merge->coj)); 4543 PetscCall(PetscFree(merge->owners_co)); 4544 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4545 PetscCall(PetscFree(merge)); 4546 PetscFunctionReturn(0); 4547 } 4548 4549 #include <../src/mat/utils/freespace.h> 4550 #include 
<petscbt.h> 4551 4552 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4553 { 4554 MPI_Comm comm; 4555 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4556 PetscMPIInt size,rank,taga,*len_s; 4557 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4558 PetscInt proc,m; 4559 PetscInt **buf_ri,**buf_rj; 4560 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4561 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4562 MPI_Request *s_waits,*r_waits; 4563 MPI_Status *status; 4564 const MatScalar *aa,*a_a; 4565 MatScalar **abuf_r,*ba_i; 4566 Mat_Merge_SeqsToMPI *merge; 4567 PetscContainer container; 4568 4569 PetscFunctionBegin; 4570 PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm)); 4571 PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0)); 4572 4573 PetscCallMPI(MPI_Comm_size(comm,&size)); 4574 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4575 4576 PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container)); 4577 PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4578 PetscCall(PetscContainerGetPointer(container,(void**)&merge)); 4579 PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a)); 4580 aa = a_a; 4581 4582 bi = merge->bi; 4583 bj = merge->bj; 4584 buf_ri = merge->buf_ri; 4585 buf_rj = merge->buf_rj; 4586 4587 PetscCall(PetscMalloc1(size,&status)); 4588 owners = merge->rowmap->range; 4589 len_s = merge->len_s; 4590 4591 /* send and recv matrix values */ 4592 /*-----------------------------*/ 4593 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga)); 4594 PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits)); 4595 4596 PetscCall(PetscMalloc1(merge->nsend+1,&s_waits)); 4597 for (proc=0,k=0; proc<size; proc++) { 4598 if (!len_s[proc]) continue; 4599 i = owners[proc]; 4600 PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k)); 4601 k++; 4602 } 4603 4604 if 
(merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status)); 4605 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status)); 4606 PetscCall(PetscFree(status)); 4607 4608 PetscCall(PetscFree(s_waits)); 4609 PetscCall(PetscFree(r_waits)); 4610 4611 /* insert mat values of mpimat */ 4612 /*----------------------------*/ 4613 PetscCall(PetscMalloc1(N,&ba_i)); 4614 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4615 4616 for (k=0; k<merge->nrecv; k++) { 4617 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4618 nrows = *(buf_ri_k[k]); 4619 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4620 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4621 } 4622 4623 /* set values of ba */ 4624 m = merge->rowmap->n; 4625 for (i=0; i<m; i++) { 4626 arow = owners[rank] + i; 4627 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4628 bnzi = bi[i+1] - bi[i]; 4629 PetscCall(PetscArrayzero(ba_i,bnzi)); 4630 4631 /* add local non-zero vals of this proc's seqmat into ba */ 4632 anzi = ai[arow+1] - ai[arow]; 4633 aj = a->j + ai[arow]; 4634 aa = a_a + ai[arow]; 4635 nextaj = 0; 4636 for (j=0; nextaj<anzi; j++) { 4637 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4638 ba_i[j] += aa[nextaj++]; 4639 } 4640 } 4641 4642 /* add received vals into ba */ 4643 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4644 /* i-th row */ 4645 if (i == *nextrow[k]) { 4646 anzi = *(nextai[k]+1) - *nextai[k]; 4647 aj = buf_rj[k] + *(nextai[k]); 4648 aa = abuf_r[k] + *(nextai[k]); 4649 nextaj = 0; 4650 for (j=0; nextaj<anzi; j++) { 4651 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4652 ba_i[j] += aa[nextaj++]; 4653 } 4654 } 4655 nextrow[k]++; nextai[k]++; 4656 } 4657 } 4658 PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES)); 4659 } 4660 PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a)); 4661 
PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY)); 4662 PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY)); 4663 4664 PetscCall(PetscFree(abuf_r[0])); 4665 PetscCall(PetscFree(abuf_r)); 4666 PetscCall(PetscFree(ba_i)); 4667 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4668 PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0)); 4669 PetscFunctionReturn(0); 4670 } 4671 4672 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4673 { 4674 Mat B_mpi; 4675 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4676 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4677 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4678 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4679 PetscInt len,proc,*dnz,*onz,bs,cbs; 4680 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4681 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4682 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4683 MPI_Status *status; 4684 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4685 PetscBT lnkbt; 4686 Mat_Merge_SeqsToMPI *merge; 4687 PetscContainer container; 4688 4689 PetscFunctionBegin; 4690 PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0)); 4691 4692 /* make sure it is a PETSc comm */ 4693 PetscCall(PetscCommDuplicate(comm,&comm,NULL)); 4694 PetscCallMPI(MPI_Comm_size(comm,&size)); 4695 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4696 4697 PetscCall(PetscNew(&merge)); 4698 PetscCall(PetscMalloc1(size,&status)); 4699 4700 /* determine row ownership */ 4701 /*---------------------------------------------------------*/ 4702 PetscCall(PetscLayoutCreate(comm,&merge->rowmap)); 4703 PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m)); 4704 PetscCall(PetscLayoutSetSize(merge->rowmap,M)); 4705 PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1)); 4706 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4707 PetscCall(PetscMalloc1(size,&len_si)); 4708 PetscCall(PetscMalloc1(size,&merge->len_s)); 4709 
4710 m = merge->rowmap->n; 4711 owners = merge->rowmap->range; 4712 4713 /* determine the number of messages to send, their lengths */ 4714 /*---------------------------------------------------------*/ 4715 len_s = merge->len_s; 4716 4717 len = 0; /* length of buf_si[] */ 4718 merge->nsend = 0; 4719 for (proc=0; proc<size; proc++) { 4720 len_si[proc] = 0; 4721 if (proc == rank) { 4722 len_s[proc] = 0; 4723 } else { 4724 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4725 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4726 } 4727 if (len_s[proc]) { 4728 merge->nsend++; 4729 nrows = 0; 4730 for (i=owners[proc]; i<owners[proc+1]; i++) { 4731 if (ai[i+1] > ai[i]) nrows++; 4732 } 4733 len_si[proc] = 2*(nrows+1); 4734 len += len_si[proc]; 4735 } 4736 } 4737 4738 /* determine the number and length of messages to receive for ij-structure */ 4739 /*-------------------------------------------------------------------------*/ 4740 PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv)); 4741 PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri)); 4742 4743 /* post the Irecv of j-structure */ 4744 /*-------------------------------*/ 4745 PetscCall(PetscCommGetNewTag(comm,&tagj)); 4746 PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits)); 4747 4748 /* post the Isend of j-structure */ 4749 /*--------------------------------*/ 4750 PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits)); 4751 4752 for (proc=0, k=0; proc<size; proc++) { 4753 if (!len_s[proc]) continue; 4754 i = owners[proc]; 4755 PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k)); 4756 k++; 4757 } 4758 4759 /* receives and sends of j-structure are complete */ 4760 /*------------------------------------------------*/ 4761 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status)); 4762 if 
(merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status)); 4763 4764 /* send and recv i-structure */ 4765 /*---------------------------*/ 4766 PetscCall(PetscCommGetNewTag(comm,&tagi)); 4767 PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits)); 4768 4769 PetscCall(PetscMalloc1(len+1,&buf_s)); 4770 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4771 for (proc=0,k=0; proc<size; proc++) { 4772 if (!len_s[proc]) continue; 4773 /* form outgoing message for i-structure: 4774 buf_si[0]: nrows to be sent 4775 [1:nrows]: row index (global) 4776 [nrows+1:2*nrows+1]: i-structure index 4777 */ 4778 /*-------------------------------------------*/ 4779 nrows = len_si[proc]/2 - 1; 4780 buf_si_i = buf_si + nrows+1; 4781 buf_si[0] = nrows; 4782 buf_si_i[0] = 0; 4783 nrows = 0; 4784 for (i=owners[proc]; i<owners[proc+1]; i++) { 4785 anzi = ai[i+1] - ai[i]; 4786 if (anzi) { 4787 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4788 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4789 nrows++; 4790 } 4791 } 4792 PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k)); 4793 k++; 4794 buf_si += len_si[proc]; 4795 } 4796 4797 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status)); 4798 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status)); 4799 4800 PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv)); 4801 for (i=0; i<merge->nrecv; i++) { 4802 PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i])); 4803 } 4804 4805 PetscCall(PetscFree(len_si)); 4806 PetscCall(PetscFree(len_ri)); 4807 PetscCall(PetscFree(rj_waits)); 4808 PetscCall(PetscFree2(si_waits,sj_waits)); 4809 PetscCall(PetscFree(ri_waits)); 4810 PetscCall(PetscFree(buf_s)); 4811 PetscCall(PetscFree(status)); 4812 4813 /* compute a local seq matrix in each processor */ 4814 
/*----------------------------------------------*/ 4815 /* allocate bi array and free space for accumulating nonzero column info */ 4816 PetscCall(PetscMalloc1(m+1,&bi)); 4817 bi[0] = 0; 4818 4819 /* create and initialize a linked list */ 4820 nlnk = N+1; 4821 PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt)); 4822 4823 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4824 len = ai[owners[rank+1]] - ai[owners[rank]]; 4825 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space)); 4826 4827 current_space = free_space; 4828 4829 /* determine symbolic info for each local row */ 4830 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4831 4832 for (k=0; k<merge->nrecv; k++) { 4833 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4834 nrows = *buf_ri_k[k]; 4835 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4836 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4837 } 4838 4839 MatPreallocateBegin(comm,m,n,dnz,onz); 4840 len = 0; 4841 for (i=0; i<m; i++) { 4842 bnzi = 0; 4843 /* add local non-zero cols of this proc's seqmat into lnk */ 4844 arow = owners[rank] + i; 4845 anzi = ai[arow+1] - ai[arow]; 4846 aj = a->j + ai[arow]; 4847 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4848 bnzi += nlnk; 4849 /* add received col data into lnk */ 4850 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4851 if (i == *nextrow[k]) { /* i-th row */ 4852 anzi = *(nextai[k]+1) - *nextai[k]; 4853 aj = buf_rj[k] + *nextai[k]; 4854 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4855 bnzi += nlnk; 4856 nextrow[k]++; nextai[k]++; 4857 } 4858 } 4859 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4860 4861 /* if free space is not available, make more free space */ 4862 if (current_space->local_remaining<bnzi) { 4863 
PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space)); 4864 nspacedouble++; 4865 } 4866 /* copy data into free space, then initialize lnk */ 4867 PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt)); 4868 PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz)); 4869 4870 current_space->array += bnzi; 4871 current_space->local_used += bnzi; 4872 current_space->local_remaining -= bnzi; 4873 4874 bi[i+1] = bi[i] + bnzi; 4875 } 4876 4877 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4878 4879 PetscCall(PetscMalloc1(bi[m]+1,&bj)); 4880 PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); 4881 PetscCall(PetscLLDestroy(lnk,lnkbt)); 4882 4883 /* create symbolic parallel matrix B_mpi */ 4884 /*---------------------------------------*/ 4885 PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs)); 4886 PetscCall(MatCreate(comm,&B_mpi)); 4887 if (n==PETSC_DECIDE) { 4888 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N)); 4889 } else { 4890 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4891 } 4892 PetscCall(MatSetBlockSizes(B_mpi,bs,cbs)); 4893 PetscCall(MatSetType(B_mpi,MATMPIAIJ)); 4894 PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz)); 4895 MatPreallocateEnd(dnz,onz); 4896 PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 4897 4898 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4899 B_mpi->assembled = PETSC_FALSE; 4900 merge->bi = bi; 4901 merge->bj = bj; 4902 merge->buf_ri = buf_ri; 4903 merge->buf_rj = buf_rj; 4904 merge->coi = NULL; 4905 merge->coj = NULL; 4906 merge->owners_co = NULL; 4907 4908 PetscCall(PetscCommDestroy(&comm)); 4909 4910 /* attach the supporting struct to B_mpi for reuse */ 4911 PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container)); 4912 PetscCall(PetscContainerSetPointer(container,merge)); 4913 PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI)); 4914 
PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container)); 4915 PetscCall(PetscContainerDestroy(&container)); 4916 *mpimat = B_mpi; 4917 4918 PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0)); 4919 PetscFunctionReturn(0); 4920 } 4921 4922 /*@C 4923 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4924 matrices from each processor 4925 4926 Collective 4927 4928 Input Parameters: 4929 + comm - the communicators the parallel matrix will live on 4930 . seqmat - the input sequential matrices 4931 . m - number of local rows (or PETSC_DECIDE) 4932 . n - number of local columns (or PETSC_DECIDE) 4933 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4934 4935 Output Parameter: 4936 . mpimat - the parallel matrix generated 4937 4938 Level: advanced 4939 4940 Notes: 4941 The dimensions of the sequential matrix in each processor MUST be the same. 4942 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4943 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
4944 @*/ 4945 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4946 { 4947 PetscMPIInt size; 4948 4949 PetscFunctionBegin; 4950 PetscCallMPI(MPI_Comm_size(comm,&size)); 4951 if (size == 1) { 4952 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4953 if (scall == MAT_INITIAL_MATRIX) { 4954 PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat)); 4955 } else { 4956 PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN)); 4957 } 4958 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4959 PetscFunctionReturn(0); 4960 } 4961 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4962 if (scall == MAT_INITIAL_MATRIX) { 4963 PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat)); 4964 } 4965 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat)); 4966 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4967 PetscFunctionReturn(0); 4968 } 4969 4970 /*@ 4971 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4972 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4973 with MatGetSize() 4974 4975 Not Collective 4976 4977 Input Parameters: 4978 + A - the matrix 4979 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4980 4981 Output Parameter: 4982 . A_loc - the local sequential matrix generated 4983 4984 Level: developer 4985 4986 Notes: 4987 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 4988 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 4989 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 4990 modify the values of the returned A_loc. 
4991 4992 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 4993 @*/ 4994 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4995 { 4996 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4997 Mat_SeqAIJ *mat,*a,*b; 4998 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4999 const PetscScalar *aa,*ba,*aav,*bav; 5000 PetscScalar *ca,*cam; 5001 PetscMPIInt size; 5002 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5003 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5004 PetscBool match; 5005 5006 PetscFunctionBegin; 5007 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match)); 5008 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5009 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5010 if (size == 1) { 5011 if (scall == MAT_INITIAL_MATRIX) { 5012 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5013 *A_loc = mpimat->A; 5014 } else if (scall == MAT_REUSE_MATRIX) { 5015 PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN)); 5016 } 5017 PetscFunctionReturn(0); 5018 } 5019 5020 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5021 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5022 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5023 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5024 PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav)); 5025 PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav)); 5026 aa = aav; 5027 ba = bav; 5028 if (scall == MAT_INITIAL_MATRIX) { 5029 PetscCall(PetscMalloc1(1+am,&ci)); 5030 ci[0] = 0; 5031 for (i=0; i<am; i++) { 5032 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5033 } 5034 PetscCall(PetscMalloc1(1+ci[am],&cj)); 5035 PetscCall(PetscMalloc1(1+ci[am],&ca)); 5036 k = 0; 5037 for (i=0; i<am; i++) { 5038 ncols_o = bi[i+1] - bi[i]; 5039 ncols_d = ai[i+1] - ai[i]; 5040 /* off-diagonal portion of A */ 5041 for (jo=0; jo<ncols_o; jo++) { 5042 col = cmap[*bj]; 5043 if (col >= cstart) break; 5044 cj[k] = col; 
bj++; 5045 ca[k++] = *ba++; 5046 } 5047 /* diagonal portion of A */ 5048 for (j=0; j<ncols_d; j++) { 5049 cj[k] = cstart + *aj++; 5050 ca[k++] = *aa++; 5051 } 5052 /* off-diagonal portion of A */ 5053 for (j=jo; j<ncols_o; j++) { 5054 cj[k] = cmap[*bj++]; 5055 ca[k++] = *ba++; 5056 } 5057 } 5058 /* put together the new matrix */ 5059 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc)); 5060 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5061 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5062 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5063 mat->free_a = PETSC_TRUE; 5064 mat->free_ij = PETSC_TRUE; 5065 mat->nonew = 0; 5066 } else if (scall == MAT_REUSE_MATRIX) { 5067 mat =(Mat_SeqAIJ*)(*A_loc)->data; 5068 ci = mat->i; 5069 cj = mat->j; 5070 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam)); 5071 for (i=0; i<am; i++) { 5072 /* off-diagonal portion of A */ 5073 ncols_o = bi[i+1] - bi[i]; 5074 for (jo=0; jo<ncols_o; jo++) { 5075 col = cmap[*bj]; 5076 if (col >= cstart) break; 5077 *cam++ = *ba++; bj++; 5078 } 5079 /* diagonal portion of A */ 5080 ncols_d = ai[i+1] - ai[i]; 5081 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5082 /* off-diagonal portion of A */ 5083 for (j=jo; j<ncols_o; j++) { 5084 *cam++ = *ba++; bj++; 5085 } 5086 } 5087 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam)); 5088 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5089 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav)); 5090 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav)); 5091 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5092 PetscFunctionReturn(0); 5093 } 5094 5095 /*@ 5096 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5097 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5098 5099 Not Collective 5100 5101 Input Parameters: 5102 + A - the matrix 5103 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5104 5105 Output Parameters: 5106 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5107 - A_loc - the local sequential matrix generated 5108 5109 Level: developer 5110 5111 Notes: 5112 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5113 5114 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5115 5116 @*/ 5117 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5118 { 5119 Mat Ao,Ad; 5120 const PetscInt *cmap; 5121 PetscMPIInt size; 5122 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5123 5124 PetscFunctionBegin; 5125 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap)); 5126 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5127 if (size == 1) { 5128 if (scall == MAT_INITIAL_MATRIX) { 5129 PetscCall(PetscObjectReference((PetscObject)Ad)); 5130 *A_loc = Ad; 5131 } else if (scall == MAT_REUSE_MATRIX) { 5132 PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN)); 5133 } 5134 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob)); 5135 PetscFunctionReturn(0); 5136 } 5137 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f)); 5138 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5139 if (f) { 5140 PetscCall((*f)(A,scall,glob,A_loc)); 5141 } else { 5142 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5143 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5144 Mat_SeqAIJ *c; 5145 PetscInt *ai = a->i, *aj = a->j; 5146 PetscInt *bi = b->i, *bj = b->j; 5147 PetscInt *ci,*cj; 5148 
const PetscScalar *aa,*ba; 5149 PetscScalar *ca; 5150 PetscInt i,j,am,dn,on; 5151 5152 PetscCall(MatGetLocalSize(Ad,&am,&dn)); 5153 PetscCall(MatGetLocalSize(Ao,NULL,&on)); 5154 PetscCall(MatSeqAIJGetArrayRead(Ad,&aa)); 5155 PetscCall(MatSeqAIJGetArrayRead(Ao,&ba)); 5156 if (scall == MAT_INITIAL_MATRIX) { 5157 PetscInt k; 5158 PetscCall(PetscMalloc1(1+am,&ci)); 5159 PetscCall(PetscMalloc1(ai[am]+bi[am],&cj)); 5160 PetscCall(PetscMalloc1(ai[am]+bi[am],&ca)); 5161 ci[0] = 0; 5162 for (i=0,k=0; i<am; i++) { 5163 const PetscInt ncols_o = bi[i+1] - bi[i]; 5164 const PetscInt ncols_d = ai[i+1] - ai[i]; 5165 ci[i+1] = ci[i] + ncols_o + ncols_d; 5166 /* diagonal portion of A */ 5167 for (j=0; j<ncols_d; j++,k++) { 5168 cj[k] = *aj++; 5169 ca[k] = *aa++; 5170 } 5171 /* off-diagonal portion of A */ 5172 for (j=0; j<ncols_o; j++,k++) { 5173 cj[k] = dn + *bj++; 5174 ca[k] = *ba++; 5175 } 5176 } 5177 /* put together the new matrix */ 5178 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc)); 5179 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5180 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5181 c = (Mat_SeqAIJ*)(*A_loc)->data; 5182 c->free_a = PETSC_TRUE; 5183 c->free_ij = PETSC_TRUE; 5184 c->nonew = 0; 5185 PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name)); 5186 } else if (scall == MAT_REUSE_MATRIX) { 5187 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca)); 5188 for (i=0; i<am; i++) { 5189 const PetscInt ncols_d = ai[i+1] - ai[i]; 5190 const PetscInt ncols_o = bi[i+1] - bi[i]; 5191 /* diagonal portion of A */ 5192 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5193 /* off-diagonal portion of A */ 5194 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5195 } 5196 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca)); 5197 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5198 PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa)); 5199 PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa)); 5200 if (glob) { 5201 PetscInt cst, *gidx; 5202 5203 PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL)); 5204 PetscCall(PetscMalloc1(dn+on,&gidx)); 5205 for (i=0; i<dn; i++) gidx[i] = cst + i; 5206 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5207 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob)); 5208 } 5209 } 5210 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5211 PetscFunctionReturn(0); 5212 } 5213 5214 /*@C 5215 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5216 5217 Not Collective 5218 5219 Input Parameters: 5220 + A - the matrix 5221 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5222 - row, col - index sets of rows and columns to extract (or NULL) 5223 5224 Output Parameter: 5225 . 
A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
  /* no row IS given: take the locally owned row range [rstart,rend) */
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
  } else {
    isrowa = *row;
  }
  /* no column IS given: garray entries below the owned column start, then the owned columns,
     then the remaining garray entries */
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA+nzB, &idx));
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i; /* first garray position that is not below the owned column start */
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
  } else {
    iscola = *col;
  }
  /* when reusing, MatCreateSubMatrices() is given a one-entry array holding the existing matrix */
  if (scall != MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc1(1,&aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc)); /* frees only the one-entry array; the matrix itself is returned in A_loc */
  /* destroy only the index sets created in this routine */
  if (!row) {
    PetscCall(ISDestroy(&isrowa));
  }
  if (!col) {
    PetscCall(ISDestroy(&iscola));
  }
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
  PetscFunctionReturn(0);
}

/*
 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 *
 * P     - matrix whose rows are gathered
 * rows  - global row indices to gather (each may be owned by any process)
 * P_oth - on output, a new SeqAIJ matrix with one row per entry of rows; the two PetscSF objects
 *         used for the gather are composed on it as "diagsf" and "offdiagsf"
 *
 * While the communication is in flight, the column index arrays of the diagonal and off-diagonal
 * blocks of P (pd->j, po->j) are rewritten in place to global indices; they are converted back
 * before this routine returns.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt              owner;
  PetscSFNode              *iremote,*oiremote;
  const PetscInt           *lrowindices;
  PetscSF                  sf,osf;
  PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm                 comm;
  ISLocalToGlobalMapping   mapping;
  const PetscScalar        *pd_a,*po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
  PetscCall(ISGetLocalSize(rows,&nrows));
  PetscCall(PetscCalloc1(nrows,&iremote));
  PetscCall(ISGetIndices(rows,&lrowindices));
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm,&sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets; iremote is handed over with PETSC_OWN_POINTER and is not freed here
   * */
  PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* root data: per local row, a (diag,offdiag) pair of nonzero counts (nrcols) and of
   * starting offsets into the diag/offdiag entry arrays (roffsets) */
  PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
  PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
  PetscCall(PetscCalloc1(nrows,&pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we know the relative location of each row */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  PetscCall(PetscCalloc1(2*nrows,&nlcols));
  PetscCall(PetscCalloc1(2*nrows,&loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  /* per requested row: total nonzeros (pnnz), plus running totals of diag/offdiag entries */
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oilocal));
  /* the totals are reset and reused as fill positions; ntotalcols is the position in P_oth's entry arrays */
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index   = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank    = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++]        = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank    = owner;
      oilocal[ontotalcols++]        = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows,&lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm,&sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm,&osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
  /* Convert to global indices for diag matrix (in place; undone below) */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
  /* off-diagonal column indices are likewise mapped to global numbering in place */
  PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
  PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  nout = 0;
  /* map po->j back to local numbering; every entry must map back, which the check below enforces */
  PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
  PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 *
 * A     - matrix whose off-diagonal column map (garray) selects the rows
 * P     - matrix the rows are taken from
 * dof   - each garray entry is divided by dof to obtain the row of P
 * reuse - MAT_INITIAL_MATRIX builds P_oth (destroying any matrix already in *P_oth);
 *         MAT_REUSE_MATRIX only refreshes its values through the SFs composed on it
 * P_oth - the gathered sequential matrix; the IS composed as "aoffdiagtopothmapping" holds, for
 *         each off-diagonal column of A, the position of its row in the sorted unique row list
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ            *p_oth;
  IS                    rows,map;
  PetscHMapI            hamp;
  PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm              comm;
  PetscSF               sf,osf;
  PetscBool             has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
    count = 0;
    /* Assume that  a->g is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key  = a->garray[i]/dof;
      PetscCall(PetscHMapIHas(hamp,key,&has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp,key,count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count-1;
      }
    }
    PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
    PetscCall(PetscHMapIGetSize(hamp,&htsize));
    PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
    PetscCall(PetscCalloc1(htsize,&rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    /* the keys are sorted before being used as the row list */
    PetscCall(PetscSortInt(htsize,rowindices));
    PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a,*po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
    PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
    PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
  PetscFunctionReturn(0);
}

/*@C
  MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

  Collective on Mat

  Input Parameters:
+ A - the first matrix in mpiaij format
.
B - the second matrix in mpiaij format
- scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

  Output Parameters:
+ rowb - On input index sets of rows of B to extract (or NULL), modified on output
. colb - On input index sets of columns of B to extract (or NULL), modified on output
- B_seq - the sequential matrix generated

  Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data;
  IS         isrowb,iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  /* the column layout of A has to coincide with the row layout of B on this process */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));

  if (scall == MAT_INITIAL_MATRIX) {
    const PetscInt first   = A->cmap->rstart;  /* first column of A owned by this process */
    const PetscInt *garray = aij->garray;      /* global ids of the off-diagonal columns of A */
    const PetscInt ndiag   = aij->A->cmap->n;
    const PetscInt noff    = aij->B->cmap->n;
    PetscInt       *rowidx,nsel = 0,k,split;

    PetscCall(PetscMalloc1(ndiag+noff,&rowidx));
    /* rows of B below the locally owned range */
    for (split=0; split<noff && garray[split] < first; split++) rowidx[nsel++] = garray[split];
    /* locally owned rows */
    for (k=0; k<ndiag; k++) rowidx[nsel++] = first + k;
    /* remaining off-process rows */
    for (k=split; k<noff; k++) rowidx[nsel++] = garray[k];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nsel,rowidx,PETSC_OWN_POINTER,&isrowb));
    /* all columns of B are extracted */
    PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
  } else {
    PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* MatCreateSubMatrices() is given a one-entry array holding the matrix to refill */
    PetscCall(PetscMalloc1(1,&bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));

  /* hand each index set back when the caller supplied a slot for it, otherwise destroy it */
  if (rowb) *rowb = isrowb;
  else PetscCall(ISDestroy(&isrowb));
  if (colb) *colb = iscolb;
  else PetscCall(ISDestroy(&iscolb));

  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable..
5628 5629 Level: developer 5630 5631 */ 5632 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5633 { 5634 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5635 Mat_SeqAIJ *b_oth; 5636 VecScatter ctx; 5637 MPI_Comm comm; 5638 const PetscMPIInt *rprocs,*sprocs; 5639 const PetscInt *srow,*rstarts,*sstarts; 5640 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5641 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5642 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5643 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5644 PetscMPIInt size,tag,rank,nreqs; 5645 5646 PetscFunctionBegin; 5647 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5648 PetscCallMPI(MPI_Comm_size(comm,&size)); 5649 5650 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5651 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5652 } 5653 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0)); 5654 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 5655 5656 if (size == 1) { 5657 startsj_s = NULL; 5658 bufa_ptr = NULL; 5659 *B_oth = NULL; 5660 PetscFunctionReturn(0); 5661 } 5662 5663 ctx = a->Mvctx; 5664 tag = ((PetscObject)ctx)->tag; 5665 5666 PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5667 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5668 PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs)); 5669 PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs)); 5670 
PetscCall(PetscMalloc1(nreqs,&reqs)); 5671 rwaits = reqs; 5672 swaits = reqs + nrecvs; 5673 5674 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5675 if (scall == MAT_INITIAL_MATRIX) { 5676 /* i-array */ 5677 /*---------*/ 5678 /* post receives */ 5679 if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5680 for (i=0; i<nrecvs; i++) { 5681 rowlen = rvalues + rstarts[i]*rbs; 5682 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5683 PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5684 } 5685 5686 /* pack the outgoing message */ 5687 PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj)); 5688 5689 sstartsj[0] = 0; 5690 rstartsj[0] = 0; 5691 len = 0; /* total length of j or a array to be sent */ 5692 if (nsends) { 5693 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5694 PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues)); 5695 } 5696 for (i=0; i<nsends; i++) { 5697 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5698 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5699 for (j=0; j<nrows; j++) { 5700 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5701 for (l=0; l<sbs; l++) { 5702 PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */ 5703 5704 rowlen[j*sbs+l] = ncols; 5705 5706 len += ncols; 5707 PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); 5708 } 5709 k++; 5710 } 5711 PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5712 5713 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5714 } 5715 /* recvs and sends of i-array are completed */ 5716 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5717 PetscCall(PetscFree(svalues)); 5718 5719 /* allocate buffers for sending j and a arrays */ 5720 PetscCall(PetscMalloc1(len+1,&bufj)); 5721 
PetscCall(PetscMalloc1(len+1,&bufa)); 5722 5723 /* create i-array of B_oth */ 5724 PetscCall(PetscMalloc1(aBn+2,&b_othi)); 5725 5726 b_othi[0] = 0; 5727 len = 0; /* total length of j or a array to be received */ 5728 k = 0; 5729 for (i=0; i<nrecvs; i++) { 5730 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5731 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5732 for (j=0; j<nrows; j++) { 5733 b_othi[k+1] = b_othi[k] + rowlen[j]; 5734 PetscCall(PetscIntSumError(rowlen[j],len,&len)); 5735 k++; 5736 } 5737 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5738 } 5739 PetscCall(PetscFree(rvalues)); 5740 5741 /* allocate space for j and a arrrays of B_oth */ 5742 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj)); 5743 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha)); 5744 5745 /* j-array */ 5746 /*---------*/ 5747 /* post receives of j-array */ 5748 for (i=0; i<nrecvs; i++) { 5749 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5750 PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5751 } 5752 5753 /* pack the outgoing message j-array */ 5754 if (nsends) k = sstarts[0]; 5755 for (i=0; i<nsends; i++) { 5756 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5757 bufJ = bufj+sstartsj[i]; 5758 for (j=0; j<nrows; j++) { 5759 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5760 for (ll=0; ll<sbs; ll++) { 5761 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5762 for (l=0; l<ncols; l++) { 5763 *bufJ++ = cols[l]; 5764 } 5765 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5766 } 5767 } 5768 PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5769 } 5770 5771 /* recvs and sends of j-array are completed */ 5772 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5773 } else if (scall == MAT_REUSE_MATRIX) { 5774 sstartsj = *startsj_s; 5775 rstartsj = 
*startsj_r; 5776 bufa = *bufa_ptr; 5777 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5778 PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha)); 5779 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5780 5781 /* a-array */ 5782 /*---------*/ 5783 /* post receives of a-array */ 5784 for (i=0; i<nrecvs; i++) { 5785 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5786 PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i)); 5787 } 5788 5789 /* pack the outgoing message a-array */ 5790 if (nsends) k = sstarts[0]; 5791 for (i=0; i<nsends; i++) { 5792 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5793 bufA = bufa+sstartsj[i]; 5794 for (j=0; j<nrows; j++) { 5795 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5796 for (ll=0; ll<sbs; ll++) { 5797 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5798 for (l=0; l<ncols; l++) { 5799 *bufA++ = vals[l]; 5800 } 5801 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5802 } 5803 } 5804 PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i)); 5805 } 5806 /* recvs and sends of a-array are completed */ 5807 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5808 PetscCall(PetscFree(reqs)); 5809 5810 if (scall == MAT_INITIAL_MATRIX) { 5811 /* put together the new matrix */ 5812 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth)); 5813 5814 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5815 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5816 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5817 b_oth->free_a = PETSC_TRUE; 5818 b_oth->free_ij = PETSC_TRUE; 5819 b_oth->nonew = 0; 5820 5821 PetscCall(PetscFree(bufj)); 5822 if (!startsj_s || !bufa_ptr) { 5823 PetscCall(PetscFree2(sstartsj,rstartsj)); 5824 PetscCall(PetscFree(bufa_ptr)); 5825 } else { 5826 *startsj_s = sstartsj; 5827 *startsj_r = rstartsj; 5828 *bufa_ptr = bufa; 5829 } 5830 } else if (scall == MAT_REUSE_MATRIX) { 5831 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha)); 5832 } 5833 5834 PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5835 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs)); 5836 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0)); 5837 PetscFunctionReturn(0); 5838 } 5839 5840 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5841 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5842 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5843 #if defined(PETSC_HAVE_MKL_SPARSE) 5844 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5845 #endif 5846 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5847 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5848 #if defined(PETSC_HAVE_ELEMENTAL) 5849 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5850 #endif 5851 #if defined(PETSC_HAVE_SCALAPACK) 5852 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5853 #endif 5854 #if defined(PETSC_HAVE_HYPRE) 5855 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5856 #endif 5857 #if defined(PETSC_HAVE_CUDA) 5858 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5859 #endif 5860 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 
5861 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5862 #endif 5863 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5864 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5865 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5866 5867 /* 5868 Computes (B'*A')' since computing B*A directly is untenable 5869 5870 n p p 5871 [ ] [ ] [ ] 5872 m [ A ] * n [ B ] = m [ C ] 5873 [ ] [ ] [ ] 5874 5875 */ 5876 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5877 { 5878 Mat At,Bt,Ct; 5879 5880 PetscFunctionBegin; 5881 PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At)); 5882 PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt)); 5883 PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct)); 5884 PetscCall(MatDestroy(&At)); 5885 PetscCall(MatDestroy(&Bt)); 5886 PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C)); 5887 PetscCall(MatDestroy(&Ct)); 5888 PetscFunctionReturn(0); 5889 } 5890 5891 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5892 { 5893 PetscBool cisdense; 5894 5895 PetscFunctionBegin; 5896 PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n); 5897 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N)); 5898 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 5899 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 5900 if (!cisdense) { 5901 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 5902 } 5903 PetscCall(MatSetUp(C)); 5904 5905 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5906 PetscFunctionReturn(0); 5907 } 5908 5909 /* ----------------------------------------------------------------*/ 5910 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 5911 { 5912 Mat_Product *product = 
C->product; 5913 Mat A = product->A,B=product->B; 5914 5915 PetscFunctionBegin; 5916 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 5917 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5918 5919 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 5920 C->ops->productsymbolic = MatProductSymbolic_AB; 5921 PetscFunctionReturn(0); 5922 } 5923 5924 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 5925 { 5926 Mat_Product *product = C->product; 5927 5928 PetscFunctionBegin; 5929 if (product->type == MATPRODUCT_AB) { 5930 PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 5931 } 5932 PetscFunctionReturn(0); 5933 } 5934 5935 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 5936 5937 Input Parameters: 5938 5939 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 5940 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 5941 5942 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 5943 5944 For Set1, j1[] contains column indices of the nonzeros. 5945 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 5946 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 5947 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 5948 5949 Similar for Set2. 5950 5951 This routine merges the two sets of nonzeros row by row and removes repeats. 5952 5953 Output Parameters: (memory is allocated by the caller) 5954 5955 i[],j[]: the CSR of the merged matrix, which has m rows. 5956 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) 
corresponds to imap1[k]-th unique nonzero in the merged matrix. 5957 imap2[]: similar to imap1[], but for Set2. 5958 Note we order nonzeros row-by-row and from left to right. 5959 */ 5960 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[], 5961 const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[], 5962 PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[]) 5963 { 5964 PetscInt r,m; /* Row index of mat */ 5965 PetscCount t,t1,t2,b1,e1,b2,e2; 5966 5967 PetscFunctionBegin; 5968 PetscCall(MatGetLocalSize(mat,&m,NULL)); 5969 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 5970 i[0] = 0; 5971 for (r=0; r<m; r++) { /* Do row by row merging */ 5972 b1 = rowBegin1[r]; 5973 e1 = rowEnd1[r]; 5974 b2 = rowBegin2[r]; 5975 e2 = rowEnd2[r]; 5976 while (b1 < e1 && b2 < e2) { 5977 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 5978 j[t] = j1[b1]; 5979 imap1[t1] = t; 5980 imap2[t2] = t; 5981 b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */ 5982 b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 5983 t1++; t2++; t++; 5984 } else if (j1[b1] < j2[b2]) { 5985 j[t] = j1[b1]; 5986 imap1[t1] = t; 5987 b1 += jmap1[t1+1] - jmap1[t1]; 5988 t1++; t++; 5989 } else { 5990 j[t] = j2[b2]; 5991 imap2[t2] = t; 5992 b2 += jmap2[t2+1] - jmap2[t2]; 5993 t2++; t++; 5994 } 5995 } 5996 /* Merge the remaining in either j1[] or j2[] */ 5997 while (b1 < e1) { 5998 j[t] = j1[b1]; 5999 imap1[t1] = t; 6000 b1 += jmap1[t1+1] - jmap1[t1]; 6001 t1++; t++; 6002 } 6003 while (b2 < e2) { 6004 j[t] = j2[b2]; 6005 imap2[t2] = t; 6006 b2 += jmap2[t2+1] - jmap2[t2]; 6007 t2++; t++; 6008 } 6009 i[r+1] = t; 6010 } 6011 PetscFunctionReturn(0); 6012 } 6013 6014 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those 
in the off-diagonal block 6015 6016 Input Parameters: 6017 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6018 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6019 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6020 6021 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6022 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6023 6024 Output Parameters: 6025 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6026 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6027 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6028 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6029 6030 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6031 Atot: number of entries belonging to the diagonal block. 6032 Annz: number of unique nonzeros belonging to the diagonal block. 6033 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6034 repeats (i.e., same 'i,j' pair). 6035 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6036 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6037 6038 Atot: number of entries belonging to the diagonal block 6039 Annz: number of unique nonzeros belonging to the diagonal block. 6040 6041 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 
6042 6043 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6044 */ 6045 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[], 6046 PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[], 6047 PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_, 6048 PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_) 6049 { 6050 PetscInt cstart,cend,rstart,rend,row,col; 6051 PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6052 PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6053 PetscCount k,m,p,q,r,s,mid; 6054 PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap; 6055 6056 PetscFunctionBegin; 6057 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6058 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6059 m = rend - rstart; 6060 6061 for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */ 6062 6063 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6064 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6065 */ 6066 while (k<n) { 6067 row = i[k]; 6068 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6069 for (s=k; s<n; s++) if (i[s] != row) break; 6070 for (p=k; p<s; p++) { 6071 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6072 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); 6073 } 6074 PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k)); 6075 PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6076 rowBegin[row-rstart] = k; 6077 rowMid[row-rstart] = mid; 6078 rowEnd[row-rstart] = s; 6079 6080 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6081 Atot += mid - k; 6082 Btot += s - mid; 6083 6084 /* Count unique nonzeros of this diag/offdiag row */ 6085 for (p=k; p<mid;) { 6086 col = j[p]; 6087 do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */ 6088 Annz++; 6089 } 6090 6091 for (p=mid; p<s;) { 6092 col = j[p]; 6093 do {p++;} while (p<s && j[p] == col); 6094 Bnnz++; 6095 } 6096 k = s; 6097 } 6098 6099 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6100 PetscCall(PetscMalloc1(Atot,&Aperm)); 6101 PetscCall(PetscMalloc1(Btot,&Bperm)); 6102 PetscCall(PetscMalloc1(Annz+1,&Ajmap)); 6103 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap)); 6104 6105 /* Re-scan indices and copy diag/offdiag permuation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6106 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6107 for (r=0; r<m; r++) { 6108 k = rowBegin[r]; 6109 mid = rowMid[r]; 6110 s = rowEnd[r]; 6111 PetscCall(PetscArraycpy(Aperm+Atot,perm+k, mid-k)); 6112 PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid)); 6113 Atot += mid - k; 6114 Btot += s - mid; 6115 6116 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6117 for 
(p=k; p<mid;) { 6118 col = j[p]; 6119 q = p; 6120 do {p++;} while (p<mid && j[p] == col); 6121 Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6122 Annz++; 6123 } 6124 6125 for (p=mid; p<s;) { 6126 col = j[p]; 6127 q = p; 6128 do {p++;} while (p<s && j[p] == col); 6129 Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6130 Bnnz++; 6131 } 6132 } 6133 /* Output */ 6134 *Aperm_ = Aperm; 6135 *Annz_ = Annz; 6136 *Atot_ = Atot; 6137 *Ajmap_ = Ajmap; 6138 *Bperm_ = Bperm; 6139 *Bnnz_ = Bnnz; 6140 *Btot_ = Btot; 6141 *Bjmap_ = Bjmap; 6142 PetscFunctionReturn(0); 6143 } 6144 6145 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6146 6147 Input Parameters: 6148 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6149 nnz: number of unique nonzeros in the merged matrix 6150 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6151 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6152 6153 Output Parameter: (memory is allocated by the caller) 6154 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6155 6156 Example: 6157 nnz1 = 4 6158 nnz = 6 6159 imap = [1,3,4,5] 6160 jmap = [0,3,5,6,7] 6161 then, 6162 jmap_new = [0,0,3,3,5,6,7] 6163 */ 6164 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[]) 6165 { 6166 PetscCount k,p; 6167 6168 PetscFunctionBegin; 6169 jmap_new[0] = 0; 6170 p = nnz; /* p loops over jmap_new[] backwards */ 6171 for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */ 6172 for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1]; 6173 } 6174 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6175 PetscFunctionReturn(0); 6176 } 6177 6178 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6179 { 6180 MPI_Comm comm; 6181 PetscMPIInt rank,size; 6182 PetscInt 
m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6183 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6184 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6185 6186 PetscFunctionBegin; 6187 PetscCall(PetscFree(mpiaij->garray)); 6188 PetscCall(VecDestroy(&mpiaij->lvec)); 6189 #if defined(PETSC_USE_CTABLE) 6190 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6191 #else 6192 PetscCall(PetscFree(mpiaij->colmap)); 6193 #endif 6194 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6195 mat->assembled = PETSC_FALSE; 6196 mat->was_assembled = PETSC_FALSE; 6197 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6198 6199 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 6200 PetscCallMPI(MPI_Comm_size(comm,&size)); 6201 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 6202 PetscCall(PetscLayoutSetUp(mat->rmap)); 6203 PetscCall(PetscLayoutSetUp(mat->cmap)); 6204 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6205 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6206 PetscCall(MatGetLocalSize(mat,&m,&n)); 6207 PetscCall(MatGetSize(mat,&M,&N)); 6208 6209 /* ---------------------------------------------------------------------------*/ 6210 /* Sort (i,j) by row along with a permuation array, so that the to-be-ignored */ 6211 /* entries come first, then local rows, then remote rows. 
*/ 6212 /* ---------------------------------------------------------------------------*/ 6213 PetscCount n1 = coo_n,*perm1; 6214 PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 6215 PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1)); 6216 PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */ 6217 PetscCall(PetscArraycpy(j1,coo_j,n1)); 6218 for (k=0; k<n1; k++) perm1[k] = k; 6219 6220 /* Manipulate indices so that entries with negative row or col indices will have smallest 6221 row indices, local entries will have greater but negative row indices, and remote entries 6222 will have positive row indices. 6223 */ 6224 for (k=0; k<n1; k++) { 6225 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6226 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6227 else PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6228 else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6229 } 6230 6231 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6232 PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1)); 6233 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */ 6234 PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */ 6235 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6236 6237 /* ---------------------------------------------------------------------------*/ 6238 /* Split local rows into diag/offdiag portions */ 6239 /* ---------------------------------------------------------------------------*/ 6240 PetscCount 
*rowBegin1,*rowMid1,*rowEnd1; 6241 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1; 6242 PetscCount Annz1,Bnnz1,Atot1,Btot1; 6243 6244 PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1)); 6245 PetscCall(PetscMalloc1(n1-rem,&Cperm1)); 6246 PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1)); 6247 6248 /* ---------------------------------------------------------------------------*/ 6249 /* Send remote rows to their owner */ 6250 /* ---------------------------------------------------------------------------*/ 6251 /* Find which rows should be sent to which remote ranks*/ 6252 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6253 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6254 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6255 const PetscInt *ranges; 6256 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6257 6258 PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges)); 6259 PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries)); 6260 for (k=rem; k<n1;) { 6261 PetscMPIInt owner; 6262 PetscInt firstRow,lastRow; 6263 6264 /* Locate a row range */ 6265 firstRow = i1[k]; /* first row of this owner */ 6266 PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner)); 6267 lastRow = ranges[owner+1]-1; /* last row of this owner */ 6268 6269 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6270 PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p)); 6271 6272 /* All entries in [k,p) belong to this remote owner */ 6273 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6274 PetscMPIInt *sendto2; 6275 PetscInt *nentries2; 6276 PetscInt maxNsend2 = (maxNsend <= size/2) ? 
maxNsend*2 : size; 6277 6278 PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2)); 6279 PetscCall(PetscArraycpy(sendto2,sendto,maxNsend)); 6280 PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1)); 6281 PetscCall(PetscFree2(sendto,nentries2)); 6282 sendto = sendto2; 6283 nentries = nentries2; 6284 maxNsend = maxNsend2; 6285 } 6286 sendto[nsend] = owner; 6287 nentries[nsend] = p - k; 6288 PetscCall(PetscCountCast(p-k,&nentries[nsend])); 6289 nsend++; 6290 k = p; 6291 } 6292 6293 /* Build 1st SF to know offsets on remote to send data */ 6294 PetscSF sf1; 6295 PetscInt nroots = 1,nroots2 = 0; 6296 PetscInt nleaves = nsend,nleaves2 = 0; 6297 PetscInt *offsets; 6298 PetscSFNode *iremote; 6299 6300 PetscCall(PetscSFCreate(comm,&sf1)); 6301 PetscCall(PetscMalloc1(nsend,&iremote)); 6302 PetscCall(PetscMalloc1(nsend,&offsets)); 6303 for (k=0; k<nsend; k++) { 6304 iremote[k].rank = sendto[k]; 6305 iremote[k].index = 0; 6306 nleaves2 += nentries[k]; 6307 PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt"); 6308 } 6309 PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6310 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM)); 6311 PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6312 PetscCall(PetscSFDestroy(&sf1)); 6313 PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem); 6314 6315 /* Build 2nd SF to send remote COOs to their owner */ 6316 PetscSF sf2; 6317 nroots = nroots2; 6318 nleaves = nleaves2; 6319 PetscCall(PetscSFCreate(comm,&sf2)); 6320 PetscCall(PetscSFSetFromOptions(sf2)); 6321 PetscCall(PetscMalloc1(nleaves,&iremote)); 
6322 p = 0; 6323 for (k=0; k<nsend; k++) { 6324 PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt"); 6325 for (q=0; q<nentries[k]; q++,p++) { 6326 iremote[p].rank = sendto[k]; 6327 iremote[p].index = offsets[k] + q; 6328 } 6329 } 6330 PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6331 6332 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permuation which will be used to fill leafdata */ 6333 PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem)); 6334 6335 /* Send the remote COOs to their owner */ 6336 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6337 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6338 PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2)); 6339 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE)); 6340 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE)); 6341 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE)); 6342 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE)); 6343 6344 PetscCall(PetscFree(offsets)); 6345 PetscCall(PetscFree2(sendto,nentries)); 6346 6347 /* ---------------------------------------------------------------*/ 6348 /* Sort received COOs by row along with the permutation array */ 6349 /* ---------------------------------------------------------------*/ 6350 for (k=0; k<n2; k++) perm2[k] = k; 6351 PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2)); 6352 6353 /* ---------------------------------------------------------------*/ 6354 /* Split received COOs into diag/offdiag portions */ 6355 /* ---------------------------------------------------------------*/ 6356 PetscCount 
*rowBegin2,*rowMid2,*rowEnd2; 6357 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6358 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6359 6360 PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2)); 6361 PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2)); 6362 6363 /* --------------------------------------------------------------------------*/ 6364 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6365 /* --------------------------------------------------------------------------*/ 6366 PetscInt *Ai,*Bi; 6367 PetscInt *Aj,*Bj; 6368 6369 PetscCall(PetscMalloc1(m+1,&Ai)); 6370 PetscCall(PetscMalloc1(m+1,&Bi)); 6371 PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6372 PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj)); 6373 6374 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6375 PetscCall(PetscMalloc1(Annz1,&Aimap1)); 6376 PetscCall(PetscMalloc1(Bnnz1,&Bimap1)); 6377 PetscCall(PetscMalloc1(Annz2,&Aimap2)); 6378 PetscCall(PetscMalloc1(Bnnz2,&Bimap2)); 6379 6380 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj)); 6381 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj)); 6382 6383 /* --------------------------------------------------------------------------*/ 6384 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6385 /* expect nonzeros in A/B most likely have local contributing entries */ 6386 /* --------------------------------------------------------------------------*/ 6387 PetscInt Annz = Ai[m]; 6388 PetscInt Bnnz = Bi[m]; 6389 PetscCount *Ajmap1_new,*Bjmap1_new; 6390 6391 PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new)); 6392 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new)); 6393 6394 
PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new)); 6395 PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new)); 6396 6397 PetscCall(PetscFree(Aimap1)); 6398 PetscCall(PetscFree(Ajmap1)); 6399 PetscCall(PetscFree(Bimap1)); 6400 PetscCall(PetscFree(Bjmap1)); 6401 PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1)); 6402 PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2)); 6403 PetscCall(PetscFree3(i1,j1,perm1)); 6404 PetscCall(PetscFree3(i2,j2,perm2)); 6405 6406 Ajmap1 = Ajmap1_new; 6407 Bjmap1 = Bjmap1_new; 6408 6409 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6410 if (Annz < Annz1 + Annz2) { 6411 PetscInt *Aj_new; 6412 PetscCall(PetscMalloc1(Annz,&Aj_new)); 6413 PetscCall(PetscArraycpy(Aj_new,Aj,Annz)); 6414 PetscCall(PetscFree(Aj)); 6415 Aj = Aj_new; 6416 } 6417 6418 if (Bnnz < Bnnz1 + Bnnz2) { 6419 PetscInt *Bj_new; 6420 PetscCall(PetscMalloc1(Bnnz,&Bj_new)); 6421 PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz)); 6422 PetscCall(PetscFree(Bj)); 6423 Bj = Bj_new; 6424 } 6425 6426 /* --------------------------------------------------------------------------------*/ 6427 /* Create new submatrices for on-process and off-process coupling */ 6428 /* --------------------------------------------------------------------------------*/ 6429 PetscScalar *Aa,*Ba; 6430 MatType rtype; 6431 Mat_SeqAIJ *a,*b; 6432 PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */ 6433 PetscCall(PetscCalloc1(Bnnz,&Ba)); 6434 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6435 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6436 PetscCall(MatDestroy(&mpiaij->A)); 6437 PetscCall(MatDestroy(&mpiaij->B)); 6438 PetscCall(MatGetRootType_Private(mat,&rtype)); 6439 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A)); 6440 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B)); 6441 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6442 6443 a = 
(Mat_SeqAIJ*)mpiaij->A->data; 6444 b = (Mat_SeqAIJ*)mpiaij->B->data; 6445 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6446 a->free_a = b->free_a = PETSC_TRUE; 6447 a->free_ij = b->free_ij = PETSC_TRUE; 6448 6449 /* conversion must happen AFTER multiply setup */ 6450 PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A)); 6451 PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B)); 6452 PetscCall(VecDestroy(&mpiaij->lvec)); 6453 PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL)); 6454 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec)); 6455 6456 mpiaij->coo_n = coo_n; 6457 mpiaij->coo_sf = sf2; 6458 mpiaij->sendlen = nleaves; 6459 mpiaij->recvlen = nroots; 6460 6461 mpiaij->Annz = Annz; 6462 mpiaij->Bnnz = Bnnz; 6463 6464 mpiaij->Annz2 = Annz2; 6465 mpiaij->Bnnz2 = Bnnz2; 6466 6467 mpiaij->Atot1 = Atot1; 6468 mpiaij->Atot2 = Atot2; 6469 mpiaij->Btot1 = Btot1; 6470 mpiaij->Btot2 = Btot2; 6471 6472 mpiaij->Ajmap1 = Ajmap1; 6473 mpiaij->Aperm1 = Aperm1; 6474 6475 mpiaij->Bjmap1 = Bjmap1; 6476 mpiaij->Bperm1 = Bperm1; 6477 6478 mpiaij->Aimap2 = Aimap2; 6479 mpiaij->Ajmap2 = Ajmap2; 6480 mpiaij->Aperm2 = Aperm2; 6481 6482 mpiaij->Bimap2 = Bimap2; 6483 mpiaij->Bjmap2 = Bjmap2; 6484 mpiaij->Bperm2 = Bperm2; 6485 6486 mpiaij->Cperm1 = Cperm1; 6487 6488 /* Allocate in preallocation. 
If not used, it has zero cost on host */ 6489 PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf)); 6490 PetscFunctionReturn(0); 6491 } 6492 6493 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode) 6494 { 6495 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6496 Mat A = mpiaij->A,B = mpiaij->B; 6497 PetscCount Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2; 6498 PetscScalar *Aa,*Ba; 6499 PetscScalar *sendbuf = mpiaij->sendbuf; 6500 PetscScalar *recvbuf = mpiaij->recvbuf; 6501 const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2; 6502 const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2; 6503 const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2; 6504 const PetscCount *Cperm1 = mpiaij->Cperm1; 6505 6506 PetscFunctionBegin; 6507 PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */ 6508 PetscCall(MatSeqAIJGetArray(B,&Ba)); 6509 6510 /* Pack entries to be sent to remote */ 6511 for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6512 6513 /* Send remote entries to their owner and overlap the communication with local computation */ 6514 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE)); 6515 /* Add local entries to A and B */ 6516 for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6517 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stablility */ 6518 for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]]; 6519 Aa[i] = (imode == INSERT_VALUES? 
0.0 : Aa[i]) + sum; 6520 } 6521 for (PetscCount i=0; i<Bnnz; i++) { 6522 PetscScalar sum = 0.0; 6523 for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]]; 6524 Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum; 6525 } 6526 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE)); 6527 6528 /* Add received remote entries to A and B */ 6529 for (PetscCount i=0; i<Annz2; i++) { 6530 for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6531 } 6532 for (PetscCount i=0; i<Bnnz2; i++) { 6533 for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6534 } 6535 PetscCall(MatSeqAIJRestoreArray(A,&Aa)); 6536 PetscCall(MatSeqAIJRestoreArray(B,&Ba)); 6537 PetscFunctionReturn(0); 6538 } 6539 6540 /* ----------------------------------------------------------------*/ 6541 6542 /*MC 6543 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6544 6545 Options Database Keys: 6546 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6547 6548 Level: beginner 6549 6550 Notes: 6551 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6552 in this case the values associated with the rows and columns one passes in are set to zero 6553 in the matrix 6554 6555 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6556 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6557 6558 .seealso: MatCreateAIJ() 6559 M*/ 6560 6561 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6562 { 6563 Mat_MPIAIJ *b; 6564 PetscMPIInt size; 6565 6566 PetscFunctionBegin; 6567 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 6568 6569 PetscCall(PetscNewLog(B,&b)); 6570 B->data = (void*)b; 6571 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 6572 B->assembled = PETSC_FALSE; 6573 B->insertmode = NOT_SET_VALUES; 6574 b->size = size; 6575 6576 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 6577 6578 /* build cache for off array entries formed */ 6579 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 6580 6581 b->donotstash = PETSC_FALSE; 6582 b->colmap = NULL; 6583 b->garray = NULL; 6584 b->roworiented = PETSC_TRUE; 6585 6586 /* stuff used for matrix vector multiply */ 6587 b->lvec = NULL; 6588 b->Mvctx = NULL; 6589 6590 /* stuff for MatGetRow() */ 6591 b->rowindices = NULL; 6592 b->rowvalues = NULL; 6593 b->getrowactive = PETSC_FALSE; 6594 6595 /* flexible pointer used in CUSPARSE classes */ 6596 b->spptr = NULL; 6597 6598 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6599 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ)); 6600 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ)); 6601 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ)); 6602 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ)); 6603 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ)); 6604 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6605 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ)); 6606 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM)); 6607 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL)); 6608 #if defined(PETSC_HAVE_CUDA) 6609 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6610 #endif 6611 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6612 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos)); 6613 #endif 6614 #if defined(PETSC_HAVE_MKL_SPARSE) 6615 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL)); 6616 #endif 6617 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL)); 6618 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ)); 6619 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ)); 6620 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense)); 6621 #if defined(PETSC_HAVE_ELEMENTAL) 6622 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental)); 6623 #endif 6624 #if defined(PETSC_HAVE_SCALAPACK) 6625 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK)); 6626 #endif 6627 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS)); 6628 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL)); 6629 #if defined(PETSC_HAVE_HYPRE) 6630 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE)); 6631 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6632 #endif 6633 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ)); 6634 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ)); 6635 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ)); 6636 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ)); 6637 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ)); 6638 PetscFunctionReturn(0); 6639 } 6640 6641 /*@C 6642 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6643 and "off-diagonal" part of the matrix in CSR format. 6644 6645 Collective 6646 6647 Input Parameters: 6648 + comm - MPI communicator 6649 . m - number of local rows (Cannot be PETSC_DECIDE) 6650 . n - This value should be the same as the local size used in creating the 6651 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6652 calculated if N is given) For square matrices n is almost always m. 6653 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6654 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6655 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6656 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6657 . 
a - matrix values
.  oi - row offsets for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in row row-1 of the off-diagonal portion
.  oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
-  oa - matrix values

   Output Parameter:
.   mat - the matrix

   Level: advanced

   Notes:
       The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i, j, oi, and oj indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying arrays. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.
6683 6684 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6685 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6686 @*/ 6687 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6688 { 6689 Mat_MPIAIJ *maij; 6690 6691 PetscFunctionBegin; 6692 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6693 PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6694 PetscCheckFalse(oi[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6695 PetscCall(MatCreate(comm,mat)); 6696 PetscCall(MatSetSizes(*mat,m,n,M,N)); 6697 PetscCall(MatSetType(*mat,MATMPIAIJ)); 6698 maij = (Mat_MPIAIJ*) (*mat)->data; 6699 6700 (*mat)->preallocated = PETSC_TRUE; 6701 6702 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6703 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6704 6705 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A)); 6706 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B)); 6707 6708 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 6709 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 6710 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 6711 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 6712 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 6713 PetscFunctionReturn(0); 6714 } 6715 6716 typedef struct { 6717 Mat *mp; /* intermediate products */ 6718 PetscBool *mptmp; /* is the intermediate product temporary ? 
*/ 6719 PetscInt cp; /* number of intermediate products */ 6720 6721 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6722 PetscInt *startsj_s,*startsj_r; 6723 PetscScalar *bufa; 6724 Mat P_oth; 6725 6726 /* may take advantage of merging product->B */ 6727 Mat Bloc; /* B-local by merging diag and off-diag */ 6728 6729 /* cusparse does not have support to split between symbolic and numeric phases. 6730 When api_user is true, we don't need to update the numerical values 6731 of the temporary storage */ 6732 PetscBool reusesym; 6733 6734 /* support for COO values insertion */ 6735 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6736 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6737 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6738 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6739 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6740 PetscMemType mtype; 6741 6742 /* customization */ 6743 PetscBool abmerge; 6744 PetscBool P_oth_bind; 6745 } MatMatMPIAIJBACKEND; 6746 6747 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6748 { 6749 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6750 PetscInt i; 6751 6752 PetscFunctionBegin; 6753 PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r)); 6754 PetscCall(PetscFree(mmdata->bufa)); 6755 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v)); 6756 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w)); 6757 PetscCall(MatDestroy(&mmdata->P_oth)); 6758 PetscCall(MatDestroy(&mmdata->Bloc)); 6759 PetscCall(PetscSFDestroy(&mmdata->sf)); 6760 for (i = 0; i < mmdata->cp; i++) { 6761 PetscCall(MatDestroy(&mmdata->mp[i])); 6762 } 6763 PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp)); 6764 PetscCall(PetscFree(mmdata->own[0])); 6765 PetscCall(PetscFree(mmdata->own)); 
6766 PetscCall(PetscFree(mmdata->off[0])); 6767 PetscCall(PetscFree(mmdata->off)); 6768 PetscCall(PetscFree(mmdata)); 6769 PetscFunctionReturn(0); 6770 } 6771 6772 /* Copy selected n entries with indices in idx[] of A to v[]. 6773 If idx is NULL, copy the whole data array of A to v[] 6774 */ 6775 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6776 { 6777 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6778 6779 PetscFunctionBegin; 6780 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f)); 6781 if (f) { 6782 PetscCall((*f)(A,n,idx,v)); 6783 } else { 6784 const PetscScalar *vv; 6785 6786 PetscCall(MatSeqAIJGetArrayRead(A,&vv)); 6787 if (n && idx) { 6788 PetscScalar *w = v; 6789 const PetscInt *oi = idx; 6790 PetscInt j; 6791 6792 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6793 } else { 6794 PetscCall(PetscArraycpy(v,vv,n)); 6795 } 6796 PetscCall(MatSeqAIJRestoreArrayRead(A,&vv)); 6797 } 6798 PetscFunctionReturn(0); 6799 } 6800 6801 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6802 { 6803 MatMatMPIAIJBACKEND *mmdata; 6804 PetscInt i,n_d,n_o; 6805 6806 PetscFunctionBegin; 6807 MatCheckProduct(C,1); 6808 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6809 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6810 if (!mmdata->reusesym) { /* update temporary matrices */ 6811 if (mmdata->P_oth) { 6812 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 6813 } 6814 if (mmdata->Bloc) { 6815 PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc)); 6816 } 6817 } 6818 mmdata->reusesym = PETSC_FALSE; 6819 6820 for (i = 0; i < mmdata->cp; i++) { 6821 PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for 
%s",MatProductTypes[mmdata->mp[i]->product->type]); 6822 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 6823 } 6824 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6825 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6826 6827 if (mmdata->mptmp[i]) continue; 6828 if (noff) { 6829 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6830 6831 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o)); 6832 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d)); 6833 n_o += noff; 6834 n_d += nown; 6835 } else { 6836 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6837 6838 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d)); 6839 n_d += mm->nz; 6840 } 6841 } 6842 if (mmdata->hasoffproc) { /* offprocess insertion */ 6843 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6844 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6845 } 6846 PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES)); 6847 PetscFunctionReturn(0); 6848 } 6849 6850 /* Support for Pt * A, A * P, or Pt * A * P */ 6851 #define MAX_NUMBER_INTERMEDIATE 4 6852 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6853 { 6854 Mat_Product *product = C->product; 6855 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6856 Mat_MPIAIJ *a,*p; 6857 MatMatMPIAIJBACKEND *mmdata; 6858 ISLocalToGlobalMapping P_oth_l2g = NULL; 6859 IS glob = NULL; 6860 const char *prefix; 6861 char pprefix[256]; 6862 const PetscInt *globidx,*P_oth_idx; 6863 PetscInt i,j,cp,m,n,M,N,*coo_i,*coo_j; 6864 PetscCount ncoo,ncoo_d,ncoo_o,ncoo_oown; 6865 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 6866 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6867 /* a base offset; type-2: sparse with a local to global map table */ 6868 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6869 6870 MatProductType ptype; 6871 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6872 PetscMPIInt size; 6873 6874 PetscFunctionBegin; 6875 MatCheckProduct(C,1); 6876 PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6877 ptype = product->type; 6878 if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 6879 ptype = MATPRODUCT_AB; 6880 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 6881 } 6882 switch (ptype) { 6883 case MATPRODUCT_AB: 6884 A = product->A; 6885 P = product->B; 6886 m = A->rmap->n; 6887 n = P->cmap->n; 6888 M = A->rmap->N; 6889 N = P->cmap->N; 6890 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6891 break; 6892 case MATPRODUCT_AtB: 6893 P = product->A; 6894 A = product->B; 6895 m = P->cmap->n; 6896 n = A->cmap->n; 6897 M = P->cmap->N; 6898 N = A->cmap->N; 6899 hasoffproc = PETSC_TRUE; 6900 break; 6901 case MATPRODUCT_PtAP: 6902 A = product->A; 6903 P = product->B; 6904 m = P->cmap->n; 6905 n = P->cmap->n; 6906 M = P->cmap->N; 6907 N = P->cmap->N; 6908 hasoffproc = PETSC_TRUE; 6909 break; 6910 default: 6911 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6912 } 6913 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size)); 6914 if (size == 1) hasoffproc = PETSC_FALSE; 6915 6916 /* defaults */ 6917 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6918 mp[i] = NULL; 6919 mptmp[i] = PETSC_FALSE; 6920 rmapt[i] = -1; 6921 cmapt[i] = -1; 6922 rmapa[i] = NULL; 6923 cmapa[i] = NULL; 6924 } 6925 6926 /* customization */ 6927 PetscCall(PetscNew(&mmdata)); 6928 
mmdata->reusesym = product->api_user; 6929 if (ptype == MATPRODUCT_AB) { 6930 if (product->api_user) { 6931 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat"); 6932 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 6933 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6934 PetscOptionsEnd(); 6935 } else { 6936 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat"); 6937 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 6938 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6939 PetscOptionsEnd(); 6940 } 6941 } else if (ptype == MATPRODUCT_PtAP) { 6942 if (product->api_user) { 6943 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat"); 6944 PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6945 PetscOptionsEnd(); 6946 } else { 6947 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 6948 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6949 PetscOptionsEnd(); 6950 } 6951 } 6952 a = (Mat_MPIAIJ*)A->data; 6953 p = (Mat_MPIAIJ*)P->data; 6954 PetscCall(MatSetSizes(C,m,n,M,N)); 6955 PetscCall(PetscLayoutSetUp(C->rmap)); 6956 PetscCall(PetscLayoutSetUp(C->cmap)); 6957 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 6958 PetscCall(MatGetOptionsPrefix(C,&prefix)); 6959 6960 cp = 0; 6961 switch (ptype) { 6962 case MATPRODUCT_AB: 
/* A * P */ 6963 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 6964 6965 /* A_diag * P_local (merged or not) */ 6966 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 6967 /* P is product->B */ 6968 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 6969 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 6970 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 6971 PetscCall(MatProductSetFill(mp[cp],product->fill)); 6972 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 6973 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 6974 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 6975 mp[cp]->product->api_user = product->api_user; 6976 PetscCall(MatProductSetFromOptions(mp[cp])); 6977 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6978 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 6979 PetscCall(ISGetIndices(glob,&globidx)); 6980 rmapt[cp] = 1; 6981 cmapt[cp] = 2; 6982 cmapa[cp] = globidx; 6983 mptmp[cp] = PETSC_FALSE; 6984 cp++; 6985 } else { /* A_diag * P_diag and A_diag * P_off */ 6986 PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp])); 6987 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 6988 PetscCall(MatProductSetFill(mp[cp],product->fill)); 6989 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 6990 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 6991 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 6992 mp[cp]->product->api_user = product->api_user; 6993 PetscCall(MatProductSetFromOptions(mp[cp])); 6994 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6995 
PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 6996 rmapt[cp] = 1; 6997 cmapt[cp] = 1; 6998 mptmp[cp] = PETSC_FALSE; 6999 cp++; 7000 PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp])); 7001 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7002 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7003 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7004 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7005 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7006 mp[cp]->product->api_user = product->api_user; 7007 PetscCall(MatProductSetFromOptions(mp[cp])); 7008 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7009 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7010 rmapt[cp] = 1; 7011 cmapt[cp] = 2; 7012 cmapa[cp] = p->garray; 7013 mptmp[cp] = PETSC_FALSE; 7014 cp++; 7015 } 7016 7017 /* A_off * P_other */ 7018 if (mmdata->P_oth) { 7019 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */ 7020 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7021 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7022 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7023 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7024 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7025 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7026 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7027 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7028 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7029 mp[cp]->product->api_user = product->api_user; 7030 PetscCall(MatProductSetFromOptions(mp[cp])); 7031 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7032 
PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7033 rmapt[cp] = 1; 7034 cmapt[cp] = 2; 7035 cmapa[cp] = P_oth_idx; 7036 mptmp[cp] = PETSC_FALSE; 7037 cp++; 7038 } 7039 break; 7040 7041 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7042 /* A is product->B */ 7043 PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7044 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7045 PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp])); 7046 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7047 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7048 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7049 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7050 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7051 mp[cp]->product->api_user = product->api_user; 7052 PetscCall(MatProductSetFromOptions(mp[cp])); 7053 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7054 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7055 PetscCall(ISGetIndices(glob,&globidx)); 7056 rmapt[cp] = 2; 7057 rmapa[cp] = globidx; 7058 cmapt[cp] = 2; 7059 cmapa[cp] = globidx; 7060 mptmp[cp] = PETSC_FALSE; 7061 cp++; 7062 } else { 7063 PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp])); 7064 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7065 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7066 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7067 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7068 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7069 mp[cp]->product->api_user = product->api_user; 7070 PetscCall(MatProductSetFromOptions(mp[cp])); 7071 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for 
%s",MatProductTypes[mp[cp]->product->type]); 7072 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7073 PetscCall(ISGetIndices(glob,&globidx)); 7074 rmapt[cp] = 1; 7075 cmapt[cp] = 2; 7076 cmapa[cp] = globidx; 7077 mptmp[cp] = PETSC_FALSE; 7078 cp++; 7079 PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp])); 7080 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7081 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7082 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7083 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7084 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7085 mp[cp]->product->api_user = product->api_user; 7086 PetscCall(MatProductSetFromOptions(mp[cp])); 7087 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7088 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7089 rmapt[cp] = 2; 7090 rmapa[cp] = p->garray; 7091 cmapt[cp] = 2; 7092 cmapa[cp] = globidx; 7093 mptmp[cp] = PETSC_FALSE; 7094 cp++; 7095 } 7096 break; 7097 case MATPRODUCT_PtAP: 7098 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7099 /* P is product->B */ 7100 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7101 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7102 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP)); 7103 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7104 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7105 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7106 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7107 mp[cp]->product->api_user = product->api_user; 7108 PetscCall(MatProductSetFromOptions(mp[cp])); 7109 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for 
%s",MatProductTypes[mp[cp]->product->type]); 7110 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7111 PetscCall(ISGetIndices(glob,&globidx)); 7112 rmapt[cp] = 2; 7113 rmapa[cp] = globidx; 7114 cmapt[cp] = 2; 7115 cmapa[cp] = globidx; 7116 mptmp[cp] = PETSC_FALSE; 7117 cp++; 7118 if (mmdata->P_oth) { 7119 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); 7120 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7121 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7122 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7123 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7124 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7125 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7126 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7127 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7128 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7129 mp[cp]->product->api_user = product->api_user; 7130 PetscCall(MatProductSetFromOptions(mp[cp])); 7131 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7132 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7133 mptmp[cp] = PETSC_TRUE; 7134 cp++; 7135 PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp])); 7136 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7137 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7138 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7139 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7140 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7141 mp[cp]->product->api_user = product->api_user; 7142 PetscCall(MatProductSetFromOptions(mp[cp])); 7143 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7144 
PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7145 rmapt[cp] = 2; 7146 rmapa[cp] = globidx; 7147 cmapt[cp] = 2; 7148 cmapa[cp] = P_oth_idx; 7149 mptmp[cp] = PETSC_FALSE; 7150 cp++; 7151 } 7152 break; 7153 default: 7154 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7155 } 7156 /* sanity check */ 7157 if (size > 1) for (i = 0; i < cp; i++) PetscCheckFalse(rmapt[i] == 2 && !hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i); 7158 7159 PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp)); 7160 for (i = 0; i < cp; i++) { 7161 mmdata->mp[i] = mp[i]; 7162 mmdata->mptmp[i] = mptmp[i]; 7163 } 7164 mmdata->cp = cp; 7165 C->product->data = mmdata; 7166 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7167 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7168 7169 /* memory type */ 7170 mmdata->mtype = PETSC_MEMTYPE_HOST; 7171 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"")); 7172 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"")); 7173 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7174 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7175 7176 /* prepare coo coordinates for values insertion */ 7177 7178 /* count total nonzeros of those intermediate seqaij Mats 7179 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7180 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7181 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7182 */ 7183 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7184 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7185 if (mptmp[cp]) continue; 7186 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7187 const PetscInt 
*rmap = rmapa[cp]; 7188 const PetscInt mr = mp[cp]->rmap->n; 7189 const PetscInt rs = C->rmap->rstart; 7190 const PetscInt re = C->rmap->rend; 7191 const PetscInt *ii = mm->i; 7192 for (i = 0; i < mr; i++) { 7193 const PetscInt gr = rmap[i]; 7194 const PetscInt nz = ii[i+1] - ii[i]; 7195 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7196 else ncoo_oown += nz; /* this row is local */ 7197 } 7198 } else ncoo_d += mm->nz; 7199 } 7200 7201 /* 7202 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7203 7204 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7205 7206 off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 7207 7208 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7209 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7210 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7211 7212 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7213 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 
7214 */ 7215 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */ 7216 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own)); 7217 7218 /* gather (i,j) of nonzeros inserted by remote procs */ 7219 if (hasoffproc) { 7220 PetscSF msf; 7221 PetscInt ncoo2,*coo_i2,*coo_j2; 7222 7223 PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0])); 7224 PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0])); 7225 PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */ 7226 7227 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7228 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7229 PetscInt *idxoff = mmdata->off[cp]; 7230 PetscInt *idxown = mmdata->own[cp]; 7231 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7232 const PetscInt *rmap = rmapa[cp]; 7233 const PetscInt *cmap = cmapa[cp]; 7234 const PetscInt *ii = mm->i; 7235 PetscInt *coi = coo_i + ncoo_o; 7236 PetscInt *coj = coo_j + ncoo_o; 7237 const PetscInt mr = mp[cp]->rmap->n; 7238 const PetscInt rs = C->rmap->rstart; 7239 const PetscInt re = C->rmap->rend; 7240 const PetscInt cs = C->cmap->rstart; 7241 for (i = 0; i < mr; i++) { 7242 const PetscInt *jj = mm->j + ii[i]; 7243 const PetscInt gr = rmap[i]; 7244 const PetscInt nz = ii[i+1] - ii[i]; 7245 if (gr < rs || gr >= re) { /* this is an offproc row */ 7246 for (j = ii[i]; j < ii[i+1]; j++) { 7247 *coi++ = gr; 7248 *idxoff++ = j; 7249 } 7250 if (!cmapt[cp]) { /* already global */ 7251 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7252 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7253 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7254 } else { /* offdiag */ 7255 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7256 } 7257 ncoo_o += nz; 7258 } else { /* this is a local row */ 7259 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 7260 } 7261 } 7262 } 7263 mmdata->off[cp + 1] = idxoff; 7264 mmdata->own[cp + 1] = idxown; 7265 } 7266 7267 
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7268 PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i)); 7269 PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf)); 7270 PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL)); 7271 ncoo = ncoo_d + ncoo_oown + ncoo2; 7272 PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2)); 7273 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7274 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); 7275 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7276 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7277 PetscCall(PetscFree2(coo_i,coo_j)); 7278 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7279 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w)); 7280 coo_i = coo_i2; 7281 coo_j = coo_j2; 7282 } else { /* no offproc values insertion */ 7283 ncoo = ncoo_d; 7284 PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j)); 7285 7286 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7287 PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER)); 7288 PetscCall(PetscSFSetUp(mmdata->sf)); 7289 } 7290 mmdata->hasoffproc = hasoffproc; 7291 7292 /* gather (i,j) of nonzeros inserted locally */ 7293 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7294 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7295 PetscInt *coi = coo_i + ncoo_d; 7296 PetscInt *coj = coo_j + ncoo_d; 7297 const PetscInt *jj = mm->j; 7298 const PetscInt *ii = mm->i; 7299 const PetscInt *cmap = cmapa[cp]; 7300 const PetscInt *rmap = rmapa[cp]; 7301 const PetscInt mr = mp[cp]->rmap->n; 7302 const PetscInt rs = C->rmap->rstart; 7303 const PetscInt re = C->rmap->rend; 7304 const PetscInt 
cs = C->cmap->rstart; 7305 7306 if (mptmp[cp]) continue; 7307 if (rmapt[cp] == 1) { /* consecutive rows */ 7308 /* fill coo_i */ 7309 for (i = 0; i < mr; i++) { 7310 const PetscInt gr = i + rs; 7311 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 7312 } 7313 /* fill coo_j */ 7314 if (!cmapt[cp]) { /* type-0, already global */ 7315 PetscCall(PetscArraycpy(coj,jj,mm->nz)); 7316 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7317 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7318 } else { /* type-2, local to global for sparse columns */ 7319 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7320 } 7321 ncoo_d += mm->nz; 7322 } else if (rmapt[cp] == 2) { /* sparse rows */ 7323 for (i = 0; i < mr; i++) { 7324 const PetscInt *jj = mm->j + ii[i]; 7325 const PetscInt gr = rmap[i]; 7326 const PetscInt nz = ii[i+1] - ii[i]; 7327 if (gr >= rs && gr < re) { /* local rows */ 7328 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 7329 if (!cmapt[cp]) { /* type-0, already global */ 7330 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7331 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7332 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7333 } else { /* type-2, local to global for sparse columns */ 7334 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7335 } 7336 ncoo_d += nz; 7337 } 7338 } 7339 } 7340 } 7341 if (glob) { 7342 PetscCall(ISRestoreIndices(glob,&globidx)); 7343 } 7344 PetscCall(ISDestroy(&glob)); 7345 if (P_oth_l2g) { 7346 PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx)); 7347 } 7348 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7349 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7350 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v)); 7351 7352 /* preallocate with COO data */ 7353 PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j)); 7354 
PetscCall(PetscFree2(coo_i,coo_j)); 7355 PetscFunctionReturn(0); 7356 } 7357 7358 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7359 { 7360 Mat_Product *product = mat->product; 7361 #if defined(PETSC_HAVE_DEVICE) 7362 PetscBool match = PETSC_FALSE; 7363 PetscBool usecpu = PETSC_FALSE; 7364 #else 7365 PetscBool match = PETSC_TRUE; 7366 #endif 7367 7368 PetscFunctionBegin; 7369 MatCheckProduct(mat,1); 7370 #if defined(PETSC_HAVE_DEVICE) 7371 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7372 PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match)); 7373 } 7374 if (match) { /* we can always fallback to the CPU if requested */ 7375 switch (product->type) { 7376 case MATPRODUCT_AB: 7377 if (product->api_user) { 7378 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat"); 7379 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7380 PetscOptionsEnd(); 7381 } else { 7382 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat"); 7383 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7384 PetscOptionsEnd(); 7385 } 7386 break; 7387 case MATPRODUCT_AtB: 7388 if (product->api_user) { 7389 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat"); 7390 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7391 PetscOptionsEnd(); 7392 } else { 7393 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat"); 7394 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7395 PetscOptionsEnd(); 7396 } 7397 break; 7398 case MATPRODUCT_PtAP: 7399 if (product->api_user) { 7400 
PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat"); 7401 PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7402 PetscOptionsEnd(); 7403 } else { 7404 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat"); 7405 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7406 PetscOptionsEnd(); 7407 } 7408 break; 7409 default: 7410 break; 7411 } 7412 match = (PetscBool)!usecpu; 7413 } 7414 #endif 7415 if (match) { 7416 switch (product->type) { 7417 case MATPRODUCT_AB: 7418 case MATPRODUCT_AtB: 7419 case MATPRODUCT_PtAP: 7420 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7421 break; 7422 default: 7423 break; 7424 } 7425 } 7426 /* fallback to MPIAIJ ops */ 7427 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7428 PetscFunctionReturn(0); 7429 } 7430 7431 /* 7432 Special version for direct calls from Fortran 7433 */ 7434 #include <petsc/private/fortranimpl.h> 7435 7436 /* Change these macros so can be used in void function */ 7437 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 7438 #undef PetscCall 7439 #define PetscCall(...) do { \ 7440 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 7441 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 7442 *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \ 7443 return; \ 7444 } \ 7445 } while (0) 7446 7447 #undef SETERRQ 7448 #define SETERRQ(comm,ierr,...) 
do { \ 7449 *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \ 7450 return; \ 7451 } while (0) 7452 7453 #if defined(PETSC_HAVE_FORTRAN_CAPS) 7454 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 7455 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 7456 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 7457 #else 7458 #endif 7459 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 7460 { 7461 Mat mat = *mmat; 7462 PetscInt m = *mm, n = *mn; 7463 InsertMode addv = *maddv; 7464 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 7465 PetscScalar value; 7466 7467 MatCheckPreallocated(mat,1); 7468 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 7469 else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 7470 { 7471 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 7472 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 7473 PetscBool roworiented = aij->roworiented; 7474 7475 /* Some Variables required in the macro */ 7476 Mat A = aij->A; 7477 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 7478 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 7479 MatScalar *aa; 7480 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 7481 Mat B = aij->B; 7482 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 7483 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 7484 MatScalar *ba; 7485 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 7486 * cannot use "#if defined" inside a macro. 
*/ 7487 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 7488 7489 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 7490 PetscInt nonew = a->nonew; 7491 MatScalar *ap1,*ap2; 7492 7493 PetscFunctionBegin; 7494 PetscCall(MatSeqAIJGetArray(A,&aa)); 7495 PetscCall(MatSeqAIJGetArray(B,&ba)); 7496 for (i=0; i<m; i++) { 7497 if (im[i] < 0) continue; 7498 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 7499 if (im[i] >= rstart && im[i] < rend) { 7500 row = im[i] - rstart; 7501 lastcol1 = -1; 7502 rp1 = aj + ai[row]; 7503 ap1 = aa + ai[row]; 7504 rmax1 = aimax[row]; 7505 nrow1 = ailen[row]; 7506 low1 = 0; 7507 high1 = nrow1; 7508 lastcol2 = -1; 7509 rp2 = bj + bi[row]; 7510 ap2 = ba + bi[row]; 7511 rmax2 = bimax[row]; 7512 nrow2 = bilen[row]; 7513 low2 = 0; 7514 high2 = nrow2; 7515 7516 for (j=0; j<n; j++) { 7517 if (roworiented) value = v[i*n+j]; 7518 else value = v[i+j*m]; 7519 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 7520 if (in[j] >= cstart && in[j] < cend) { 7521 col = in[j] - cstart; 7522 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 7523 } else if (in[j] < 0) continue; 7524 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 7525 /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */ 7526 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 7527 } else { 7528 if (mat->was_assembled) { 7529 if (!aij->colmap) { 7530 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 7531 } 7532 #if defined(PETSC_USE_CTABLE) 7533 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); 7534 col--; 7535 #else 7536 col = aij->colmap[in[j]] - 1; 7537 #endif 7538 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 7539 
PetscCall(MatDisAssemble_MPIAIJ(mat)); 7540 col = in[j]; 7541 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 7542 B = aij->B; 7543 b = (Mat_SeqAIJ*)B->data; 7544 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 7545 rp2 = bj + bi[row]; 7546 ap2 = ba + bi[row]; 7547 rmax2 = bimax[row]; 7548 nrow2 = bilen[row]; 7549 low2 = 0; 7550 high2 = nrow2; 7551 bm = aij->B->rmap->n; 7552 ba = b->a; 7553 inserted = PETSC_FALSE; 7554 } 7555 } else col = in[j]; 7556 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 7557 } 7558 } 7559 } else if (!aij->donotstash) { 7560 if (roworiented) { 7561 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 7562 } else { 7563 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 7564 } 7565 } 7566 } 7567 PetscCall(MatSeqAIJRestoreArray(A,&aa)); 7568 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 7569 } 7570 PetscFunctionReturnVoid(); 7571 } 7572 /* Undefining these here since they were redefined from their original definition above! No 7573 * other PETSc functions should be defined past this point, as it is impossible to recover the 7574 * original definitions */ 7575 #undef PetscCall 7576 #undef SETERRQ 7577