1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 10 { 11 Mat B; 12 13 PetscFunctionBegin; 14 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B)); 15 PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B)); 16 PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done)); 17 PetscCall(MatDestroy(&B)); 18 PetscFunctionReturn(0); 19 } 20 21 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 22 { 23 Mat B; 24 25 PetscFunctionBegin; 26 PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B)); 27 PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done)); 28 PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",NULL)); 29 PetscFunctionReturn(0); 30 } 31 32 /*MC 33 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 34 35 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 36 and MATMPIAIJ otherwise. As a result, for single process communicators, 37 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 38 for communicators controlling multiple processes. It is recommended that you call both of 39 the above preallocation routines for simplicity. 40 41 Options Database Keys: 42 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 43 44 Developer Notes: 45 Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 46 enough exist. 47 48 Level: beginner 49 50 .seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 51 M*/ 52 53 /*MC 54 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 55 56 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 57 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 58 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 59 for communicators controlling multiple processes. It is recommended that you call both of 60 the above preallocation routines for simplicity. 61 62 Options Database Keys: 63 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 64 65 Level: beginner 66 67 .seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 68 M*/ 69 70 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 71 { 72 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 73 74 PetscFunctionBegin; 75 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 76 A->boundtocpu = flg; 77 #endif 78 if (a->A) PetscCall(MatBindToCPU(a->A,flg)); 79 if (a->B) PetscCall(MatBindToCPU(a->B,flg)); 80 81 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 82 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 83 * to differ from the parent matrix. */ 84 if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg)); 85 if (a->diag) PetscCall(VecBindToCPU(a->diag,flg)); 86 87 PetscFunctionReturn(0); 88 } 89 90 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 91 { 92 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 93 94 PetscFunctionBegin; 95 if (mat->A) { 96 PetscCall(MatSetBlockSizes(mat->A,rbs,cbs)); 97 PetscCall(MatSetBlockSizes(mat->B,rbs,1)); 98 } 99 PetscFunctionReturn(0); 100 } 101 102 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 103 { 104 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 105 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 106 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 107 const PetscInt *ia,*ib; 108 const MatScalar *aa,*bb,*aav,*bav; 109 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 110 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 111 112 PetscFunctionBegin; 113 *keptrows = NULL; 114 115 ia = a->i; 116 ib = b->i; 117 PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav)); 118 PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav)); 119 for (i=0; i<m; i++) { 120 na = ia[i+1] - ia[i]; 121 nb = ib[i+1] - ib[i]; 122 if (!na && !nb) { 123 cnt++; 124 goto ok1; 125 } 126 aa = aav + ia[i]; 127 for (j=0; j<na; j++) { 128 if (aa[j] != 0.0) goto ok1; 129 } 130 bb = bav + ib[i]; 131 for (j=0; j <nb; j++) { 132 if (bb[j] != 0.0) goto ok1; 133 } 134 cnt++; 135 ok1:; 136 } 137 PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M))); 138 if (!n0rows) { 139 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 140 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 141 PetscFunctionReturn(0); 142 } 143 PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows)); 144 cnt = 0; 145 for (i=0; i<m; i++) { 146 na = ia[i+1] - ia[i]; 147 nb = ib[i+1] - ib[i]; 148 if (!na && !nb) continue; 149 aa = aav + ia[i]; 150 for (j=0; j<na;j++) { 151 if (aa[j] != 0.0) { 152 rows[cnt++] = rstart + i; 153 goto ok2; 154 } 155 } 156 bb = bav + ib[i]; 157 for (j=0; j<nb; j++) { 158 if (bb[j] != 0.0) { 159 rows[cnt++] = rstart + i; 160 goto ok2; 161 } 162 } 163 ok2:; 164 } 165 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows)); 166 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 167 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 172 { 173 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 174 PetscBool cong; 175 176 PetscFunctionBegin; 177 PetscCall(MatHasCongruentLayouts(Y,&cong)); 178 if (Y->assembled && cong) { 179 PetscCall(MatDiagonalSet(aij->A,D,is)); 180 } else { 181 PetscCall(MatDiagonalSet_Default(Y,D,is)); 182 } 183 PetscFunctionReturn(0); 184 } 185 186 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 189 PetscInt i,rstart,nrows,*rows; 190 191 PetscFunctionBegin; 192 *zrows = NULL; 193 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows)); 194 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 195 for (i=0; i<nrows; i++) rows[i] += rstart; 196 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows)); 197 PetscFunctionReturn(0); 198 } 199 200 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions) 201 { 202 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 203 PetscInt i,m,n,*garray = aij->garray; 204 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 205 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 206 PetscReal *work; 207 const PetscScalar *dummy; 208 209 PetscFunctionBegin; 210 PetscCall(MatGetSize(A,&m,&n)); 211 PetscCall(PetscCalloc1(n,&work)); 212 PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy)); 213 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy)); 214 PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy)); 215 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy)); 216 if (type == NORM_2) { 217 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 218 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 219 } 220 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 221 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 222 } 223 } else if (type == NORM_1) { 224 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 225 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 226 } 227 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 228 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 229 } 230 } else if (type == NORM_INFINITY) { 231 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 232 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 233 } 234 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 235 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 236 } 237 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 238 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 239 work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 240 } 241 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 242 work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 243 } 244 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 245 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 246 work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 247 } 248 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 249 work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 250 } 251 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 252 if (type == NORM_INFINITY) { 253 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 254 } else { 255 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 256 } 257 PetscCall(PetscFree(work)); 258 if (type == NORM_2) { 259 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 260 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 261 for (i=0; i<n; i++) reductions[i] /= m; 262 } 263 PetscFunctionReturn(0); 264 } 265 266 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 267 { 268 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 269 IS sis,gis; 270 const PetscInt *isis,*igis; 271 PetscInt n,*iis,nsis,ngis,rstart,i; 272 273 PetscFunctionBegin; 274 PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis)); 275 PetscCall(MatFindNonzeroRows(a->B,&gis)); 276 PetscCall(ISGetSize(gis,&ngis)); 277 PetscCall(ISGetSize(sis,&nsis)); 278 PetscCall(ISGetIndices(sis,&isis)); 279 PetscCall(ISGetIndices(gis,&igis)); 280 281 PetscCall(PetscMalloc1(ngis+nsis,&iis)); 282 PetscCall(PetscArraycpy(iis,igis,ngis)); 283 PetscCall(PetscArraycpy(iis+ngis,isis,nsis)); 284 n = ngis + nsis; 285 PetscCall(PetscSortRemoveDupsInt(&n,iis)); 286 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 287 for (i=0; i<n; i++) iis[i] += rstart; 288 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is)); 289 290 PetscCall(ISRestoreIndices(sis,&isis)); 291 PetscCall(ISRestoreIndices(gis,&igis)); 292 PetscCall(ISDestroy(&sis)); 293 PetscCall(ISDestroy(&gis)); 294 PetscFunctionReturn(0); 295 } 296 297 /* 298 Local utility routine that creates a mapping from the global column 299 number to the local number in the off-diagonal part of the local 300 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 301 a slightly higher hash table cost; without it it is not scalable (each processor 302 has an order N integer array but is fast to access. 303 */ 304 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 305 { 306 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 307 PetscInt n = aij->B->cmap->n,i; 308 309 PetscFunctionBegin; 310 PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 311 #if defined(PETSC_USE_CTABLE) 312 PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap)); 313 for (i=0; i<n; i++) { 314 PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES)); 315 } 316 #else 317 PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap)); 318 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt))); 319 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 320 #endif 321 PetscFunctionReturn(0); 322 } 323 324 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 325 { \ 326 if (col <= lastcol1) low1 = 0; \ 327 else high1 = nrow1; \ 328 lastcol1 = col;\ 329 while (high1-low1 > 5) { \ 330 t = (low1+high1)/2; \ 331 if (rp1[t] > col) high1 = t; \ 332 else low1 = t; \ 333 } \ 334 for (_i=low1; _i<high1; _i++) { \ 335 if (rp1[_i] > col) break; \ 336 if (rp1[_i] == col) { \ 337 if (addv == ADD_VALUES) { \ 338 ap1[_i] += value; \ 339 /* Not sure LogFlops will slow dow the code or not */ \ 340 (void)PetscLogFlops(1.0); \ 341 } \ 342 else ap1[_i] = value; \ 343 goto a_noinsert; \ 344 } \ 345 } \ 346 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 347 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 348 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 349 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 350 N = nrow1++ - 1; a->nz++; high1++; \ 351 /* shift up all the later entries in this row */ \ 352 PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\ 353 PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\ 354 rp1[_i] = col; \ 355 ap1[_i] = value; \ 356 A->nonzerostate++;\ 357 a_noinsert: ; \ 358 ailen[row] = nrow1; \ 359 } 360 361 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 362 { \ 363 if (col <= lastcol2) low2 = 0; \ 364 else high2 = nrow2; \ 365 lastcol2 = col; \ 366 while (high2-low2 > 5) { \ 367 t = (low2+high2)/2; \ 368 if (rp2[t] > col) high2 = t; \ 369 else low2 = t; \ 370 } \ 371 for (_i=low2; _i<high2; _i++) { \ 372 if (rp2[_i] > col) break; \ 373 if (rp2[_i] == col) { \ 374 if (addv == ADD_VALUES) { \ 375 ap2[_i] += value; \ 376 (void)PetscLogFlops(1.0); \ 377 } \ 378 else ap2[_i] = value; \ 379 goto b_noinsert; \ 380 } \ 381 } \ 382 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 383 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 384 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 385 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 386 N = nrow2++ - 1; b->nz++; high2++; \ 387 /* shift up all the later entries in this row */ \ 388 PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\ 389 PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\ 390 rp2[_i] = col; \ 391 ap2[_i] = value; \ 392 B->nonzerostate++; \ 393 b_noinsert: ; \ 394 bilen[row] = nrow2; \ 395 } 396 397 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 398 { 399 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 400 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 401 PetscInt l,*garray = mat->garray,diag; 402 PetscScalar *aa,*ba; 403 404 PetscFunctionBegin; 405 /* code only works for square matrices A */ 406 407 /* find size of row to the left of the diagonal part */ 408 PetscCall(MatGetOwnershipRange(A,&diag,NULL)); 409 row = row - diag; 410 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 411 if (garray[b->j[b->i[row]+l]] > diag) break; 412 } 413 if (l) { 414 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 415 PetscCall(PetscArraycpy(ba+b->i[row],v,l)); 416 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 417 } 418 419 /* diagonal part */ 420 if (a->i[row+1]-a->i[row]) { 421 PetscCall(MatSeqAIJGetArray(mat->A,&aa)); 422 PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]))); 423 PetscCall(MatSeqAIJRestoreArray(mat->A,&aa)); 424 } 425 426 /* right of diagonal part */ 427 if (b->i[row+1]-b->i[row]-l) { 428 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 429 PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l)); 430 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 431 } 432 PetscFunctionReturn(0); 433 } 434 435 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 436 { 437 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 438 PetscScalar value = 0.0; 439 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 440 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 441 PetscBool roworiented = aij->roworiented; 442 443 /* Some Variables required in the macro */ 444 Mat A = aij->A; 445 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 446 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 447 PetscBool ignorezeroentries = a->ignorezeroentries; 448 Mat B = aij->B; 449 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 450 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 451 MatScalar *aa,*ba; 452 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 453 PetscInt nonew; 454 MatScalar *ap1,*ap2; 455 456 PetscFunctionBegin; 457 PetscCall(MatSeqAIJGetArray(A,&aa)); 458 PetscCall(MatSeqAIJGetArray(B,&ba)); 459 for (i=0; i<m; i++) { 460 if (im[i] < 0) continue; 461 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 462 if (im[i] >= rstart && im[i] < rend) { 463 row = im[i] - rstart; 464 lastcol1 = -1; 465 rp1 = aj + ai[row]; 466 ap1 = aa + ai[row]; 467 rmax1 = aimax[row]; 468 nrow1 = ailen[row]; 469 low1 = 0; 470 high1 = nrow1; 471 lastcol2 = -1; 472 rp2 = bj + bi[row]; 473 ap2 = ba + bi[row]; 474 rmax2 = bimax[row]; 475 nrow2 = bilen[row]; 476 low2 = 0; 477 high2 = nrow2; 478 479 for (j=0; j<n; j++) { 480 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 481 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 482 if (in[j] >= cstart && in[j] < cend) { 483 col = in[j] - cstart; 484 nonew = a->nonew; 485 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 486 } else if (in[j] < 0) { 487 continue; 488 } else { 489 PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 490 if (mat->was_assembled) { 491 if (!aij->colmap) { 492 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 493 } 494 #if defined(PETSC_USE_CTABLE) 495 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */ 496 col--; 497 #else 498 col = aij->colmap[in[j]] - 1; 499 #endif 500 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 501 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 502 col = in[j]; 503 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 504 B = aij->B; 505 b = (Mat_SeqAIJ*)B->data; 506 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 507 rp2 = bj + bi[row]; 508 ap2 = ba + bi[row]; 509 rmax2 = bimax[row]; 510 nrow2 = bilen[row]; 511 low2 = 0; 512 high2 = nrow2; 513 bm = aij->B->rmap->n; 514 ba = b->a; 515 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 516 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 517 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j])); 518 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 519 } 520 } else col = in[j]; 521 nonew = b->nonew; 522 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 523 } 524 } 525 } else { 526 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 527 if (!aij->donotstash) { 528 mat->assembled = PETSC_FALSE; 529 if (roworiented) { 530 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 531 } else { 532 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 533 } 534 } 535 } 536 } 537 PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 538 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 539 PetscFunctionReturn(0); 540 } 541 542 /* 543 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 544 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 545 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 546 */ 547 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 548 { 549 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 550 Mat A = aij->A; /* diagonal part of the matrix */ 551 Mat B = aij->B; /* offdiagonal part of the matrix */ 552 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 553 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 554 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 555 PetscInt *ailen = a->ilen,*aj = a->j; 556 PetscInt *bilen = b->ilen,*bj = b->j; 557 PetscInt am = aij->A->rmap->n,j; 558 PetscInt diag_so_far = 0,dnz; 559 PetscInt offd_so_far = 0,onz; 560 561 PetscFunctionBegin; 562 /* Iterate over all rows of the matrix */ 563 for (j=0; j<am; j++) { 564 dnz = onz = 0; 565 /* Iterate over all non-zero columns of the current row */ 566 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 567 /* If column is in the diagonal */ 568 if (mat_j[col] >= cstart && mat_j[col] < cend) { 569 aj[diag_so_far++] = mat_j[col] - cstart; 570 dnz++; 571 } else { /* off-diagonal entries */ 572 bj[offd_so_far++] = mat_j[col]; 573 onz++; 574 } 575 } 576 ailen[j] = dnz; 577 bilen[j] = onz; 578 } 579 PetscFunctionReturn(0); 580 } 581 582 /* 583 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 584 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 585 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 586 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 587 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 588 */ 589 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 590 { 591 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 592 Mat A = aij->A; /* diagonal part of the matrix */ 593 Mat B = aij->B; /* offdiagonal part of the matrix */ 594 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 595 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 596 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 597 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 598 PetscInt *ailen = a->ilen,*aj = a->j; 599 PetscInt *bilen = b->ilen,*bj = b->j; 600 PetscInt am = aij->A->rmap->n,j; 601 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 602 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 603 PetscScalar *aa = a->a,*ba = b->a; 604 605 PetscFunctionBegin; 606 /* Iterate over all rows of the matrix */ 607 for (j=0; j<am; j++) { 608 dnz_row = onz_row = 0; 609 rowstart_offd = full_offd_i[j]; 610 rowstart_diag = full_diag_i[j]; 611 /* Iterate over all non-zero columns of the current row */ 612 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 613 /* If column is in the diagonal */ 614 if (mat_j[col] >= cstart && mat_j[col] < cend) { 615 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 616 aa[rowstart_diag+dnz_row] = mat_a[col]; 617 dnz_row++; 618 } else { /* off-diagonal entries */ 619 bj[rowstart_offd+onz_row] = mat_j[col]; 620 ba[rowstart_offd+onz_row] = mat_a[col]; 621 onz_row++; 622 } 623 } 624 ailen[j] = dnz_row; 625 bilen[j] = onz_row; 626 } 627 PetscFunctionReturn(0); 628 } 629 630 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 631 { 632 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 633 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 634 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 635 636 PetscFunctionBegin; 637 for (i=0; i<m; i++) { 638 if (idxm[i] < 0) continue; /* negative row */ 639 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 640 if (idxm[i] >= rstart && idxm[i] < rend) { 641 row = idxm[i] - rstart; 642 for (j=0; j<n; j++) { 643 if (idxn[j] < 0) continue; /* negative column */ 644 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 645 if (idxn[j] >= cstart && idxn[j] < cend) { 646 col = idxn[j] - cstart; 647 PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j)); 648 } else { 649 if (!aij->colmap) { 650 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 651 } 652 #if defined(PETSC_USE_CTABLE) 653 PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col)); 654 col--; 655 #else 656 col = aij->colmap[idxn[j]] - 1; 657 #endif 658 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 659 else { 660 PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j)); 661 } 662 } 663 } 664 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 665 } 666 PetscFunctionReturn(0); 667 } 668 669 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 670 { 671 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 672 PetscInt nstash,reallocs; 673 674 PetscFunctionBegin; 675 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 676 677 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 678 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 679 PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 680 PetscFunctionReturn(0); 681 } 682 683 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 684 { 685 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 686 PetscMPIInt n; 687 PetscInt i,j,rstart,ncols,flg; 688 PetscInt *row,*col; 689 PetscBool other_disassembled; 690 PetscScalar *val; 691 692 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 693 694 PetscFunctionBegin; 695 if (!aij->donotstash && !mat->nooffprocentries) { 696 while (1) { 697 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 698 if (!flg) break; 699 700 for (i=0; i<n;) { 701 /* Now identify the consecutive vals belonging to the same row */ 702 for (j=i,rstart=row[j]; j<n; j++) { 703 if (row[j] != rstart) break; 704 } 705 if (j < n) ncols = j-i; 706 else ncols = n-i; 707 /* Now assemble all these values with a single function call */ 708 PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 709 i = j; 710 } 711 } 712 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 713 } 714 #if defined(PETSC_HAVE_DEVICE) 715 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 716 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 717 if (mat->boundtocpu) { 718 PetscCall(MatBindToCPU(aij->A,PETSC_TRUE)); 719 PetscCall(MatBindToCPU(aij->B,PETSC_TRUE)); 720 } 721 #endif 722 PetscCall(MatAssemblyBegin(aij->A,mode)); 723 PetscCall(MatAssemblyEnd(aij->A,mode)); 724 725 /* determine if any processor has disassembled, if so we must 726 also disassemble ourself, in order that we may reassemble. */ 727 /* 728 if nonzero structure of submatrix B cannot change then we know that 729 no processor disassembled thus we can skip this stuff 730 */ 731 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 732 PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat))); 733 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */ 734 PetscCall(MatDisAssemble_MPIAIJ(mat)); 735 } 736 } 737 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 738 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 739 } 740 PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE)); 741 #if defined(PETSC_HAVE_DEVICE) 742 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 743 #endif 744 PetscCall(MatAssemblyBegin(aij->B,mode)); 745 PetscCall(MatAssemblyEnd(aij->B,mode)); 746 747 PetscCall(PetscFree2(aij->rowvalues,aij->rowindices)); 748 749 aij->rowvalues = NULL; 750 751 PetscCall(VecDestroy(&aij->diag)); 752 753 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 754 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 755 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 756 PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat))); 757 } 758 #if defined(PETSC_HAVE_DEVICE) 759 mat->offloadmask = PETSC_OFFLOAD_BOTH; 760 #endif 761 PetscFunctionReturn(0); 762 } 763 764 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 765 { 766 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 767 768 PetscFunctionBegin; 769 PetscCall(MatZeroEntries(l->A)); 770 PetscCall(MatZeroEntries(l->B)); 771 PetscFunctionReturn(0); 772 } 773 774 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 775 { 776 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 777 PetscObjectState sA, sB; 778 PetscInt *lrows; 779 PetscInt r, len; 780 PetscBool cong, lch, gch; 781 782 PetscFunctionBegin; 783 /* get locally owned rows */ 784 PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows)); 785 PetscCall(MatHasCongruentLayouts(A,&cong)); 786 /* fix right hand side if needed */ 787 if (x && b) { 788 const PetscScalar *xx; 789 PetscScalar *bb; 790 791 PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 792 PetscCall(VecGetArrayRead(x, &xx)); 793 PetscCall(VecGetArray(b, &bb)); 794 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 795 PetscCall(VecRestoreArrayRead(x, &xx)); 796 PetscCall(VecRestoreArray(b, &bb)); 797 } 798 799 sA = mat->A->nonzerostate; 800 sB = mat->B->nonzerostate; 801 802 if (diag != 0.0 && cong) { 803 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 804 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 805 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 806 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 807 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 808 PetscInt nnwA, nnwB; 809 PetscBool nnzA, nnzB; 810 811 nnwA = aijA->nonew; 812 nnwB = aijB->nonew; 813 nnzA = aijA->keepnonzeropattern; 814 nnzB = aijB->keepnonzeropattern; 815 if (!nnzA) { 816 PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 817 aijA->nonew = 0; 818 } 819 if (!nnzB) { 820 PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 821 aijB->nonew = 0; 822 } 823 /* Must zero here before the next loop */ 824 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 825 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 826 for (r = 0; r < len; ++r) { 827 const PetscInt row = lrows[r] + A->rmap->rstart; 828 if (row >= A->cmap->N) continue; 829 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 830 } 831 aijA->nonew = nnwA; 832 aijB->nonew = nnwB; 833 } else { 834 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 835 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 836 } 837 PetscCall(PetscFree(lrows)); 838 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 839 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 840 841 /* reduce nonzerostate */ 842 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 843 PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A))); 844 if (gch) A->nonzerostate++; 845 PetscFunctionReturn(0); 846 } 847 848 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 849 { 850 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 851 PetscMPIInt n = A->rmap->n; 852 PetscInt i,j,r,m,len = 0; 853 PetscInt *lrows,*owners = A->rmap->range; 854 PetscMPIInt p = 0; 855 PetscSFNode *rrows; 856 PetscSF sf; 857 const PetscScalar *xx; 858 PetscScalar *bb,*mask,*aij_a; 859 Vec xmask,lmask; 860 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 861 const PetscInt *aj, *ii,*ridx; 862 PetscScalar *aa; 863 864 PetscFunctionBegin; 865 /* Create SF where leaves are input rows and roots are owned rows */ 866 PetscCall(PetscMalloc1(n, &lrows)); 867 for (r = 0; r < n; ++r) lrows[r] = -1; 868 PetscCall(PetscMalloc1(N, &rrows)); 869 for (r = 0; r < N; ++r) { 870 const PetscInt idx = rows[r]; 871 PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 872 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 873 PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p)); 874 } 875 rrows[r].rank = p; 876 rrows[r].index = rows[r] - owners[p]; 877 } 878 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf)); 879 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 880 /* Collect flags for rows to be zeroed */ 881 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 882 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 883 PetscCall(PetscSFDestroy(&sf)); 884 /* Compress and put in row numbers */ 885 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 886 /* zero diagonal part of matrix */ 887 PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b)); 888 /* handle off diagonal part of matrix */ 889 PetscCall(MatCreateVecs(A,&xmask,NULL)); 890 PetscCall(VecDuplicate(l->lvec,&lmask)); 891 PetscCall(VecGetArray(xmask,&bb)); 892 for (i=0; i<len; i++) bb[lrows[i]] = 1; 893 PetscCall(VecRestoreArray(xmask,&bb)); 894 PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 895 PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 896 PetscCall(VecDestroy(&xmask)); 897 if (x && b) { /* this code is buggy when the row and column layout don't match */ 898 PetscBool cong; 899 900 PetscCall(MatHasCongruentLayouts(A,&cong)); 901 PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 902 PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 903 PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 904 PetscCall(VecGetArrayRead(l->lvec,&xx)); 905 PetscCall(VecGetArray(b,&bb)); 906 } 907 PetscCall(VecGetArray(lmask,&mask)); 908 /* remove zeroed rows of off diagonal matrix */ 909 PetscCall(MatSeqAIJGetArray(l->B,&aij_a)); 910 ii = aij->i; 911 for (i=0; i<len; i++) { 912 PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]])); 913 } 914 /* loop over all elements of off process part of matrix zeroing removed columns*/ 915 if (aij->compressedrow.use) { 916 m = aij->compressedrow.nrows; 917 ii = aij->compressedrow.i; 918 ridx = aij->compressedrow.rindex; 919 for (i=0; i<m; i++) { 920 n = ii[i+1] - ii[i]; 921 aj = aij->j + ii[i]; 922 aa = aij_a + ii[i]; 923 924 for (j=0; j<n; j++) { 925 if (PetscAbsScalar(mask[*aj])) { 926 if (b) bb[*ridx] -= *aa*xx[*aj]; 927 *aa = 0.0; 928 } 929 aa++; 930 aj++; 931 } 932 ridx++; 933 } 934 } else { /* do not use compressed row format */ 935 m = l->B->rmap->n; 936 for (i=0; i<m; i++) { 937 n = ii[i+1] - ii[i]; 938 aj = aij->j + ii[i]; 939 aa = aij_a + ii[i]; 940 for (j=0; j<n; j++) { 941 if (PetscAbsScalar(mask[*aj])) { 942 if (b) bb[i] -= *aa*xx[*aj]; 943 *aa = 0.0; 944 } 945 aa++; 946 aj++; 947 } 948 } 949 } 950 if (x && b) { 951 PetscCall(VecRestoreArray(b,&bb)); 952 PetscCall(VecRestoreArrayRead(l->lvec,&xx)); 953 } 954 PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a)); 955 PetscCall(VecRestoreArray(lmask,&mask)); 956 PetscCall(VecDestroy(&lmask)); 957 PetscCall(PetscFree(lrows)); 958 959 /* only change matrix nonzero state if pattern was allowed to be changed */ 960 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 961 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 962 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 963 } 964 PetscFunctionReturn(0); 965 } 966 967 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 968 { 969 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 970 PetscInt nt; 971 VecScatter Mvctx = a->Mvctx; 972 973 PetscFunctionBegin; 974 PetscCall(VecGetLocalSize(xx,&nt)); 975 PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt); 976 PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 977 PetscCall((*a->A->ops->mult)(a->A,xx,yy)); 978 PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 979 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy)); 980 PetscFunctionReturn(0); 981 } 982 983 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 984 { 985 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 986 987 PetscFunctionBegin; 988 PetscCall(MatMultDiagonalBlock(a->A,bb,xx)); 989 PetscFunctionReturn(0); 990 } 991 992 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 993 { 994 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 995 VecScatter Mvctx = a->Mvctx; 996 997 PetscFunctionBegin; 998 PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 999 PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz)); 1000 PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1001 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz)); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1006 { 1007 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1008 1009 PetscFunctionBegin; 1010 /* do nondiagonal part */ 1011 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1012 /* do local part */ 1013 PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy)); 1014 /* add partial results together */ 1015 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1016 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1017 PetscFunctionReturn(0); 1018 } 1019 1020 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1021 { 1022 MPI_Comm comm; 1023 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1024 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1025 IS Me,Notme; 1026 PetscInt M,N,first,last,*notme,i; 1027 PetscBool lf; 1028 PetscMPIInt size; 1029 1030 PetscFunctionBegin; 1031 /* Easy test: symmetric diagonal block */ 1032 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1033 PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf)); 1034 PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat))); 1035 if (!*f) PetscFunctionReturn(0); 1036 PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm)); 1037 PetscCallMPI(MPI_Comm_size(comm,&size)); 1038 if (size == 1) PetscFunctionReturn(0); 1039 1040 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1041 PetscCall(MatGetSize(Amat,&M,&N)); 1042 PetscCall(MatGetOwnershipRange(Amat,&first,&last)); 1043 PetscCall(PetscMalloc1(N-last+first,¬me)); 1044 for (i=0; i<first; i++) notme[i] = i; 1045 for (i=last; i<M; i++) notme[i-last+first] = i; 1046 PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme)); 1047 PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me)); 1048 PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs)); 1049 Aoff = Aoffs[0]; 1050 PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs)); 1051 Boff = Boffs[0]; 1052 PetscCall(MatIsTranspose(Aoff,Boff,tol,f)); 1053 PetscCall(MatDestroyMatrices(1,&Aoffs)); 1054 PetscCall(MatDestroyMatrices(1,&Boffs)); 1055 PetscCall(ISDestroy(&Me)); 1056 PetscCall(ISDestroy(&Notme)); 1057 PetscCall(PetscFree(notme)); 1058 PetscFunctionReturn(0); 1059 } 1060 1061 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1062 { 1063 PetscFunctionBegin; 1064 PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f)); 1065 PetscFunctionReturn(0); 1066 } 1067 1068 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1069 { 1070 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1071 1072 PetscFunctionBegin; 1073 /* do nondiagonal part */ 1074 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1075 /* do local part */ 1076 PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz)); 1077 /* add partial results together */ 1078 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1079 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1080 PetscFunctionReturn(0); 1081 } 1082 1083 /* 1084 This only works correctly for square matrices where the subblock A->A is the 1085 diagonal block 1086 */ 1087 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1088 { 1089 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1090 1091 PetscFunctionBegin; 1092 PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1093 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1094 PetscCall(MatGetDiagonal(a->A,v)); 1095 PetscFunctionReturn(0); 1096 } 1097 1098 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1099 { 1100 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1101 1102 PetscFunctionBegin; 1103 PetscCall(MatScale(a->A,aa)); 1104 PetscCall(MatScale(a->B,aa)); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */ 1109 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) 1110 { 1111 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1112 1113 PetscFunctionBegin; 1114 PetscCall(PetscSFDestroy(&aij->coo_sf)); 1115 PetscCall(PetscFree(aij->Aperm1)); 1116 PetscCall(PetscFree(aij->Bperm1)); 1117 PetscCall(PetscFree(aij->Ajmap1)); 1118 PetscCall(PetscFree(aij->Bjmap1)); 1119 1120 PetscCall(PetscFree(aij->Aimap2)); 1121 PetscCall(PetscFree(aij->Bimap2)); 1122 PetscCall(PetscFree(aij->Aperm2)); 1123 PetscCall(PetscFree(aij->Bperm2)); 1124 PetscCall(PetscFree(aij->Ajmap2)); 1125 PetscCall(PetscFree(aij->Bjmap2)); 1126 1127 PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf)); 1128 PetscCall(PetscFree(aij->Cperm1)); 1129 PetscFunctionReturn(0); 1130 } 1131 1132 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1133 { 1134 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1135 1136 PetscFunctionBegin; 1137 #if defined(PETSC_USE_LOG) 1138 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 1139 #endif 1140 PetscCall(MatStashDestroy_Private(&mat->stash)); 1141 PetscCall(VecDestroy(&aij->diag)); 1142 PetscCall(MatDestroy(&aij->A)); 1143 PetscCall(MatDestroy(&aij->B)); 1144 #if defined(PETSC_USE_CTABLE) 1145 PetscCall(PetscTableDestroy(&aij->colmap)); 1146 #else 1147 PetscCall(PetscFree(aij->colmap)); 1148 #endif 1149 PetscCall(PetscFree(aij->garray)); 1150 PetscCall(VecDestroy(&aij->lvec)); 1151 PetscCall(VecScatterDestroy(&aij->Mvctx)); 1152 PetscCall(PetscFree2(aij->rowvalues,aij->rowindices)); 1153 PetscCall(PetscFree(aij->ld)); 1154 1155 /* Free COO */ 1156 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 1157 1158 PetscCall(PetscFree(mat->data)); 1159 1160 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1161 PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL)); 1162 1163 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 1164 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL)); 1165 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL)); 1166 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL)); 1167 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL)); 1168 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL)); 1169 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL)); 1170 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL)); 1171 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL)); 1172 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL)); 1173 #if defined(PETSC_HAVE_CUDA) 1174 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL)); 1175 #endif 1176 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1177 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL)); 1178 #endif 1179 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL)); 1180 #if defined(PETSC_HAVE_ELEMENTAL) 1181 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL)); 1182 #endif 1183 #if defined(PETSC_HAVE_SCALAPACK) 1184 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL)); 1185 #endif 1186 #if defined(PETSC_HAVE_HYPRE) 1187 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL)); 1188 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL)); 1189 #endif 1190 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 1191 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL)); 1192 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL)); 1193 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL)); 1194 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL)); 1195 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL)); 1196 #if defined(PETSC_HAVE_MKL_SPARSE) 1197 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL)); 1198 #endif 1199 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL)); 1200 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 1201 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL)); 1202 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL)); 1203 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL)); 1204 PetscFunctionReturn(0); 1205 } 1206 1207 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1208 { 1209 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1210 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1211 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1212 const PetscInt *garray = aij->garray; 1213 const PetscScalar *aa,*ba; 1214 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1215 PetscInt *rowlens; 1216 PetscInt *colidxs; 1217 PetscScalar *matvals; 1218 1219 PetscFunctionBegin; 1220 PetscCall(PetscViewerSetUp(viewer)); 1221 1222 M = mat->rmap->N; 1223 N = mat->cmap->N; 1224 m = mat->rmap->n; 1225 rs = mat->rmap->rstart; 1226 cs = mat->cmap->rstart; 1227 nz = A->nz + B->nz; 1228 1229 /* write matrix header */ 1230 header[0] = MAT_FILE_CLASSID; 1231 header[1] = M; header[2] = N; header[3] = nz; 1232 PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat))); 1233 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1234 1235 /* fill in and store row lengths */ 1236 PetscCall(PetscMalloc1(m,&rowlens)); 1237 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1238 PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT)); 1239 PetscCall(PetscFree(rowlens)); 1240 1241 /* fill in and store column indices */ 1242 PetscCall(PetscMalloc1(nz,&colidxs)); 1243 for (cnt=0, i=0; i<m; i++) { 1244 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1245 if (garray[B->j[jb]] > cs) break; 1246 colidxs[cnt++] = garray[B->j[jb]]; 1247 } 1248 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1249 colidxs[cnt++] = A->j[ja] + cs; 1250 for (; jb<B->i[i+1]; jb++) 1251 colidxs[cnt++] = garray[B->j[jb]]; 1252 } 1253 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1254 PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 1255 PetscCall(PetscFree(colidxs)); 1256 1257 /* fill in and store nonzero values */ 1258 PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa)); 1259 PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba)); 1260 PetscCall(PetscMalloc1(nz,&matvals)); 1261 for (cnt=0, i=0; i<m; i++) { 1262 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1263 if (garray[B->j[jb]] > cs) break; 1264 matvals[cnt++] = ba[jb]; 1265 } 1266 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1267 matvals[cnt++] = aa[ja]; 1268 for (; jb<B->i[i+1]; jb++) 1269 matvals[cnt++] = ba[jb]; 1270 } 1271 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa)); 1272 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba)); 1273 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1274 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 1275 PetscCall(PetscFree(matvals)); 1276 1277 /* write block size option to the viewer's .info file */ 1278 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1279 PetscFunctionReturn(0); 1280 } 1281 1282 #include <petscdraw.h> 1283 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1284 { 1285 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1286 PetscMPIInt rank = aij->rank,size = aij->size; 1287 PetscBool isdraw,iascii,isbinary; 1288 PetscViewer sviewer; 1289 PetscViewerFormat format; 1290 1291 PetscFunctionBegin; 1292 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1293 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1294 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1295 if (iascii) { 1296 PetscCall(PetscViewerGetFormat(viewer,&format)); 1297 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1298 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1299 PetscCall(PetscMalloc1(size,&nz)); 1300 PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 1301 for (i=0; i<(PetscInt)size; i++) { 1302 nmax = PetscMax(nmax,nz[i]); 1303 nmin = PetscMin(nmin,nz[i]); 1304 navg += nz[i]; 1305 } 1306 PetscCall(PetscFree(nz)); 1307 navg = navg/size; 1308 PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax)); 1309 PetscFunctionReturn(0); 1310 } 1311 PetscCall(PetscViewerGetFormat(viewer,&format)); 1312 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1313 MatInfo info; 1314 PetscInt *inodes=NULL; 1315 1316 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 1317 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 1318 PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL)); 1319 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1320 if (!inodes) { 1321 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", 1322 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1323 } else { 1324 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", 1325 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1326 } 1327 PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info)); 1328 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1329 PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info)); 1330 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1331 PetscCall(PetscViewerFlush(viewer)); 1332 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1333 PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 1334 PetscCall(VecScatterView(aij->Mvctx,viewer)); 1335 PetscFunctionReturn(0); 1336 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1337 PetscInt inodecount,inodelimit,*inodes; 1338 PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit)); 1339 if (inodes) { 1340 PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit)); 1341 } else { 1342 PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n")); 1343 } 1344 PetscFunctionReturn(0); 1345 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1346 PetscFunctionReturn(0); 1347 } 1348 } else if (isbinary) { 1349 if (size == 1) { 1350 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1351 PetscCall(MatView(aij->A,viewer)); 1352 } else { 1353 PetscCall(MatView_MPIAIJ_Binary(mat,viewer)); 1354 } 1355 PetscFunctionReturn(0); 1356 } else if (iascii && size == 1) { 1357 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1358 PetscCall(MatView(aij->A,viewer)); 1359 PetscFunctionReturn(0); 1360 } else if (isdraw) { 1361 PetscDraw draw; 1362 PetscBool isnull; 1363 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1364 PetscCall(PetscDrawIsNull(draw,&isnull)); 1365 if (isnull) PetscFunctionReturn(0); 1366 } 1367 1368 { /* assemble the entire matrix onto first processor */ 1369 Mat A = NULL, Av; 1370 IS isrow,iscol; 1371 1372 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1373 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1374 PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A)); 1375 PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL)); 1376 /* The commented code uses MatCreateSubMatrices instead */ 1377 /* 1378 Mat *AA, A = NULL, Av; 1379 IS isrow,iscol; 1380 1381 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1382 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1383 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1384 if (rank == 0) { 1385 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1386 A = AA[0]; 1387 Av = AA[0]; 1388 } 1389 PetscCall(MatDestroySubMatrices(1,&AA)); 1390 */ 1391 PetscCall(ISDestroy(&iscol)); 1392 PetscCall(ISDestroy(&isrow)); 1393 /* 1394 Everyone has to call to draw the matrix since the graphics waits are 1395 synchronized across all processors that share the PetscDraw object 1396 */ 1397 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1398 if (rank == 0) { 1399 if (((PetscObject)mat)->name) { 1400 PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name)); 1401 } 1402 PetscCall(MatView_SeqAIJ(Av,sviewer)); 1403 } 1404 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1405 PetscCall(PetscViewerFlush(viewer)); 1406 PetscCall(MatDestroy(&A)); 1407 } 1408 PetscFunctionReturn(0); 1409 } 1410 1411 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1412 { 1413 PetscBool iascii,isdraw,issocket,isbinary; 1414 1415 PetscFunctionBegin; 1416 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1417 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1418 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1419 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 1420 if (iascii || isdraw || isbinary || issocket) { 1421 PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer)); 1422 } 1423 PetscFunctionReturn(0); 1424 } 1425 1426 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1427 { 1428 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1429 Vec bb1 = NULL; 1430 PetscBool hasop; 1431 1432 PetscFunctionBegin; 1433 if (flag == SOR_APPLY_UPPER) { 1434 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1439 PetscCall(VecDuplicate(bb,&bb1)); 1440 } 1441 1442 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1443 if (flag & SOR_ZERO_INITIAL_GUESS) { 1444 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1445 its--; 1446 } 1447 1448 while (its--) { 1449 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1450 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1451 1452 /* update rhs: bb1 = bb - B*x */ 1453 PetscCall(VecScale(mat->lvec,-1.0)); 1454 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1455 1456 /* local sweep */ 1457 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 1458 } 1459 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1460 if (flag & SOR_ZERO_INITIAL_GUESS) { 1461 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1462 its--; 1463 } 1464 while (its--) { 1465 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1466 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1467 1468 /* update rhs: bb1 = bb - B*x */ 1469 PetscCall(VecScale(mat->lvec,-1.0)); 1470 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1471 1472 /* local sweep */ 1473 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 1474 } 1475 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1476 if (flag & SOR_ZERO_INITIAL_GUESS) { 1477 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1478 its--; 1479 } 1480 while (its--) { 1481 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1482 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1483 1484 /* update rhs: bb1 = bb - B*x */ 1485 PetscCall(VecScale(mat->lvec,-1.0)); 1486 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1487 1488 /* local sweep */ 1489 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 1490 } 1491 } else if (flag & SOR_EISENSTAT) { 1492 Vec xx1; 1493 1494 PetscCall(VecDuplicate(bb,&xx1)); 1495 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx)); 1496 1497 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1498 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1499 if (!mat->diag) { 1500 PetscCall(MatCreateVecs(matin,&mat->diag,NULL)); 1501 PetscCall(MatGetDiagonal(matin,mat->diag)); 1502 } 1503 PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop)); 1504 if (hasop) { 1505 PetscCall(MatMultDiagonalBlock(matin,xx,bb1)); 1506 } else { 1507 PetscCall(VecPointwiseMult(bb1,mat->diag,xx)); 1508 } 1509 PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb)); 1510 1511 PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1)); 1512 1513 /* local sweep */ 1514 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1)); 1515 PetscCall(VecAXPY(xx,1.0,xx1)); 1516 PetscCall(VecDestroy(&xx1)); 1517 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1518 1519 PetscCall(VecDestroy(&bb1)); 1520 1521 matin->factorerrortype = mat->A->factorerrortype; 1522 PetscFunctionReturn(0); 1523 } 1524 1525 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1526 { 1527 Mat aA,aB,Aperm; 1528 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1529 PetscScalar *aa,*ba; 1530 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1531 PetscSF rowsf,sf; 1532 IS parcolp = NULL; 1533 PetscBool done; 1534 1535 PetscFunctionBegin; 1536 PetscCall(MatGetLocalSize(A,&m,&n)); 1537 PetscCall(ISGetIndices(rowp,&rwant)); 1538 PetscCall(ISGetIndices(colp,&cwant)); 1539 PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest)); 1540 1541 /* Invert row permutation to find out where my rows should go */ 1542 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf)); 1543 PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant)); 1544 PetscCall(PetscSFSetFromOptions(rowsf)); 1545 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1546 PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1547 PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1548 1549 /* Invert column permutation to find out where my columns should go */ 1550 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1551 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant)); 1552 PetscCall(PetscSFSetFromOptions(sf)); 1553 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1554 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1555 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1556 PetscCall(PetscSFDestroy(&sf)); 1557 1558 PetscCall(ISRestoreIndices(rowp,&rwant)); 1559 PetscCall(ISRestoreIndices(colp,&cwant)); 1560 PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols)); 1561 1562 /* Find out where my gcols should go */ 1563 PetscCall(MatGetSize(aB,NULL,&ng)); 1564 PetscCall(PetscMalloc1(ng,&gcdest)); 1565 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1566 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols)); 1567 PetscCall(PetscSFSetFromOptions(sf)); 1568 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1569 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1570 PetscCall(PetscSFDestroy(&sf)); 1571 1572 PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz)); 1573 PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1574 PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1575 for (i=0; i<m; i++) { 1576 PetscInt row = rdest[i]; 1577 PetscMPIInt rowner; 1578 PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner)); 1579 for (j=ai[i]; j<ai[i+1]; j++) { 1580 PetscInt col = cdest[aj[j]]; 1581 PetscMPIInt cowner; 1582 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */ 1583 if (rowner == cowner) dnnz[i]++; 1584 else onnz[i]++; 1585 } 1586 for (j=bi[i]; j<bi[i+1]; j++) { 1587 PetscInt col = gcdest[bj[j]]; 1588 PetscMPIInt cowner; 1589 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); 1590 if (rowner == cowner) dnnz[i]++; 1591 else onnz[i]++; 1592 } 1593 } 1594 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1595 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1596 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1597 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1598 PetscCall(PetscSFDestroy(&rowsf)); 1599 1600 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm)); 1601 PetscCall(MatSeqAIJGetArray(aA,&aa)); 1602 PetscCall(MatSeqAIJGetArray(aB,&ba)); 1603 for (i=0; i<m; i++) { 1604 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1605 PetscInt j0,rowlen; 1606 rowlen = ai[i+1] - ai[i]; 1607 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1608 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1609 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES)); 1610 } 1611 rowlen = bi[i+1] - bi[i]; 1612 for (j0=j=0; j<rowlen; j0=j) { 1613 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1614 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES)); 1615 } 1616 } 1617 PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY)); 1618 PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY)); 1619 PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1620 PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1621 PetscCall(MatSeqAIJRestoreArray(aA,&aa)); 1622 PetscCall(MatSeqAIJRestoreArray(aB,&ba)); 1623 PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz)); 1624 PetscCall(PetscFree3(work,rdest,cdest)); 1625 PetscCall(PetscFree(gcdest)); 1626 if (parcolp) PetscCall(ISDestroy(&colp)); 1627 *B = Aperm; 1628 PetscFunctionReturn(0); 1629 } 1630 1631 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1632 { 1633 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1634 1635 PetscFunctionBegin; 1636 PetscCall(MatGetSize(aij->B,NULL,nghosts)); 1637 if (ghosts) *ghosts = aij->garray; 1638 PetscFunctionReturn(0); 1639 } 1640 1641 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1642 { 1643 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1644 Mat A = mat->A,B = mat->B; 1645 PetscLogDouble isend[5],irecv[5]; 1646 1647 PetscFunctionBegin; 1648 info->block_size = 1.0; 1649 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1650 1651 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1652 isend[3] = info->memory; isend[4] = info->mallocs; 1653 1654 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1655 1656 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1657 isend[3] += info->memory; isend[4] += info->mallocs; 1658 if (flag == MAT_LOCAL) { 1659 info->nz_used = isend[0]; 1660 info->nz_allocated = isend[1]; 1661 info->nz_unneeded = isend[2]; 1662 info->memory = isend[3]; 1663 info->mallocs = isend[4]; 1664 } else if (flag == MAT_GLOBAL_MAX) { 1665 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 1666 1667 info->nz_used = irecv[0]; 1668 info->nz_allocated = irecv[1]; 1669 info->nz_unneeded = irecv[2]; 1670 info->memory = irecv[3]; 1671 info->mallocs = irecv[4]; 1672 } else if (flag == MAT_GLOBAL_SUM) { 1673 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 1674 1675 info->nz_used = irecv[0]; 1676 info->nz_allocated = irecv[1]; 1677 info->nz_unneeded = irecv[2]; 1678 info->memory = irecv[3]; 1679 info->mallocs = irecv[4]; 1680 } 1681 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1682 info->fill_ratio_needed = 0; 1683 info->factor_mallocs = 0; 1684 PetscFunctionReturn(0); 1685 } 1686 1687 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1688 { 1689 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1690 1691 PetscFunctionBegin; 1692 switch (op) { 1693 case MAT_NEW_NONZERO_LOCATIONS: 1694 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1695 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1696 case MAT_KEEP_NONZERO_PATTERN: 1697 case MAT_NEW_NONZERO_LOCATION_ERR: 1698 case MAT_USE_INODES: 1699 case MAT_IGNORE_ZERO_ENTRIES: 1700 case MAT_FORM_EXPLICIT_TRANSPOSE: 1701 MatCheckPreallocated(A,1); 1702 PetscCall(MatSetOption(a->A,op,flg)); 1703 PetscCall(MatSetOption(a->B,op,flg)); 1704 break; 1705 case MAT_ROW_ORIENTED: 1706 MatCheckPreallocated(A,1); 1707 a->roworiented = flg; 1708 1709 PetscCall(MatSetOption(a->A,op,flg)); 1710 PetscCall(MatSetOption(a->B,op,flg)); 1711 break; 1712 case MAT_FORCE_DIAGONAL_ENTRIES: 1713 case MAT_SORTED_FULL: 1714 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1715 break; 1716 case MAT_IGNORE_OFF_PROC_ENTRIES: 1717 a->donotstash = flg; 1718 break; 1719 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1720 case MAT_SPD: 1721 case MAT_SYMMETRIC: 1722 case MAT_STRUCTURALLY_SYMMETRIC: 1723 case MAT_HERMITIAN: 1724 case MAT_SYMMETRY_ETERNAL: 1725 break; 1726 case MAT_SUBMAT_SINGLEIS: 1727 A->submat_singleis = flg; 1728 break; 1729 case MAT_STRUCTURE_ONLY: 1730 /* The option is handled directly by MatSetOption() */ 1731 break; 1732 default: 1733 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1734 } 1735 PetscFunctionReturn(0); 1736 } 1737 1738 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1739 { 1740 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1741 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1742 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1743 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1744 PetscInt *cmap,*idx_p; 1745 1746 PetscFunctionBegin; 1747 PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1748 mat->getrowactive = PETSC_TRUE; 1749 1750 if (!mat->rowvalues && (idx || v)) { 1751 /* 1752 allocate enough space to hold information from the longest row. 1753 */ 1754 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1755 PetscInt max = 1,tmp; 1756 for (i=0; i<matin->rmap->n; i++) { 1757 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1758 if (max < tmp) max = tmp; 1759 } 1760 PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices)); 1761 } 1762 1763 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1764 lrow = row - rstart; 1765 1766 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1767 if (!v) {pvA = NULL; pvB = NULL;} 1768 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1769 PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 1770 PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1771 nztot = nzA + nzB; 1772 1773 cmap = mat->garray; 1774 if (v || idx) { 1775 if (nztot) { 1776 /* Sort by increasing column numbers, assuming A and B already sorted */ 1777 PetscInt imark = -1; 1778 if (v) { 1779 *v = v_p = mat->rowvalues; 1780 for (i=0; i<nzB; i++) { 1781 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1782 else break; 1783 } 1784 imark = i; 1785 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1786 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1787 } 1788 if (idx) { 1789 *idx = idx_p = mat->rowindices; 1790 if (imark > -1) { 1791 for (i=0; i<imark; i++) { 1792 idx_p[i] = cmap[cworkB[i]]; 1793 } 1794 } else { 1795 for (i=0; i<nzB; i++) { 1796 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1797 else break; 1798 } 1799 imark = i; 1800 } 1801 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1802 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1803 } 1804 } else { 1805 if (idx) *idx = NULL; 1806 if (v) *v = NULL; 1807 } 1808 } 1809 *nz = nztot; 1810 PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 1811 PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 1812 PetscFunctionReturn(0); 1813 } 1814 1815 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1816 { 1817 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1818 1819 PetscFunctionBegin; 1820 PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1821 aij->getrowactive = PETSC_FALSE; 1822 PetscFunctionReturn(0); 1823 } 1824 1825 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1826 { 1827 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1828 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1829 PetscInt i,j,cstart = mat->cmap->rstart; 1830 PetscReal sum = 0.0; 1831 const MatScalar *v,*amata,*bmata; 1832 1833 PetscFunctionBegin; 1834 if (aij->size == 1) { 1835 PetscCall(MatNorm(aij->A,type,norm)); 1836 } else { 1837 PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata)); 1838 PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata)); 1839 if (type == NORM_FROBENIUS) { 1840 v = amata; 1841 for (i=0; i<amat->nz; i++) { 1842 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1843 } 1844 v = bmata; 1845 for (i=0; i<bmat->nz; i++) { 1846 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1847 } 1848 PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1849 *norm = PetscSqrtReal(*norm); 1850 PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz)); 1851 } else if (type == NORM_1) { /* max column norm */ 1852 PetscReal *tmp,*tmp2; 1853 PetscInt *jj,*garray = aij->garray; 1854 PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp)); 1855 PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2)); 1856 *norm = 0.0; 1857 v = amata; jj = amat->j; 1858 for (j=0; j<amat->nz; j++) { 1859 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1860 } 1861 v = bmata; jj = bmat->j; 1862 for (j=0; j<bmat->nz; j++) { 1863 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1864 } 1865 PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1866 for (j=0; j<mat->cmap->N; j++) { 1867 if (tmp2[j] > *norm) *norm = tmp2[j]; 1868 } 1869 PetscCall(PetscFree(tmp)); 1870 PetscCall(PetscFree(tmp2)); 1871 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1872 } else if (type == NORM_INFINITY) { /* max row norm */ 1873 PetscReal ntemp = 0.0; 1874 for (j=0; j<aij->A->rmap->n; j++) { 1875 v = amata + amat->i[j]; 1876 sum = 0.0; 1877 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1878 sum += PetscAbsScalar(*v); v++; 1879 } 1880 v = bmata + bmat->i[j]; 1881 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1882 sum += PetscAbsScalar(*v); v++; 1883 } 1884 if (sum > ntemp) ntemp = sum; 1885 } 1886 PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 1887 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1888 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1889 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata)); 1890 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata)); 1891 } 1892 PetscFunctionReturn(0); 1893 } 1894 1895 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1896 { 1897 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1898 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1899 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1900 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1901 Mat B,A_diag,*B_diag; 1902 const MatScalar *pbv,*bv; 1903 1904 PetscFunctionBegin; 1905 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1906 ai = Aloc->i; aj = Aloc->j; 1907 bi = Bloc->i; bj = Bloc->j; 1908 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1909 PetscInt *d_nnz,*g_nnz,*o_nnz; 1910 PetscSFNode *oloc; 1911 PETSC_UNUSED PetscSF sf; 1912 1913 PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc)); 1914 /* compute d_nnz for preallocation */ 1915 PetscCall(PetscArrayzero(d_nnz,na)); 1916 for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++; 1917 /* compute local off-diagonal contributions */ 1918 PetscCall(PetscArrayzero(g_nnz,nb)); 1919 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1920 /* map those to global */ 1921 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1922 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray)); 1923 PetscCall(PetscSFSetFromOptions(sf)); 1924 PetscCall(PetscArrayzero(o_nnz,na)); 1925 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1926 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1927 PetscCall(PetscSFDestroy(&sf)); 1928 1929 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 1930 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 1931 PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs))); 1932 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 1933 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 1934 PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc)); 1935 } else { 1936 B = *matout; 1937 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 1938 } 1939 1940 b = (Mat_MPIAIJ*)B->data; 1941 A_diag = a->A; 1942 B_diag = &b->A; 1943 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1944 A_diag_ncol = A_diag->cmap->N; 1945 B_diag_ilen = sub_B_diag->ilen; 1946 B_diag_i = sub_B_diag->i; 1947 1948 /* Set ilen for diagonal of B */ 1949 for (i=0; i<A_diag_ncol; i++) { 1950 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1951 } 1952 1953 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1954 very quickly (=without using MatSetValues), because all writes are local. */ 1955 PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag)); 1956 1957 /* copy over the B part */ 1958 PetscCall(PetscMalloc1(bi[mb],&cols)); 1959 PetscCall(MatSeqAIJGetArrayRead(a->B,&bv)); 1960 pbv = bv; 1961 row = A->rmap->rstart; 1962 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1963 cols_tmp = cols; 1964 for (i=0; i<mb; i++) { 1965 ncol = bi[i+1]-bi[i]; 1966 PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES)); 1967 row++; 1968 pbv += ncol; cols_tmp += ncol; 1969 } 1970 PetscCall(PetscFree(cols)); 1971 PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv)); 1972 1973 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1974 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1975 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1976 *matout = B; 1977 } else { 1978 PetscCall(MatHeaderMerge(A,&B)); 1979 } 1980 PetscFunctionReturn(0); 1981 } 1982 1983 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1984 { 1985 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1986 Mat a = aij->A,b = aij->B; 1987 PetscInt s1,s2,s3; 1988 1989 PetscFunctionBegin; 1990 PetscCall(MatGetLocalSize(mat,&s2,&s3)); 1991 if (rr) { 1992 PetscCall(VecGetLocalSize(rr,&s1)); 1993 PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1994 /* Overlap communication with computation. */ 1995 PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1996 } 1997 if (ll) { 1998 PetscCall(VecGetLocalSize(ll,&s1)); 1999 PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2000 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 2001 } 2002 /* scale the diagonal block */ 2003 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 2004 2005 if (rr) { 2006 /* Do a scatter end and then right scale the off-diagonal block */ 2007 PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2008 PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec)); 2009 } 2010 PetscFunctionReturn(0); 2011 } 2012 2013 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2014 { 2015 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2016 2017 PetscFunctionBegin; 2018 PetscCall(MatSetUnfactored(a->A)); 2019 PetscFunctionReturn(0); 2020 } 2021 2022 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2023 { 2024 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2025 Mat a,b,c,d; 2026 PetscBool flg; 2027 2028 PetscFunctionBegin; 2029 a = matA->A; b = matA->B; 2030 c = matB->A; d = matB->B; 2031 2032 PetscCall(MatEqual(a,c,&flg)); 2033 if (flg) { 2034 PetscCall(MatEqual(b,d,&flg)); 2035 } 2036 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 2037 PetscFunctionReturn(0); 2038 } 2039 2040 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2041 { 2042 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2043 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2044 2045 PetscFunctionBegin; 2046 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2047 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2048 /* because of the column compression in the off-processor part of the matrix a->B, 2049 the number of columns in a->B and b->B may be different, hence we cannot call 2050 the MatCopy() directly on the two parts. If need be, we can provide a more 2051 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2052 then copying the submatrices */ 2053 PetscCall(MatCopy_Basic(A,B,str)); 2054 } else { 2055 PetscCall(MatCopy(a->A,b->A,str)); 2056 PetscCall(MatCopy(a->B,b->B,str)); 2057 } 2058 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2059 PetscFunctionReturn(0); 2060 } 2061 2062 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2063 { 2064 PetscFunctionBegin; 2065 PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 2066 PetscFunctionReturn(0); 2067 } 2068 2069 /* 2070 Computes the number of nonzeros per row needed for preallocation when X and Y 2071 have different nonzero structure. 2072 */ 2073 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2074 { 2075 PetscInt i,j,k,nzx,nzy; 2076 2077 PetscFunctionBegin; 2078 /* Set the number of nonzeros in the new matrix */ 2079 for (i=0; i<m; i++) { 2080 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2081 nzx = xi[i+1] - xi[i]; 2082 nzy = yi[i+1] - yi[i]; 2083 nnz[i] = 0; 2084 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2085 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2086 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2087 nnz[i]++; 2088 } 2089 for (; k<nzy; k++) nnz[i]++; 2090 } 2091 PetscFunctionReturn(0); 2092 } 2093 2094 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2095 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2096 { 2097 PetscInt m = Y->rmap->N; 2098 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2099 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2100 2101 PetscFunctionBegin; 2102 PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 2103 PetscFunctionReturn(0); 2104 } 2105 2106 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2107 { 2108 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2109 2110 PetscFunctionBegin; 2111 if (str == SAME_NONZERO_PATTERN) { 2112 PetscCall(MatAXPY(yy->A,a,xx->A,str)); 2113 PetscCall(MatAXPY(yy->B,a,xx->B,str)); 2114 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2115 PetscCall(MatAXPY_Basic(Y,a,X,str)); 2116 } else { 2117 Mat B; 2118 PetscInt *nnz_d,*nnz_o; 2119 2120 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 2121 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 2122 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 2123 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2124 PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap)); 2125 PetscCall(MatSetType(B,((PetscObject)Y)->type_name)); 2126 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d)); 2127 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 2128 PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o)); 2129 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2130 PetscCall(MatHeaderMerge(Y,&B)); 2131 PetscCall(PetscFree(nnz_d)); 2132 PetscCall(PetscFree(nnz_o)); 2133 } 2134 PetscFunctionReturn(0); 2135 } 2136 2137 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2138 2139 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2140 { 2141 PetscFunctionBegin; 2142 if (PetscDefined(USE_COMPLEX)) { 2143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2144 2145 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2146 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2147 } 2148 PetscFunctionReturn(0); 2149 } 2150 2151 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2152 { 2153 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2154 2155 PetscFunctionBegin; 2156 PetscCall(MatRealPart(a->A)); 2157 PetscCall(MatRealPart(a->B)); 2158 PetscFunctionReturn(0); 2159 } 2160 2161 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2162 { 2163 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2164 2165 PetscFunctionBegin; 2166 PetscCall(MatImaginaryPart(a->A)); 2167 PetscCall(MatImaginaryPart(a->B)); 2168 PetscFunctionReturn(0); 2169 } 2170 2171 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2172 { 2173 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2174 PetscInt i,*idxb = NULL,m = A->rmap->n; 2175 PetscScalar *va,*vv; 2176 Vec vB,vA; 2177 const PetscScalar *vb; 2178 2179 PetscFunctionBegin; 2180 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 2181 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 2182 2183 PetscCall(VecGetArrayWrite(vA,&va)); 2184 if (idx) { 2185 for (i=0; i<m; i++) { 2186 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2187 } 2188 } 2189 2190 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 2191 PetscCall(PetscMalloc1(m,&idxb)); 2192 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 2193 2194 PetscCall(VecGetArrayWrite(v,&vv)); 2195 PetscCall(VecGetArrayRead(vB,&vb)); 2196 for (i=0; i<m; i++) { 2197 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2198 vv[i] = vb[i]; 2199 if (idx) idx[i] = a->garray[idxb[i]]; 2200 } else { 2201 vv[i] = va[i]; 2202 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2203 idx[i] = a->garray[idxb[i]]; 2204 } 2205 } 2206 PetscCall(VecRestoreArrayWrite(vA,&vv)); 2207 PetscCall(VecRestoreArrayWrite(vA,&va)); 2208 PetscCall(VecRestoreArrayRead(vB,&vb)); 2209 PetscCall(PetscFree(idxb)); 2210 PetscCall(VecDestroy(&vA)); 2211 PetscCall(VecDestroy(&vB)); 2212 PetscFunctionReturn(0); 2213 } 2214 2215 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2216 { 2217 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2218 PetscInt m = A->rmap->n,n = A->cmap->n; 2219 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2220 PetscInt *cmap = mat->garray; 2221 PetscInt *diagIdx, *offdiagIdx; 2222 Vec diagV, offdiagV; 2223 PetscScalar *a, *diagA, *offdiagA; 2224 const PetscScalar *ba,*bav; 2225 PetscInt r,j,col,ncols,*bi,*bj; 2226 Mat B = mat->B; 2227 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2228 2229 PetscFunctionBegin; 2230 /* When a process holds entire A and other processes have no entry */ 2231 if (A->cmap->N == n) { 2232 PetscCall(VecGetArrayWrite(v,&diagA)); 2233 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2234 PetscCall(MatGetRowMinAbs(mat->A,diagV,idx)); 2235 PetscCall(VecDestroy(&diagV)); 2236 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2237 PetscFunctionReturn(0); 2238 } else if (n == 0) { 2239 if (m) { 2240 PetscCall(VecGetArrayWrite(v,&a)); 2241 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2242 PetscCall(VecRestoreArrayWrite(v,&a)); 2243 } 2244 PetscFunctionReturn(0); 2245 } 2246 2247 PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2248 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2249 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2250 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2251 2252 /* Get offdiagIdx[] for implicit 0.0 */ 2253 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2254 ba = bav; 2255 bi = b->i; 2256 bj = b->j; 2257 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2258 for (r = 0; r < m; r++) { 2259 ncols = bi[r+1] - bi[r]; 2260 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2261 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2262 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2263 offdiagA[r] = 0.0; 2264 2265 /* Find first hole in the cmap */ 2266 for (j=0; j<ncols; j++) { 2267 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2268 if (col > j && j < cstart) { 2269 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2270 break; 2271 } else if (col > j + n && j >= cstart) { 2272 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2273 break; 2274 } 2275 } 2276 if (j == ncols && ncols < A->cmap->N - n) { 2277 /* a hole is outside compressed Bcols */ 2278 if (ncols == 0) { 2279 if (cstart) { 2280 offdiagIdx[r] = 0; 2281 } else offdiagIdx[r] = cend; 2282 } else { /* ncols > 0 */ 2283 offdiagIdx[r] = cmap[ncols-1] + 1; 2284 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2285 } 2286 } 2287 } 2288 2289 for (j=0; j<ncols; j++) { 2290 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2291 ba++; bj++; 2292 } 2293 } 2294 2295 PetscCall(VecGetArrayWrite(v, &a)); 2296 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2297 for (r = 0; r < m; ++r) { 2298 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2299 a[r] = diagA[r]; 2300 if (idx) idx[r] = cstart + diagIdx[r]; 2301 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2302 a[r] = diagA[r]; 2303 if (idx) { 2304 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2305 idx[r] = cstart + diagIdx[r]; 2306 } else idx[r] = offdiagIdx[r]; 2307 } 2308 } else { 2309 a[r] = offdiagA[r]; 2310 if (idx) idx[r] = offdiagIdx[r]; 2311 } 2312 } 2313 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2314 PetscCall(VecRestoreArrayWrite(v, &a)); 2315 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2316 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2317 PetscCall(VecDestroy(&diagV)); 2318 PetscCall(VecDestroy(&offdiagV)); 2319 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2320 PetscFunctionReturn(0); 2321 } 2322 2323 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2324 { 2325 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2326 PetscInt m = A->rmap->n,n = A->cmap->n; 2327 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2328 PetscInt *cmap = mat->garray; 2329 PetscInt *diagIdx, *offdiagIdx; 2330 Vec diagV, offdiagV; 2331 PetscScalar *a, *diagA, *offdiagA; 2332 const PetscScalar *ba,*bav; 2333 PetscInt r,j,col,ncols,*bi,*bj; 2334 Mat B = mat->B; 2335 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2336 2337 PetscFunctionBegin; 2338 /* When a process holds entire A and other processes have no entry */ 2339 if (A->cmap->N == n) { 2340 PetscCall(VecGetArrayWrite(v,&diagA)); 2341 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2342 PetscCall(MatGetRowMin(mat->A,diagV,idx)); 2343 PetscCall(VecDestroy(&diagV)); 2344 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2345 PetscFunctionReturn(0); 2346 } else if (n == 0) { 2347 if (m) { 2348 PetscCall(VecGetArrayWrite(v,&a)); 2349 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2350 PetscCall(VecRestoreArrayWrite(v,&a)); 2351 } 2352 PetscFunctionReturn(0); 2353 } 2354 2355 PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx)); 2356 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2357 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2358 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2359 2360 /* Get offdiagIdx[] for implicit 0.0 */ 2361 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2362 ba = bav; 2363 bi = b->i; 2364 bj = b->j; 2365 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2366 for (r = 0; r < m; r++) { 2367 ncols = bi[r+1] - bi[r]; 2368 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2369 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2370 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2371 offdiagA[r] = 0.0; 2372 2373 /* Find first hole in the cmap */ 2374 for (j=0; j<ncols; j++) { 2375 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2376 if (col > j && j < cstart) { 2377 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2378 break; 2379 } else if (col > j + n && j >= cstart) { 2380 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2381 break; 2382 } 2383 } 2384 if (j == ncols && ncols < A->cmap->N - n) { 2385 /* a hole is outside compressed Bcols */ 2386 if (ncols == 0) { 2387 if (cstart) { 2388 offdiagIdx[r] = 0; 2389 } else offdiagIdx[r] = cend; 2390 } else { /* ncols > 0 */ 2391 offdiagIdx[r] = cmap[ncols-1] + 1; 2392 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2393 } 2394 } 2395 } 2396 2397 for (j=0; j<ncols; j++) { 2398 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2399 ba++; bj++; 2400 } 2401 } 2402 2403 PetscCall(VecGetArrayWrite(v, &a)); 2404 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2405 for (r = 0; r < m; ++r) { 2406 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2407 a[r] = diagA[r]; 2408 if (idx) idx[r] = cstart + diagIdx[r]; 2409 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2410 a[r] = diagA[r]; 2411 if (idx) { 2412 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2413 idx[r] = cstart + diagIdx[r]; 2414 } else idx[r] = offdiagIdx[r]; 2415 } 2416 } else { 2417 a[r] = offdiagA[r]; 2418 if (idx) idx[r] = offdiagIdx[r]; 2419 } 2420 } 2421 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2422 PetscCall(VecRestoreArrayWrite(v, &a)); 2423 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2424 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2425 PetscCall(VecDestroy(&diagV)); 2426 PetscCall(VecDestroy(&offdiagV)); 2427 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2428 PetscFunctionReturn(0); 2429 } 2430 2431 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2432 { 2433 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2434 PetscInt m = A->rmap->n,n = A->cmap->n; 2435 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2436 PetscInt *cmap = mat->garray; 2437 PetscInt *diagIdx, *offdiagIdx; 2438 Vec diagV, offdiagV; 2439 PetscScalar *a, *diagA, *offdiagA; 2440 const PetscScalar *ba,*bav; 2441 PetscInt r,j,col,ncols,*bi,*bj; 2442 Mat B = mat->B; 2443 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2444 2445 PetscFunctionBegin; 2446 /* When a process holds entire A and other processes have no entry */ 2447 if (A->cmap->N == n) { 2448 PetscCall(VecGetArrayWrite(v,&diagA)); 2449 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2450 PetscCall(MatGetRowMax(mat->A,diagV,idx)); 2451 PetscCall(VecDestroy(&diagV)); 2452 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2453 PetscFunctionReturn(0); 2454 } else if (n == 0) { 2455 if (m) { 2456 PetscCall(VecGetArrayWrite(v,&a)); 2457 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2458 PetscCall(VecRestoreArrayWrite(v,&a)); 2459 } 2460 PetscFunctionReturn(0); 2461 } 2462 2463 PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2464 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2465 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2466 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2467 2468 /* Get offdiagIdx[] for implicit 0.0 */ 2469 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2470 ba = bav; 2471 bi = b->i; 2472 bj = b->j; 2473 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2474 for (r = 0; r < m; r++) { 2475 ncols = bi[r+1] - bi[r]; 2476 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2477 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2478 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2479 offdiagA[r] = 0.0; 2480 2481 /* Find first hole in the cmap */ 2482 for (j=0; j<ncols; j++) { 2483 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2484 if (col > j && j < cstart) { 2485 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2486 break; 2487 } else if (col > j + n && j >= cstart) { 2488 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2489 break; 2490 } 2491 } 2492 if (j == ncols && ncols < A->cmap->N - n) { 2493 /* a hole is outside compressed Bcols */ 2494 if (ncols == 0) { 2495 if (cstart) { 2496 offdiagIdx[r] = 0; 2497 } else offdiagIdx[r] = cend; 2498 } else { /* ncols > 0 */ 2499 offdiagIdx[r] = cmap[ncols-1] + 1; 2500 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2501 } 2502 } 2503 } 2504 2505 for (j=0; j<ncols; j++) { 2506 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2507 ba++; bj++; 2508 } 2509 } 2510 2511 PetscCall(VecGetArrayWrite(v, &a)); 2512 PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA)); 2513 for (r = 0; r < m; ++r) { 2514 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2515 a[r] = diagA[r]; 2516 if (idx) idx[r] = cstart + diagIdx[r]; 2517 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2518 a[r] = diagA[r]; 2519 if (idx) { 2520 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2521 idx[r] = cstart + diagIdx[r]; 2522 } else idx[r] = offdiagIdx[r]; 2523 } 2524 } else { 2525 a[r] = offdiagA[r]; 2526 if (idx) idx[r] = offdiagIdx[r]; 2527 } 2528 } 2529 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2530 PetscCall(VecRestoreArrayWrite(v, &a)); 2531 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2532 PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA)); 2533 PetscCall(VecDestroy(&diagV)); 2534 PetscCall(VecDestroy(&offdiagV)); 2535 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2536 PetscFunctionReturn(0); 2537 } 2538 2539 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2540 { 2541 Mat *dummy; 2542 2543 PetscFunctionBegin; 2544 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy)); 2545 *newmat = *dummy; 2546 PetscCall(PetscFree(dummy)); 2547 PetscFunctionReturn(0); 2548 } 2549 2550 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2551 { 2552 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2553 2554 PetscFunctionBegin; 2555 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2556 A->factorerrortype = a->A->factorerrortype; 2557 PetscFunctionReturn(0); 2558 } 2559 2560 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2561 { 2562 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2563 2564 PetscFunctionBegin; 2565 PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2566 PetscCall(MatSetRandom(aij->A,rctx)); 2567 if (x->assembled) { 2568 PetscCall(MatSetRandom(aij->B,rctx)); 2569 } else { 2570 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx)); 2571 } 2572 PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY)); 2573 PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY)); 2574 PetscFunctionReturn(0); 2575 } 2576 2577 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2578 { 2579 PetscFunctionBegin; 2580 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2581 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2582 PetscFunctionReturn(0); 2583 } 2584 2585 /*@ 2586 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2587 2588 Not collective 2589 2590 Input Parameter: 2591 . A - the matrix 2592 2593 Output Parameter: 2594 . nz - the number of nonzeros 2595 2596 Level: advanced 2597 2598 @*/ 2599 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A,PetscCount *nz) 2600 { 2601 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)A->data; 2602 Mat_SeqAIJ *aaij = (Mat_SeqAIJ*)maij->A->data, *baij = (Mat_SeqAIJ*)maij->B->data; 2603 2604 PetscFunctionBegin; 2605 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2606 PetscFunctionReturn(0); 2607 } 2608 2609 /*@ 2610 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2611 2612 Collective on Mat 2613 2614 Input Parameters: 2615 + A - the matrix 2616 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2617 2618 Level: advanced 2619 2620 @*/ 2621 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2622 { 2623 PetscFunctionBegin; 2624 PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc)); 2625 PetscFunctionReturn(0); 2626 } 2627 2628 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2629 { 2630 PetscBool sc = PETSC_FALSE,flg; 2631 2632 PetscFunctionBegin; 2633 PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options"); 2634 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2635 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg)); 2636 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc)); 2637 PetscOptionsHeadEnd(); 2638 PetscFunctionReturn(0); 2639 } 2640 2641 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2642 { 2643 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2644 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2645 2646 PetscFunctionBegin; 2647 if (!Y->preallocated) { 2648 PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL)); 2649 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2650 PetscInt nonew = aij->nonew; 2651 PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL)); 2652 aij->nonew = nonew; 2653 } 2654 PetscCall(MatShift_Basic(Y,a)); 2655 PetscFunctionReturn(0); 2656 } 2657 2658 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2659 { 2660 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2661 2662 PetscFunctionBegin; 2663 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2664 PetscCall(MatMissingDiagonal(a->A,missing,d)); 2665 if (d) { 2666 PetscInt rstart; 2667 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 2668 *d += rstart; 2669 2670 } 2671 PetscFunctionReturn(0); 2672 } 2673 2674 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2675 { 2676 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2677 2678 PetscFunctionBegin; 2679 PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag)); 2680 PetscFunctionReturn(0); 2681 } 2682 2683 /* -------------------------------------------------------------------*/ 2684 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2685 MatGetRow_MPIAIJ, 2686 MatRestoreRow_MPIAIJ, 2687 MatMult_MPIAIJ, 2688 /* 4*/ MatMultAdd_MPIAIJ, 2689 MatMultTranspose_MPIAIJ, 2690 MatMultTransposeAdd_MPIAIJ, 2691 NULL, 2692 NULL, 2693 NULL, 2694 /*10*/ NULL, 2695 NULL, 2696 NULL, 2697 MatSOR_MPIAIJ, 2698 MatTranspose_MPIAIJ, 2699 /*15*/ MatGetInfo_MPIAIJ, 2700 MatEqual_MPIAIJ, 2701 MatGetDiagonal_MPIAIJ, 2702 MatDiagonalScale_MPIAIJ, 2703 MatNorm_MPIAIJ, 2704 /*20*/ MatAssemblyBegin_MPIAIJ, 2705 MatAssemblyEnd_MPIAIJ, 2706 MatSetOption_MPIAIJ, 2707 MatZeroEntries_MPIAIJ, 2708 /*24*/ MatZeroRows_MPIAIJ, 2709 NULL, 2710 NULL, 2711 NULL, 2712 NULL, 2713 /*29*/ MatSetUp_MPIAIJ, 2714 NULL, 2715 NULL, 2716 MatGetDiagonalBlock_MPIAIJ, 2717 NULL, 2718 /*34*/ MatDuplicate_MPIAIJ, 2719 NULL, 2720 NULL, 2721 NULL, 2722 NULL, 2723 /*39*/ MatAXPY_MPIAIJ, 2724 MatCreateSubMatrices_MPIAIJ, 2725 MatIncreaseOverlap_MPIAIJ, 2726 MatGetValues_MPIAIJ, 2727 MatCopy_MPIAIJ, 2728 /*44*/ MatGetRowMax_MPIAIJ, 2729 MatScale_MPIAIJ, 2730 MatShift_MPIAIJ, 2731 MatDiagonalSet_MPIAIJ, 2732 MatZeroRowsColumns_MPIAIJ, 2733 /*49*/ MatSetRandom_MPIAIJ, 2734 MatGetRowIJ_MPIAIJ, 2735 MatRestoreRowIJ_MPIAIJ, 2736 NULL, 2737 NULL, 2738 /*54*/ MatFDColoringCreate_MPIXAIJ, 2739 NULL, 2740 MatSetUnfactored_MPIAIJ, 2741 MatPermute_MPIAIJ, 2742 NULL, 2743 /*59*/ MatCreateSubMatrix_MPIAIJ, 2744 MatDestroy_MPIAIJ, 2745 MatView_MPIAIJ, 2746 NULL, 2747 NULL, 2748 /*64*/ NULL, 2749 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2750 NULL, 2751 NULL, 2752 NULL, 2753 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2754 MatGetRowMinAbs_MPIAIJ, 2755 NULL, 2756 NULL, 2757 NULL, 2758 NULL, 2759 /*75*/ MatFDColoringApply_AIJ, 2760 MatSetFromOptions_MPIAIJ, 2761 NULL, 2762 NULL, 2763 MatFindZeroDiagonals_MPIAIJ, 2764 /*80*/ NULL, 2765 NULL, 2766 NULL, 2767 /*83*/ MatLoad_MPIAIJ, 2768 MatIsSymmetric_MPIAIJ, 2769 NULL, 2770 NULL, 2771 NULL, 2772 NULL, 2773 /*89*/ NULL, 2774 NULL, 2775 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2776 NULL, 2777 NULL, 2778 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2779 NULL, 2780 NULL, 2781 NULL, 2782 MatBindToCPU_MPIAIJ, 2783 /*99*/ MatProductSetFromOptions_MPIAIJ, 2784 NULL, 2785 NULL, 2786 MatConjugate_MPIAIJ, 2787 NULL, 2788 /*104*/MatSetValuesRow_MPIAIJ, 2789 MatRealPart_MPIAIJ, 2790 MatImaginaryPart_MPIAIJ, 2791 NULL, 2792 NULL, 2793 /*109*/NULL, 2794 NULL, 2795 MatGetRowMin_MPIAIJ, 2796 NULL, 2797 MatMissingDiagonal_MPIAIJ, 2798 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2799 NULL, 2800 MatGetGhosts_MPIAIJ, 2801 NULL, 2802 NULL, 2803 /*119*/MatMultDiagonalBlock_MPIAIJ, 2804 NULL, 2805 NULL, 2806 NULL, 2807 MatGetMultiProcBlock_MPIAIJ, 2808 /*124*/MatFindNonzeroRows_MPIAIJ, 2809 MatGetColumnReductions_MPIAIJ, 2810 MatInvertBlockDiagonal_MPIAIJ, 2811 MatInvertVariableBlockDiagonal_MPIAIJ, 2812 MatCreateSubMatricesMPI_MPIAIJ, 2813 /*129*/NULL, 2814 NULL, 2815 NULL, 2816 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2817 NULL, 2818 /*134*/NULL, 2819 NULL, 2820 NULL, 2821 NULL, 2822 NULL, 2823 /*139*/MatSetBlockSizes_MPIAIJ, 2824 NULL, 2825 NULL, 2826 MatFDColoringSetUp_MPIXAIJ, 2827 MatFindOffBlockDiagonalEntries_MPIAIJ, 2828 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2829 /*145*/NULL, 2830 NULL, 2831 NULL, 2832 MatCreateGraph_Simple_AIJ, 2833 MatFilter_AIJ 2834 }; 2835 2836 /* ----------------------------------------------------------------------------------------*/ 2837 2838 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2839 { 2840 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2841 2842 PetscFunctionBegin; 2843 PetscCall(MatStoreValues(aij->A)); 2844 PetscCall(MatStoreValues(aij->B)); 2845 PetscFunctionReturn(0); 2846 } 2847 2848 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2849 { 2850 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2851 2852 PetscFunctionBegin; 2853 PetscCall(MatRetrieveValues(aij->A)); 2854 PetscCall(MatRetrieveValues(aij->B)); 2855 PetscFunctionReturn(0); 2856 } 2857 2858 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2859 { 2860 Mat_MPIAIJ *b; 2861 PetscMPIInt size; 2862 2863 PetscFunctionBegin; 2864 PetscCall(PetscLayoutSetUp(B->rmap)); 2865 PetscCall(PetscLayoutSetUp(B->cmap)); 2866 b = (Mat_MPIAIJ*)B->data; 2867 2868 #if defined(PETSC_USE_CTABLE) 2869 PetscCall(PetscTableDestroy(&b->colmap)); 2870 #else 2871 PetscCall(PetscFree(b->colmap)); 2872 #endif 2873 PetscCall(PetscFree(b->garray)); 2874 PetscCall(VecDestroy(&b->lvec)); 2875 PetscCall(VecScatterDestroy(&b->Mvctx)); 2876 2877 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2878 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 2879 PetscCall(MatDestroy(&b->B)); 2880 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 2881 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0)); 2882 PetscCall(MatSetBlockSizesFromMats(b->B,B,B)); 2883 PetscCall(MatSetType(b->B,MATSEQAIJ)); 2884 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 2885 2886 if (!B->preallocated) { 2887 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 2888 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 2889 PetscCall(MatSetBlockSizesFromMats(b->A,B,B)); 2890 PetscCall(MatSetType(b->A,MATSEQAIJ)); 2891 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 2892 } 2893 2894 PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz)); 2895 PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz)); 2896 B->preallocated = PETSC_TRUE; 2897 B->was_assembled = PETSC_FALSE; 2898 B->assembled = PETSC_FALSE; 2899 PetscFunctionReturn(0); 2900 } 2901 2902 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2903 { 2904 Mat_MPIAIJ *b; 2905 2906 PetscFunctionBegin; 2907 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2908 PetscCall(PetscLayoutSetUp(B->rmap)); 2909 PetscCall(PetscLayoutSetUp(B->cmap)); 2910 b = (Mat_MPIAIJ*)B->data; 2911 2912 #if defined(PETSC_USE_CTABLE) 2913 PetscCall(PetscTableDestroy(&b->colmap)); 2914 #else 2915 PetscCall(PetscFree(b->colmap)); 2916 #endif 2917 PetscCall(PetscFree(b->garray)); 2918 PetscCall(VecDestroy(&b->lvec)); 2919 PetscCall(VecScatterDestroy(&b->Mvctx)); 2920 2921 PetscCall(MatResetPreallocation(b->A)); 2922 PetscCall(MatResetPreallocation(b->B)); 2923 B->preallocated = PETSC_TRUE; 2924 B->was_assembled = PETSC_FALSE; 2925 B->assembled = PETSC_FALSE; 2926 PetscFunctionReturn(0); 2927 } 2928 2929 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2930 { 2931 Mat mat; 2932 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2933 2934 PetscFunctionBegin; 2935 *newmat = NULL; 2936 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 2937 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 2938 PetscCall(MatSetBlockSizesFromMats(mat,matin,matin)); 2939 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 2940 a = (Mat_MPIAIJ*)mat->data; 2941 2942 mat->factortype = matin->factortype; 2943 mat->assembled = matin->assembled; 2944 mat->insertmode = NOT_SET_VALUES; 2945 mat->preallocated = matin->preallocated; 2946 2947 a->size = oldmat->size; 2948 a->rank = oldmat->rank; 2949 a->donotstash = oldmat->donotstash; 2950 a->roworiented = oldmat->roworiented; 2951 a->rowindices = NULL; 2952 a->rowvalues = NULL; 2953 a->getrowactive = PETSC_FALSE; 2954 2955 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 2956 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 2957 2958 if (oldmat->colmap) { 2959 #if defined(PETSC_USE_CTABLE) 2960 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 2961 #else 2962 PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap)); 2963 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt))); 2964 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N)); 2965 #endif 2966 } else a->colmap = NULL; 2967 if (oldmat->garray) { 2968 PetscInt len; 2969 len = oldmat->B->cmap->n; 2970 PetscCall(PetscMalloc1(len+1,&a->garray)); 2971 PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 2972 if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 2973 } else a->garray = NULL; 2974 2975 /* It may happen MatDuplicate is called with a non-assembled matrix 2976 In fact, MatDuplicate only requires the matrix to be preallocated 2977 This may happen inside a DMCreateMatrix_Shell */ 2978 if (oldmat->lvec) { 2979 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 2980 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 2981 } 2982 if (oldmat->Mvctx) { 2983 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 2984 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 2985 } 2986 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2987 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2988 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 2989 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 2990 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 2991 *newmat = mat; 2992 PetscFunctionReturn(0); 2993 } 2994 2995 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2996 { 2997 PetscBool isbinary, ishdf5; 2998 2999 PetscFunctionBegin; 3000 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3001 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3002 /* force binary viewer to load .info file if it has not yet done so */ 3003 PetscCall(PetscViewerSetUp(viewer)); 3004 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 3005 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5)); 3006 if (isbinary) { 3007 PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer)); 3008 } else if (ishdf5) { 3009 #if defined(PETSC_HAVE_HDF5) 3010 PetscCall(MatLoad_AIJ_HDF5(newMat,viewer)); 3011 #else 3012 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3013 #endif 3014 } else { 3015 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3016 } 3017 PetscFunctionReturn(0); 3018 } 3019 3020 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3021 { 3022 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3023 PetscInt *rowidxs,*colidxs; 3024 PetscScalar *matvals; 3025 3026 PetscFunctionBegin; 3027 PetscCall(PetscViewerSetUp(viewer)); 3028 3029 /* read in matrix header */ 3030 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3031 PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3032 M = header[1]; N = header[2]; nz = header[3]; 3033 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3034 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3035 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3036 3037 /* set block sizes from the viewer's .info file */ 3038 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3039 /* set global sizes if not set already */ 3040 if (mat->rmap->N < 0) mat->rmap->N = M; 3041 if (mat->cmap->N < 0) mat->cmap->N = N; 3042 PetscCall(PetscLayoutSetUp(mat->rmap)); 3043 PetscCall(PetscLayoutSetUp(mat->cmap)); 3044 3045 /* check if the matrix sizes are correct */ 3046 PetscCall(MatGetSize(mat,&rows,&cols)); 3047 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3048 3049 /* read in row lengths and build row indices */ 3050 PetscCall(MatGetLocalSize(mat,&m,NULL)); 3051 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3052 PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT)); 3053 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3054 PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer))); 3055 PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3056 /* read in column indices and matrix values */ 3057 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals)); 3058 PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 3059 PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 3060 /* store matrix indices and values */ 3061 PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals)); 3062 PetscCall(PetscFree(rowidxs)); 3063 PetscCall(PetscFree2(colidxs,matvals)); 3064 PetscFunctionReturn(0); 3065 } 3066 3067 /* Not scalable because of ISAllGather() unless getting all columns. */ 3068 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3069 { 3070 IS iscol_local; 3071 PetscBool isstride; 3072 PetscMPIInt lisstride=0,gisstride; 3073 3074 PetscFunctionBegin; 3075 /* check if we are grabbing all columns*/ 3076 PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride)); 3077 3078 if (isstride) { 3079 PetscInt start,len,mstart,mlen; 3080 PetscCall(ISStrideGetInfo(iscol,&start,NULL)); 3081 PetscCall(ISGetLocalSize(iscol,&len)); 3082 PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen)); 3083 if (mstart == start && mlen-mstart == len) lisstride = 1; 3084 } 3085 3086 PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat))); 3087 if (gisstride) { 3088 PetscInt N; 3089 PetscCall(MatGetSize(mat,NULL,&N)); 3090 PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local)); 3091 PetscCall(ISSetIdentity(iscol_local)); 3092 PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3093 } else { 3094 PetscInt cbs; 3095 PetscCall(ISGetBlockSize(iscol,&cbs)); 3096 PetscCall(ISAllGather(iscol,&iscol_local)); 3097 PetscCall(ISSetBlockSize(iscol_local,cbs)); 3098 } 3099 3100 *isseq = iscol_local; 3101 PetscFunctionReturn(0); 3102 } 3103 3104 /* 3105 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3106 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3107 3108 Input Parameters: 3109 mat - matrix 3110 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3111 i.e., mat->rstart <= isrow[i] < mat->rend 3112 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3113 i.e., mat->cstart <= iscol[i] < mat->cend 3114 Output Parameter: 3115 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3116 iscol_o - sequential column index set for retrieving mat->B 3117 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3118 */ 3119 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3120 { 3121 Vec x,cmap; 3122 const PetscInt *is_idx; 3123 PetscScalar *xarray,*cmaparray; 3124 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3125 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3126 Mat B=a->B; 3127 Vec lvec=a->lvec,lcmap; 3128 PetscInt i,cstart,cend,Bn=B->cmap->N; 3129 MPI_Comm comm; 3130 VecScatter Mvctx=a->Mvctx; 3131 3132 PetscFunctionBegin; 3133 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3134 PetscCall(ISGetLocalSize(iscol,&ncols)); 3135 3136 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3137 PetscCall(MatCreateVecs(mat,&x,NULL)); 3138 PetscCall(VecSet(x,-1.0)); 3139 PetscCall(VecDuplicate(x,&cmap)); 3140 PetscCall(VecSet(cmap,-1.0)); 3141 3142 /* Get start indices */ 3143 PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm)); 3144 isstart -= ncols; 3145 PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend)); 3146 3147 PetscCall(ISGetIndices(iscol,&is_idx)); 3148 PetscCall(VecGetArray(x,&xarray)); 3149 PetscCall(VecGetArray(cmap,&cmaparray)); 3150 PetscCall(PetscMalloc1(ncols,&idx)); 3151 for (i=0; i<ncols; i++) { 3152 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3153 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3154 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3155 } 3156 PetscCall(VecRestoreArray(x,&xarray)); 3157 PetscCall(VecRestoreArray(cmap,&cmaparray)); 3158 PetscCall(ISRestoreIndices(iscol,&is_idx)); 3159 3160 /* Get iscol_d */ 3161 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d)); 3162 PetscCall(ISGetBlockSize(iscol,&i)); 3163 PetscCall(ISSetBlockSize(*iscol_d,i)); 3164 3165 /* Get isrow_d */ 3166 PetscCall(ISGetLocalSize(isrow,&m)); 3167 rstart = mat->rmap->rstart; 3168 PetscCall(PetscMalloc1(m,&idx)); 3169 PetscCall(ISGetIndices(isrow,&is_idx)); 3170 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3171 PetscCall(ISRestoreIndices(isrow,&is_idx)); 3172 3173 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d)); 3174 PetscCall(ISGetBlockSize(isrow,&i)); 3175 PetscCall(ISSetBlockSize(*isrow_d,i)); 3176 3177 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3178 PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 3179 PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 3180 3181 PetscCall(VecDuplicate(lvec,&lcmap)); 3182 3183 PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 3184 PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 3185 3186 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3187 /* off-process column indices */ 3188 count = 0; 3189 PetscCall(PetscMalloc1(Bn,&idx)); 3190 PetscCall(PetscMalloc1(Bn,&cmap1)); 3191 3192 PetscCall(VecGetArray(lvec,&xarray)); 3193 PetscCall(VecGetArray(lcmap,&cmaparray)); 3194 for (i=0; i<Bn; i++) { 3195 if (PetscRealPart(xarray[i]) > -1.0) { 3196 idx[count] = i; /* local column index in off-diagonal part B */ 3197 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3198 count++; 3199 } 3200 } 3201 PetscCall(VecRestoreArray(lvec,&xarray)); 3202 PetscCall(VecRestoreArray(lcmap,&cmaparray)); 3203 3204 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o)); 3205 /* cannot ensure iscol_o has same blocksize as iscol! */ 3206 3207 PetscCall(PetscFree(idx)); 3208 *garray = cmap1; 3209 3210 PetscCall(VecDestroy(&x)); 3211 PetscCall(VecDestroy(&cmap)); 3212 PetscCall(VecDestroy(&lcmap)); 3213 PetscFunctionReturn(0); 3214 } 3215 3216 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3217 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3218 { 3219 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3220 Mat M = NULL; 3221 MPI_Comm comm; 3222 IS iscol_d,isrow_d,iscol_o; 3223 Mat Asub = NULL,Bsub = NULL; 3224 PetscInt n; 3225 3226 PetscFunctionBegin; 3227 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3228 3229 if (call == MAT_REUSE_MATRIX) { 3230 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3231 PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d)); 3232 PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3233 3234 PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d)); 3235 PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3236 3237 PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o)); 3238 PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3239 3240 /* Update diagonal and off-diagonal portions of submat */ 3241 asub = (Mat_MPIAIJ*)(*submat)->data; 3242 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A)); 3243 PetscCall(ISGetLocalSize(iscol_o,&n)); 3244 if (n) { 3245 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B)); 3246 } 3247 PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY)); 3248 PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY)); 3249 3250 } else { /* call == MAT_INITIAL_MATRIX) */ 3251 const PetscInt *garray; 3252 PetscInt BsubN; 3253 3254 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3255 PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray)); 3256 3257 /* Create local submatrices Asub and Bsub */ 3258 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub)); 3259 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub)); 3260 3261 /* Create submatrix M */ 3262 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M)); 3263 3264 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3265 asub = (Mat_MPIAIJ*)M->data; 3266 3267 PetscCall(ISGetLocalSize(iscol_o,&BsubN)); 3268 n = asub->B->cmap->N; 3269 if (BsubN > n) { 3270 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3271 const PetscInt *idx; 3272 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3273 PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN)); 3274 3275 PetscCall(PetscMalloc1(n,&idx_new)); 3276 j = 0; 3277 PetscCall(ISGetIndices(iscol_o,&idx)); 3278 for (i=0; i<n; i++) { 3279 if (j >= BsubN) break; 3280 while (subgarray[i] > garray[j]) j++; 3281 3282 if (subgarray[i] == garray[j]) { 3283 idx_new[i] = idx[j++]; 3284 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]); 3285 } 3286 PetscCall(ISRestoreIndices(iscol_o,&idx)); 3287 3288 PetscCall(ISDestroy(&iscol_o)); 3289 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o)); 3290 3291 } else if (BsubN < n) { 3292 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N); 3293 } 3294 3295 PetscCall(PetscFree(garray)); 3296 *submat = M; 3297 3298 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3299 PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d)); 3300 PetscCall(ISDestroy(&isrow_d)); 3301 3302 PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d)); 3303 PetscCall(ISDestroy(&iscol_d)); 3304 3305 PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o)); 3306 PetscCall(ISDestroy(&iscol_o)); 3307 } 3308 PetscFunctionReturn(0); 3309 } 3310 3311 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3312 { 3313 IS iscol_local=NULL,isrow_d; 3314 PetscInt csize; 3315 PetscInt n,i,j,start,end; 3316 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3317 MPI_Comm comm; 3318 3319 PetscFunctionBegin; 3320 /* If isrow has same processor distribution as mat, 3321 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3322 if (call == MAT_REUSE_MATRIX) { 3323 PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d)); 3324 if (isrow_d) { 3325 sameRowDist = PETSC_TRUE; 3326 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3327 } else { 3328 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local)); 3329 if (iscol_local) { 3330 sameRowDist = PETSC_TRUE; 3331 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3332 } 3333 } 3334 } else { 3335 /* Check if isrow has same processor distribution as mat */ 3336 sameDist[0] = PETSC_FALSE; 3337 PetscCall(ISGetLocalSize(isrow,&n)); 3338 if (!n) { 3339 sameDist[0] = PETSC_TRUE; 3340 } else { 3341 PetscCall(ISGetMinMax(isrow,&i,&j)); 3342 PetscCall(MatGetOwnershipRange(mat,&start,&end)); 3343 if (i >= start && j < end) { 3344 sameDist[0] = PETSC_TRUE; 3345 } 3346 } 3347 3348 /* Check if iscol has same processor distribution as mat */ 3349 sameDist[1] = PETSC_FALSE; 3350 PetscCall(ISGetLocalSize(iscol,&n)); 3351 if (!n) { 3352 sameDist[1] = PETSC_TRUE; 3353 } else { 3354 PetscCall(ISGetMinMax(iscol,&i,&j)); 3355 PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end)); 3356 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3357 } 3358 3359 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3360 PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm)); 3361 sameRowDist = tsameDist[0]; 3362 } 3363 3364 if (sameRowDist) { 3365 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3366 /* isrow and iscol have same processor distribution as mat */ 3367 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat)); 3368 PetscFunctionReturn(0); 3369 } else { /* sameRowDist */ 3370 /* isrow has same processor distribution as mat */ 3371 if (call == MAT_INITIAL_MATRIX) { 3372 PetscBool sorted; 3373 PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 3374 PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */ 3375 PetscCall(ISGetSize(iscol,&i)); 3376 PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i); 3377 3378 PetscCall(ISSorted(iscol_local,&sorted)); 3379 if (sorted) { 3380 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3381 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat)); 3382 PetscFunctionReturn(0); 3383 } 3384 } else { /* call == MAT_REUSE_MATRIX */ 3385 IS iscol_sub; 3386 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3387 if (iscol_sub) { 3388 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat)); 3389 PetscFunctionReturn(0); 3390 } 3391 } 3392 } 3393 } 3394 3395 /* General case: iscol -> iscol_local which has global size of iscol */ 3396 if (call == MAT_REUSE_MATRIX) { 3397 PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local)); 3398 PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3399 } else { 3400 if (!iscol_local) { 3401 PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 3402 } 3403 } 3404 3405 PetscCall(ISGetLocalSize(iscol,&csize)); 3406 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat)); 3407 3408 if (call == MAT_INITIAL_MATRIX) { 3409 PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local)); 3410 PetscCall(ISDestroy(&iscol_local)); 3411 } 3412 PetscFunctionReturn(0); 3413 } 3414 3415 /*@C 3416 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3417 and "off-diagonal" part of the matrix in CSR format. 3418 3419 Collective 3420 3421 Input Parameters: 3422 + comm - MPI communicator 3423 . A - "diagonal" portion of matrix 3424 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3425 - garray - global index of B columns 3426 3427 Output Parameter: 3428 . mat - the matrix, with input A as its local diagonal matrix 3429 Level: advanced 3430 3431 Notes: 3432 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3433 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3434 3435 .seealso: `MatCreateMPIAIJWithSplitArrays()` 3436 @*/ 3437 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3438 { 3439 Mat_MPIAIJ *maij; 3440 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3441 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3442 const PetscScalar *oa; 3443 Mat Bnew; 3444 PetscInt m,n,N; 3445 MatType mpi_mat_type; 3446 3447 PetscFunctionBegin; 3448 PetscCall(MatCreate(comm,mat)); 3449 PetscCall(MatGetSize(A,&m,&n)); 3450 PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N); 3451 PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs); 3452 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3453 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3454 3455 /* Get global columns of mat */ 3456 PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm)); 3457 3458 PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N)); 3459 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */ 3460 PetscCall(MatGetMPIMatType_Private(A,&mpi_mat_type)); 3461 PetscCall(MatSetType(*mat,mpi_mat_type)); 3462 3463 PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs)); 3464 maij = (Mat_MPIAIJ*)(*mat)->data; 3465 3466 (*mat)->preallocated = PETSC_TRUE; 3467 3468 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3469 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3470 3471 /* Set A as diagonal portion of *mat */ 3472 maij->A = A; 3473 3474 nz = oi[m]; 3475 for (i=0; i<nz; i++) { 3476 col = oj[i]; 3477 oj[i] = garray[col]; 3478 } 3479 3480 /* Set Bnew as off-diagonal portion of *mat */ 3481 PetscCall(MatSeqAIJGetArrayRead(B,&oa)); 3482 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew)); 3483 PetscCall(MatSeqAIJRestoreArrayRead(B,&oa)); 3484 bnew = (Mat_SeqAIJ*)Bnew->data; 3485 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3486 maij->B = Bnew; 3487 3488 PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N); 3489 3490 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3491 b->free_a = PETSC_FALSE; 3492 b->free_ij = PETSC_FALSE; 3493 PetscCall(MatDestroy(&B)); 3494 3495 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3496 bnew->free_a = PETSC_TRUE; 3497 bnew->free_ij = PETSC_TRUE; 3498 3499 /* condense columns of maij->B */ 3500 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 3501 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 3502 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 3503 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 3504 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3505 PetscFunctionReturn(0); 3506 } 3507 3508 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3509 3510 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3511 { 3512 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3513 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3514 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3515 Mat M,Msub,B=a->B; 3516 MatScalar *aa; 3517 Mat_SeqAIJ *aij; 3518 PetscInt *garray = a->garray,*colsub,Ncols; 3519 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3520 IS iscol_sub,iscmap; 3521 const PetscInt *is_idx,*cmap; 3522 PetscBool allcolumns=PETSC_FALSE; 3523 MPI_Comm comm; 3524 3525 PetscFunctionBegin; 3526 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3527 if (call == MAT_REUSE_MATRIX) { 3528 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3529 PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3530 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3531 3532 PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap)); 3533 PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3534 3535 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub)); 3536 PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3537 3538 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub)); 3539 3540 } else { /* call == MAT_INITIAL_MATRIX) */ 3541 PetscBool flg; 3542 3543 PetscCall(ISGetLocalSize(iscol,&n)); 3544 PetscCall(ISGetSize(iscol,&Ncols)); 3545 3546 /* (1) iscol -> nonscalable iscol_local */ 3547 /* Check for special case: each processor gets entire matrix columns */ 3548 PetscCall(ISIdentity(iscol_local,&flg)); 3549 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3550 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3551 if (allcolumns) { 3552 iscol_sub = iscol_local; 3553 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3554 PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap)); 3555 3556 } else { 3557 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3558 PetscInt *idx,*cmap1,k; 3559 PetscCall(PetscMalloc1(Ncols,&idx)); 3560 PetscCall(PetscMalloc1(Ncols,&cmap1)); 3561 PetscCall(ISGetIndices(iscol_local,&is_idx)); 3562 count = 0; 3563 k = 0; 3564 for (i=0; i<Ncols; i++) { 3565 j = is_idx[i]; 3566 if (j >= cstart && j < cend) { 3567 /* diagonal part of mat */ 3568 idx[count] = j; 3569 cmap1[count++] = i; /* column index in submat */ 3570 } else if (Bn) { 3571 /* off-diagonal part of mat */ 3572 if (j == garray[k]) { 3573 idx[count] = j; 3574 cmap1[count++] = i; /* column index in submat */ 3575 } else if (j > garray[k]) { 3576 while (j > garray[k] && k < Bn-1) k++; 3577 if (j == garray[k]) { 3578 idx[count] = j; 3579 cmap1[count++] = i; /* column index in submat */ 3580 } 3581 } 3582 } 3583 } 3584 PetscCall(ISRestoreIndices(iscol_local,&is_idx)); 3585 3586 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub)); 3587 PetscCall(ISGetBlockSize(iscol,&cbs)); 3588 PetscCall(ISSetBlockSize(iscol_sub,cbs)); 3589 3590 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap)); 3591 } 3592 3593 /* (3) Create sequential Msub */ 3594 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub)); 3595 } 3596 3597 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3598 aij = (Mat_SeqAIJ*)(Msub)->data; 3599 ii = aij->i; 3600 PetscCall(ISGetIndices(iscmap,&cmap)); 3601 3602 /* 3603 m - number of local rows 3604 Ncols - number of columns (same on all processors) 3605 rstart - first row in new global matrix generated 3606 */ 3607 PetscCall(MatGetSize(Msub,&m,NULL)); 3608 3609 if (call == MAT_INITIAL_MATRIX) { 3610 /* (4) Create parallel newmat */ 3611 PetscMPIInt rank,size; 3612 PetscInt csize; 3613 3614 PetscCallMPI(MPI_Comm_size(comm,&size)); 3615 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3616 3617 /* 3618 Determine the number of non-zeros in the diagonal and off-diagonal 3619 portions of the matrix in order to do correct preallocation 3620 */ 3621 3622 /* first get start and end of "diagonal" columns */ 3623 PetscCall(ISGetLocalSize(iscol,&csize)); 3624 if (csize == PETSC_DECIDE) { 3625 PetscCall(ISGetSize(isrow,&mglobal)); 3626 if (mglobal == Ncols) { /* square matrix */ 3627 nlocal = m; 3628 } else { 3629 nlocal = Ncols/size + ((Ncols % size) > rank); 3630 } 3631 } else { 3632 nlocal = csize; 3633 } 3634 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3635 rstart = rend - nlocal; 3636 PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols); 3637 3638 /* next, compute all the lengths */ 3639 jj = aij->j; 3640 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3641 olens = dlens + m; 3642 for (i=0; i<m; i++) { 3643 jend = ii[i+1] - ii[i]; 3644 olen = 0; 3645 dlen = 0; 3646 for (j=0; j<jend; j++) { 3647 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3648 else dlen++; 3649 jj++; 3650 } 3651 olens[i] = olen; 3652 dlens[i] = dlen; 3653 } 3654 3655 PetscCall(ISGetBlockSize(isrow,&bs)); 3656 PetscCall(ISGetBlockSize(iscol,&cbs)); 3657 3658 PetscCall(MatCreate(comm,&M)); 3659 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols)); 3660 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3661 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3662 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3663 PetscCall(PetscFree(dlens)); 3664 3665 } else { /* call == MAT_REUSE_MATRIX */ 3666 M = *newmat; 3667 PetscCall(MatGetLocalSize(M,&i,NULL)); 3668 PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3669 PetscCall(MatZeroEntries(M)); 3670 /* 3671 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3672 rather than the slower MatSetValues(). 3673 */ 3674 M->was_assembled = PETSC_TRUE; 3675 M->assembled = PETSC_FALSE; 3676 } 3677 3678 /* (5) Set values of Msub to *newmat */ 3679 PetscCall(PetscMalloc1(count,&colsub)); 3680 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 3681 3682 jj = aij->j; 3683 PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa)); 3684 for (i=0; i<m; i++) { 3685 row = rstart + i; 3686 nz = ii[i+1] - ii[i]; 3687 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3688 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES)); 3689 jj += nz; aa += nz; 3690 } 3691 PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa)); 3692 PetscCall(ISRestoreIndices(iscmap,&cmap)); 3693 3694 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3695 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3696 3697 PetscCall(PetscFree(colsub)); 3698 3699 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3700 if (call == MAT_INITIAL_MATRIX) { 3701 *newmat = M; 3702 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub)); 3703 PetscCall(MatDestroy(&Msub)); 3704 3705 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub)); 3706 PetscCall(ISDestroy(&iscol_sub)); 3707 3708 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap)); 3709 PetscCall(ISDestroy(&iscmap)); 3710 3711 if (iscol_local) { 3712 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local)); 3713 PetscCall(ISDestroy(&iscol_local)); 3714 } 3715 } 3716 PetscFunctionReturn(0); 3717 } 3718 3719 /* 3720 Not great since it makes two copies of the submatrix, first an SeqAIJ 3721 in local and then by concatenating the local matrices the end result. 3722 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3723 3724 Note: This requires a sequential iscol with all indices. 3725 */ 3726 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3727 { 3728 PetscMPIInt rank,size; 3729 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3730 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3731 Mat M,Mreuse; 3732 MatScalar *aa,*vwork; 3733 MPI_Comm comm; 3734 Mat_SeqAIJ *aij; 3735 PetscBool colflag,allcolumns=PETSC_FALSE; 3736 3737 PetscFunctionBegin; 3738 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3739 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3740 PetscCallMPI(MPI_Comm_size(comm,&size)); 3741 3742 /* Check for special case: each processor gets entire matrix columns */ 3743 PetscCall(ISIdentity(iscol,&colflag)); 3744 PetscCall(ISGetLocalSize(iscol,&n)); 3745 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3746 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3747 3748 if (call == MAT_REUSE_MATRIX) { 3749 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 3750 PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3751 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse)); 3752 } else { 3753 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse)); 3754 } 3755 3756 /* 3757 m - number of local rows 3758 n - number of columns (same on all processors) 3759 rstart - first row in new global matrix generated 3760 */ 3761 PetscCall(MatGetSize(Mreuse,&m,&n)); 3762 PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs)); 3763 if (call == MAT_INITIAL_MATRIX) { 3764 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3765 ii = aij->i; 3766 jj = aij->j; 3767 3768 /* 3769 Determine the number of non-zeros in the diagonal and off-diagonal 3770 portions of the matrix in order to do correct preallocation 3771 */ 3772 3773 /* first get start and end of "diagonal" columns */ 3774 if (csize == PETSC_DECIDE) { 3775 PetscCall(ISGetSize(isrow,&mglobal)); 3776 if (mglobal == n) { /* square matrix */ 3777 nlocal = m; 3778 } else { 3779 nlocal = n/size + ((n % size) > rank); 3780 } 3781 } else { 3782 nlocal = csize; 3783 } 3784 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3785 rstart = rend - nlocal; 3786 PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 3787 3788 /* next, compute all the lengths */ 3789 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3790 olens = dlens + m; 3791 for (i=0; i<m; i++) { 3792 jend = ii[i+1] - ii[i]; 3793 olen = 0; 3794 dlen = 0; 3795 for (j=0; j<jend; j++) { 3796 if (*jj < rstart || *jj >= rend) olen++; 3797 else dlen++; 3798 jj++; 3799 } 3800 olens[i] = olen; 3801 dlens[i] = dlen; 3802 } 3803 PetscCall(MatCreate(comm,&M)); 3804 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n)); 3805 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3806 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3807 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3808 PetscCall(PetscFree(dlens)); 3809 } else { 3810 PetscInt ml,nl; 3811 3812 M = *newmat; 3813 PetscCall(MatGetLocalSize(M,&ml,&nl)); 3814 PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3815 PetscCall(MatZeroEntries(M)); 3816 /* 3817 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3818 rather than the slower MatSetValues(). 3819 */ 3820 M->was_assembled = PETSC_TRUE; 3821 M->assembled = PETSC_FALSE; 3822 } 3823 PetscCall(MatGetOwnershipRange(M,&rstart,&rend)); 3824 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3825 ii = aij->i; 3826 jj = aij->j; 3827 3828 /* trigger copy to CPU if needed */ 3829 PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa)); 3830 for (i=0; i<m; i++) { 3831 row = rstart + i; 3832 nz = ii[i+1] - ii[i]; 3833 cwork = jj; jj += nz; 3834 vwork = aa; aa += nz; 3835 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES)); 3836 } 3837 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa)); 3838 3839 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3840 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3841 *newmat = M; 3842 3843 /* save submatrix used in processor for next request */ 3844 if (call == MAT_INITIAL_MATRIX) { 3845 PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse)); 3846 PetscCall(MatDestroy(&Mreuse)); 3847 } 3848 PetscFunctionReturn(0); 3849 } 3850 3851 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3852 { 3853 PetscInt m,cstart, cend,j,nnz,i,d,*ld; 3854 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3855 const PetscInt *JJ; 3856 PetscBool nooffprocentries; 3857 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)B->data; 3858 3859 PetscFunctionBegin; 3860 PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]); 3861 3862 PetscCall(PetscLayoutSetUp(B->rmap)); 3863 PetscCall(PetscLayoutSetUp(B->cmap)); 3864 m = B->rmap->n; 3865 cstart = B->cmap->rstart; 3866 cend = B->cmap->rend; 3867 rstart = B->rmap->rstart; 3868 3869 PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz)); 3870 3871 if (PetscDefined(USE_DEBUG)) { 3872 for (i=0; i<m; i++) { 3873 nnz = Ii[i+1]- Ii[i]; 3874 JJ = J + Ii[i]; 3875 PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz); 3876 PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]); 3877 PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N); 3878 } 3879 } 3880 3881 for (i=0; i<m; i++) { 3882 nnz = Ii[i+1]- Ii[i]; 3883 JJ = J + Ii[i]; 3884 nnz_max = PetscMax(nnz_max,nnz); 3885 d = 0; 3886 for (j=0; j<nnz; j++) { 3887 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3888 } 3889 d_nnz[i] = d; 3890 o_nnz[i] = nnz - d; 3891 } 3892 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 3893 PetscCall(PetscFree2(d_nnz,o_nnz)); 3894 3895 for (i=0; i<m; i++) { 3896 ii = i + rstart; 3897 PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES)); 3898 } 3899 nooffprocentries = B->nooffprocentries; 3900 B->nooffprocentries = PETSC_TRUE; 3901 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 3902 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3903 B->nooffprocentries = nooffprocentries; 3904 3905 /* count number of entries below block diagonal */ 3906 PetscCall(PetscFree(Aij->ld)); 3907 PetscCall(PetscCalloc1(m,&ld)); 3908 Aij->ld = ld; 3909 for (i=0; i<m; i++) { 3910 nnz = Ii[i+1] - Ii[i]; 3911 j = 0; 3912 while (j < nnz && J[j] < cstart) {j++;} 3913 ld[i] = j; 3914 J += nnz; 3915 } 3916 3917 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3918 PetscFunctionReturn(0); 3919 } 3920 3921 /*@ 3922 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3923 (the default parallel PETSc format). 3924 3925 Collective 3926 3927 Input Parameters: 3928 + B - the matrix 3929 . i - the indices into j for the start of each local row (starts with zero) 3930 . j - the column indices for each local row (starts with zero) 3931 - v - optional values in the matrix 3932 3933 Level: developer 3934 3935 Notes: 3936 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3937 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3938 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3939 3940 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3941 3942 The format which is used for the sparse matrix input, is equivalent to a 3943 row-major ordering.. i.e for the following matrix, the input data expected is 3944 as shown 3945 3946 $ 1 0 0 3947 $ 2 0 3 P0 3948 $ ------- 3949 $ 4 5 6 P1 3950 $ 3951 $ Process0 [P0]: rows_owned=[0,1] 3952 $ i = {0,1,3} [size = nrow+1 = 2+1] 3953 $ j = {0,0,2} [size = 3] 3954 $ v = {1,2,3} [size = 3] 3955 $ 3956 $ Process1 [P1]: rows_owned=[2] 3957 $ i = {0,3} [size = nrow+1 = 1+1] 3958 $ j = {0,1,2} [size = 3] 3959 $ v = {4,5,6} [size = 3] 3960 3961 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 3962 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 3963 @*/ 3964 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3965 { 3966 PetscFunctionBegin; 3967 PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v)); 3968 PetscFunctionReturn(0); 3969 } 3970 3971 /*@C 3972 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3973 (the default parallel PETSc format). For good matrix assembly performance 3974 the user should preallocate the matrix storage by setting the parameters 3975 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3976 performance can be increased by more than a factor of 50. 3977 3978 Collective 3979 3980 Input Parameters: 3981 + B - the matrix 3982 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3983 (same value is used for all local rows) 3984 . d_nnz - array containing the number of nonzeros in the various rows of the 3985 DIAGONAL portion of the local submatrix (possibly different for each row) 3986 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3987 The size of this array is equal to the number of local rows, i.e 'm'. 3988 For matrices that will be factored, you must leave room for (and set) 3989 the diagonal entry even if it is zero. 3990 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3991 submatrix (same value is used for all local rows). 3992 - o_nnz - array containing the number of nonzeros in the various rows of the 3993 OFF-DIAGONAL portion of the local submatrix (possibly different for 3994 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3995 structure. The size of this array is equal to the number 3996 of local rows, i.e 'm'. 3997 3998 If the *_nnz parameter is given then the *_nz parameter is ignored 3999 4000 The AIJ format (also called the Yale sparse matrix format or 4001 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4002 storage. The stored row and column indices begin with zero. 4003 See Users-Manual: ch_mat for details. 4004 4005 The parallel matrix is partitioned such that the first m0 rows belong to 4006 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4007 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4008 4009 The DIAGONAL portion of the local submatrix of a processor can be defined 4010 as the submatrix which is obtained by extraction the part corresponding to 4011 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4012 first row that belongs to the processor, r2 is the last row belonging to 4013 the this processor, and c1-c2 is range of indices of the local part of a 4014 vector suitable for applying the matrix to. This is an mxn matrix. In the 4015 common case of a square matrix, the row and column ranges are the same and 4016 the DIAGONAL part is also square. The remaining portion of the local 4017 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4018 4019 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4020 4021 You can call MatGetInfo() to get information on how effective the preallocation was; 4022 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4023 You can also run with the option -info and look for messages with the string 4024 malloc in them to see if additional memory allocation was needed. 4025 4026 Example usage: 4027 4028 Consider the following 8x8 matrix with 34 non-zero values, that is 4029 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4030 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4031 as follows: 4032 4033 .vb 4034 1 2 0 | 0 3 0 | 0 4 4035 Proc0 0 5 6 | 7 0 0 | 8 0 4036 9 0 10 | 11 0 0 | 12 0 4037 ------------------------------------- 4038 13 0 14 | 15 16 17 | 0 0 4039 Proc1 0 18 0 | 19 20 21 | 0 0 4040 0 0 0 | 22 23 0 | 24 0 4041 ------------------------------------- 4042 Proc2 25 26 27 | 0 0 28 | 29 0 4043 30 0 0 | 31 32 33 | 0 34 4044 .ve 4045 4046 This can be represented as a collection of submatrices as: 4047 4048 .vb 4049 A B C 4050 D E F 4051 G H I 4052 .ve 4053 4054 Where the submatrices A,B,C are owned by proc0, D,E,F are 4055 owned by proc1, G,H,I are owned by proc2. 4056 4057 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4058 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4059 The 'M','N' parameters are 8,8, and have the same values on all procs. 4060 4061 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4062 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4063 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4064 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4065 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4066 matrix, ans [DF] as another SeqAIJ matrix. 4067 4068 When d_nz, o_nz parameters are specified, d_nz storage elements are 4069 allocated for every row of the local diagonal submatrix, and o_nz 4070 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4071 One way to choose d_nz and o_nz is to use the max nonzerors per local 4072 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4073 In this case, the values of d_nz,o_nz are: 4074 .vb 4075 proc0 : dnz = 2, o_nz = 2 4076 proc1 : dnz = 3, o_nz = 2 4077 proc2 : dnz = 1, o_nz = 4 4078 .ve 4079 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4080 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4081 for proc3. i.e we are using 12+15+10=37 storage locations to store 4082 34 values. 4083 4084 When d_nnz, o_nnz parameters are specified, the storage is specified 4085 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4086 In the above case the values for d_nnz,o_nnz are: 4087 .vb 4088 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4089 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4090 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4091 .ve 4092 Here the space allocated is sum of all the above values i.e 34, and 4093 hence pre-allocation is perfect. 4094 4095 Level: intermediate 4096 4097 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4098 `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()` 4099 @*/ 4100 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4101 { 4102 PetscFunctionBegin; 4103 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4104 PetscValidType(B,1); 4105 PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz)); 4106 PetscFunctionReturn(0); 4107 } 4108 4109 /*@ 4110 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4111 CSR format for the local rows. 4112 4113 Collective 4114 4115 Input Parameters: 4116 + comm - MPI communicator 4117 . m - number of local rows (Cannot be PETSC_DECIDE) 4118 . n - This value should be the same as the local size used in creating the 4119 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4120 calculated if N is given) For square matrices n is almost always m. 4121 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4122 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4123 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4124 . j - column indices 4125 - a - optional matrix values 4126 4127 Output Parameter: 4128 . mat - the matrix 4129 4130 Level: intermediate 4131 4132 Notes: 4133 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4134 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4135 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4136 4137 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4138 4139 The format which is used for the sparse matrix input, is equivalent to a 4140 row-major ordering.. i.e for the following matrix, the input data expected is 4141 as shown 4142 4143 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4144 4145 $ 1 0 0 4146 $ 2 0 3 P0 4147 $ ------- 4148 $ 4 5 6 P1 4149 $ 4150 $ Process0 [P0]: rows_owned=[0,1] 4151 $ i = {0,1,3} [size = nrow+1 = 2+1] 4152 $ j = {0,0,2} [size = 3] 4153 $ v = {1,2,3} [size = 3] 4154 $ 4155 $ Process1 [P1]: rows_owned=[2] 4156 $ i = {0,3} [size = nrow+1 = 1+1] 4157 $ j = {0,1,2} [size = 3] 4158 $ v = {4,5,6} [size = 3] 4159 4160 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4161 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4162 @*/ 4163 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4164 { 4165 PetscFunctionBegin; 4166 PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4167 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4168 PetscCall(MatCreate(comm,mat)); 4169 PetscCall(MatSetSizes(*mat,m,n,M,N)); 4170 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4171 PetscCall(MatSetType(*mat,MATMPIAIJ)); 4172 PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a)); 4173 PetscFunctionReturn(0); 4174 } 4175 4176 /*@ 4177 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4178 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed from MatCreateMPIAIJWithArrays() 4179 4180 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4181 4182 Collective 4183 4184 Input Parameters: 4185 + mat - the matrix 4186 . m - number of local rows (Cannot be PETSC_DECIDE) 4187 . n - This value should be the same as the local size used in creating the 4188 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4189 calculated if N is given) For square matrices n is almost always m. 4190 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4191 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4192 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4193 . J - column indices 4194 - v - matrix values 4195 4196 Level: intermediate 4197 4198 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4199 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()` 4200 @*/ 4201 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4202 { 4203 PetscInt nnz,i; 4204 PetscBool nooffprocentries; 4205 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4206 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4207 PetscScalar *ad,*ao; 4208 PetscInt ldi,Iii,md; 4209 const PetscInt *Adi = Ad->i; 4210 PetscInt *ld = Aij->ld; 4211 4212 PetscFunctionBegin; 4213 PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4214 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4215 PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4216 PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4217 4218 PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4219 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4220 4221 for (i=0; i<m; i++) { 4222 nnz = Ii[i+1]- Ii[i]; 4223 Iii = Ii[i]; 4224 ldi = ld[i]; 4225 md = Adi[i+1]-Adi[i]; 4226 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4227 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4228 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4229 ad += md; 4230 ao += nnz - md; 4231 } 4232 nooffprocentries = mat->nooffprocentries; 4233 mat->nooffprocentries = PETSC_TRUE; 4234 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4235 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4236 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4237 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4238 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4239 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4240 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4241 mat->nooffprocentries = nooffprocentries; 4242 PetscFunctionReturn(0); 4243 } 4244 4245 /*@ 4246 MatUpdateMPIAIJWithArray - updates an MPI AIJ matrix using an array that contains the nonzero values 4247 4248 Collective 4249 4250 Input Parameters: 4251 + mat - the matrix 4252 - v - matrix values, stored by row 4253 4254 Level: intermediate 4255 4256 Notes: 4257 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4258 4259 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4260 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArrays()` 4261 @*/ 4262 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat,const PetscScalar v[]) 4263 { 4264 PetscInt nnz,i,m; 4265 PetscBool nooffprocentries; 4266 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4267 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4268 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)Aij->B->data; 4269 PetscScalar *ad,*ao; 4270 const PetscInt *Adi = Ad->i,*Adj = Ao->i; 4271 PetscInt ldi,Iii,md; 4272 PetscInt *ld = Aij->ld; 4273 4274 PetscFunctionBegin; 4275 m = mat->rmap->n; 4276 4277 PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4278 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4279 Iii = 0; 4280 for (i=0; i<m; i++) { 4281 nnz = Adi[i+1]-Adi[i] + Adj[i+1]-Adj[i]; 4282 ldi = ld[i]; 4283 md = Adi[i+1]-Adi[i]; 4284 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4285 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4286 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4287 ad += md; 4288 ao += nnz - md; 4289 Iii += nnz; 4290 } 4291 nooffprocentries = mat->nooffprocentries; 4292 mat->nooffprocentries = PETSC_TRUE; 4293 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4294 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4295 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4296 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4297 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4298 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4299 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4300 mat->nooffprocentries = nooffprocentries; 4301 PetscFunctionReturn(0); 4302 } 4303 4304 /*@C 4305 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4306 (the default parallel PETSc format). For good matrix assembly performance 4307 the user should preallocate the matrix storage by setting the parameters 4308 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4309 performance can be increased by more than a factor of 50. 4310 4311 Collective 4312 4313 Input Parameters: 4314 + comm - MPI communicator 4315 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4316 This value should be the same as the local size used in creating the 4317 y vector for the matrix-vector product y = Ax. 4318 . n - This value should be the same as the local size used in creating the 4319 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4320 calculated if N is given) For square matrices n is almost always m. 4321 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4322 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4323 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4324 (same value is used for all local rows) 4325 . d_nnz - array containing the number of nonzeros in the various rows of the 4326 DIAGONAL portion of the local submatrix (possibly different for each row) 4327 or NULL, if d_nz is used to specify the nonzero structure. 4328 The size of this array is equal to the number of local rows, i.e 'm'. 4329 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4330 submatrix (same value is used for all local rows). 4331 - o_nnz - array containing the number of nonzeros in the various rows of the 4332 OFF-DIAGONAL portion of the local submatrix (possibly different for 4333 each row) or NULL, if o_nz is used to specify the nonzero 4334 structure. The size of this array is equal to the number 4335 of local rows, i.e 'm'. 4336 4337 Output Parameter: 4338 . A - the matrix 4339 4340 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4341 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4342 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4343 4344 Notes: 4345 If the *_nnz parameter is given then the *_nz parameter is ignored 4346 4347 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4348 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4349 storage requirements for this matrix. 4350 4351 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4352 processor than it must be used on all processors that share the object for 4353 that argument. 4354 4355 The user MUST specify either the local or global matrix dimensions 4356 (possibly both). 4357 4358 The parallel matrix is partitioned across processors such that the 4359 first m0 rows belong to process 0, the next m1 rows belong to 4360 process 1, the next m2 rows belong to process 2 etc.. where 4361 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4362 values corresponding to [m x N] submatrix. 4363 4364 The columns are logically partitioned with the n0 columns belonging 4365 to 0th partition, the next n1 columns belonging to the next 4366 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4367 4368 The DIAGONAL portion of the local submatrix on any given processor 4369 is the submatrix corresponding to the rows and columns m,n 4370 corresponding to the given processor. i.e diagonal matrix on 4371 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4372 etc. The remaining portion of the local submatrix [m x (N-n)] 4373 constitute the OFF-DIAGONAL portion. The example below better 4374 illustrates this concept. 4375 4376 For a square global matrix we define each processor's diagonal portion 4377 to be its local rows and the corresponding columns (a square submatrix); 4378 each processor's off-diagonal portion encompasses the remainder of the 4379 local matrix (a rectangular submatrix). 4380 4381 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4382 4383 When calling this routine with a single process communicator, a matrix of 4384 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4385 type of communicator, use the construction mechanism 4386 .vb 4387 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4388 .ve 4389 4390 $ MatCreate(...,&A); 4391 $ MatSetType(A,MATMPIAIJ); 4392 $ MatSetSizes(A, m,n,M,N); 4393 $ MatMPIAIJSetPreallocation(A,...); 4394 4395 By default, this format uses inodes (identical nodes) when possible. 4396 We search for consecutive rows with the same nonzero structure, thereby 4397 reusing matrix information to achieve increased efficiency. 4398 4399 Options Database Keys: 4400 + -mat_no_inode - Do not use inodes 4401 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4402 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices. 4403 See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4404 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call. 4405 4406 Example usage: 4407 4408 Consider the following 8x8 matrix with 34 non-zero values, that is 4409 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4410 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4411 as follows 4412 4413 .vb 4414 1 2 0 | 0 3 0 | 0 4 4415 Proc0 0 5 6 | 7 0 0 | 8 0 4416 9 0 10 | 11 0 0 | 12 0 4417 ------------------------------------- 4418 13 0 14 | 15 16 17 | 0 0 4419 Proc1 0 18 0 | 19 20 21 | 0 0 4420 0 0 0 | 22 23 0 | 24 0 4421 ------------------------------------- 4422 Proc2 25 26 27 | 0 0 28 | 29 0 4423 30 0 0 | 31 32 33 | 0 34 4424 .ve 4425 4426 This can be represented as a collection of submatrices as 4427 4428 .vb 4429 A B C 4430 D E F 4431 G H I 4432 .ve 4433 4434 Where the submatrices A,B,C are owned by proc0, D,E,F are 4435 owned by proc1, G,H,I are owned by proc2. 4436 4437 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4438 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4439 The 'M','N' parameters are 8,8, and have the same values on all procs. 4440 4441 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4442 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4443 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4444 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4445 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4446 matrix, ans [DF] as another SeqAIJ matrix. 4447 4448 When d_nz, o_nz parameters are specified, d_nz storage elements are 4449 allocated for every row of the local diagonal submatrix, and o_nz 4450 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4451 One way to choose d_nz and o_nz is to use the max nonzerors per local 4452 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4453 In this case, the values of d_nz,o_nz are 4454 .vb 4455 proc0 : dnz = 2, o_nz = 2 4456 proc1 : dnz = 3, o_nz = 2 4457 proc2 : dnz = 1, o_nz = 4 4458 .ve 4459 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4460 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4461 for proc3. i.e we are using 12+15+10=37 storage locations to store 4462 34 values. 4463 4464 When d_nnz, o_nnz parameters are specified, the storage is specified 4465 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4466 In the above case the values for d_nnz,o_nnz are 4467 .vb 4468 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4469 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4470 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4471 .ve 4472 Here the space allocated is sum of all the above values i.e 34, and 4473 hence pre-allocation is perfect. 4474 4475 Level: intermediate 4476 4477 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4478 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4479 @*/ 4480 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4481 { 4482 PetscMPIInt size; 4483 4484 PetscFunctionBegin; 4485 PetscCall(MatCreate(comm,A)); 4486 PetscCall(MatSetSizes(*A,m,n,M,N)); 4487 PetscCallMPI(MPI_Comm_size(comm,&size)); 4488 if (size > 1) { 4489 PetscCall(MatSetType(*A,MATMPIAIJ)); 4490 PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz)); 4491 } else { 4492 PetscCall(MatSetType(*A,MATSEQAIJ)); 4493 PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz)); 4494 } 4495 PetscFunctionReturn(0); 4496 } 4497 4498 /*@C 4499 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4500 4501 Not collective 4502 4503 Input Parameter: 4504 . A - The MPIAIJ matrix 4505 4506 Output Parameters: 4507 + Ad - The local diagonal block as a SeqAIJ matrix 4508 . Ao - The local off-diagonal block as a SeqAIJ matrix 4509 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4510 4511 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4512 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4513 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4514 local column numbers to global column numbers in the original matrix. 4515 4516 Level: intermediate 4517 4518 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4519 @*/ 4520 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4521 { 4522 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4523 PetscBool flg; 4524 4525 PetscFunctionBegin; 4526 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg)); 4527 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4528 if (Ad) *Ad = a->A; 4529 if (Ao) *Ao = a->B; 4530 if (colmap) *colmap = a->garray; 4531 PetscFunctionReturn(0); 4532 } 4533 4534 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4535 { 4536 PetscInt m,N,i,rstart,nnz,Ii; 4537 PetscInt *indx; 4538 PetscScalar *values; 4539 MatType rootType; 4540 4541 PetscFunctionBegin; 4542 PetscCall(MatGetSize(inmat,&m,&N)); 4543 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4544 PetscInt *dnz,*onz,sum,bs,cbs; 4545 4546 if (n == PETSC_DECIDE) { 4547 PetscCall(PetscSplitOwnership(comm,&n,&N)); 4548 } 4549 /* Check sum(n) = N */ 4550 PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm)); 4551 PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 4552 4553 PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm)); 4554 rstart -= m; 4555 4556 MatPreallocateBegin(comm,m,n,dnz,onz); 4557 for (i=0; i<m; i++) { 4558 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4559 PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz)); 4560 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4561 } 4562 4563 PetscCall(MatCreate(comm,outmat)); 4564 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4565 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 4566 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 4567 PetscCall(MatGetRootType_Private(inmat,&rootType)); 4568 PetscCall(MatSetType(*outmat,rootType)); 4569 PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz)); 4570 PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz)); 4571 MatPreallocateEnd(dnz,onz); 4572 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 4573 } 4574 4575 /* numeric phase */ 4576 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 4577 for (i=0; i<m; i++) { 4578 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4579 Ii = i + rstart; 4580 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 4581 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4582 } 4583 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 4584 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 4585 PetscFunctionReturn(0); 4586 } 4587 4588 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4589 { 4590 PetscMPIInt rank; 4591 PetscInt m,N,i,rstart,nnz; 4592 size_t len; 4593 const PetscInt *indx; 4594 PetscViewer out; 4595 char *name; 4596 Mat B; 4597 const PetscScalar *values; 4598 4599 PetscFunctionBegin; 4600 PetscCall(MatGetLocalSize(A,&m,NULL)); 4601 PetscCall(MatGetSize(A,NULL,&N)); 4602 /* Should this be the type of the diagonal block of A? */ 4603 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 4604 PetscCall(MatSetSizes(B,m,N,m,N)); 4605 PetscCall(MatSetBlockSizesFromMats(B,A,A)); 4606 PetscCall(MatSetType(B,MATSEQAIJ)); 4607 PetscCall(MatSeqAIJSetPreallocation(B,0,NULL)); 4608 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 4609 for (i=0; i<m; i++) { 4610 PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values)); 4611 PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES)); 4612 PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values)); 4613 } 4614 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 4615 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 4616 4617 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 4618 PetscCall(PetscStrlen(outfile,&len)); 4619 PetscCall(PetscMalloc1(len+6,&name)); 4620 PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank)); 4621 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out)); 4622 PetscCall(PetscFree(name)); 4623 PetscCall(MatView(B,out)); 4624 PetscCall(PetscViewerDestroy(&out)); 4625 PetscCall(MatDestroy(&B)); 4626 PetscFunctionReturn(0); 4627 } 4628 4629 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4630 { 4631 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4632 4633 PetscFunctionBegin; 4634 if (!merge) PetscFunctionReturn(0); 4635 PetscCall(PetscFree(merge->id_r)); 4636 PetscCall(PetscFree(merge->len_s)); 4637 PetscCall(PetscFree(merge->len_r)); 4638 PetscCall(PetscFree(merge->bi)); 4639 PetscCall(PetscFree(merge->bj)); 4640 PetscCall(PetscFree(merge->buf_ri[0])); 4641 PetscCall(PetscFree(merge->buf_ri)); 4642 PetscCall(PetscFree(merge->buf_rj[0])); 4643 PetscCall(PetscFree(merge->buf_rj)); 4644 PetscCall(PetscFree(merge->coi)); 4645 PetscCall(PetscFree(merge->coj)); 4646 PetscCall(PetscFree(merge->owners_co)); 4647 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4648 PetscCall(PetscFree(merge)); 4649 PetscFunctionReturn(0); 4650 } 4651 4652 #include <../src/mat/utils/freespace.h> 4653 #include <petscbt.h> 4654 4655 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4656 { 4657 MPI_Comm comm; 4658 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4659 PetscMPIInt size,rank,taga,*len_s; 4660 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4661 PetscInt proc,m; 4662 PetscInt **buf_ri,**buf_rj; 4663 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4664 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4665 MPI_Request *s_waits,*r_waits; 4666 MPI_Status *status; 4667 const MatScalar *aa,*a_a; 4668 MatScalar **abuf_r,*ba_i; 4669 Mat_Merge_SeqsToMPI *merge; 4670 PetscContainer container; 4671 4672 PetscFunctionBegin; 4673 PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm)); 4674 PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0)); 4675 4676 PetscCallMPI(MPI_Comm_size(comm,&size)); 4677 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4678 4679 PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container)); 4680 PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4681 PetscCall(PetscContainerGetPointer(container,(void**)&merge)); 4682 PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a)); 4683 aa = a_a; 4684 4685 bi = merge->bi; 4686 bj = merge->bj; 4687 buf_ri = merge->buf_ri; 4688 buf_rj = merge->buf_rj; 4689 4690 PetscCall(PetscMalloc1(size,&status)); 4691 owners = merge->rowmap->range; 4692 len_s = merge->len_s; 4693 4694 /* send and recv matrix values */ 4695 /*-----------------------------*/ 4696 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga)); 4697 PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits)); 4698 4699 PetscCall(PetscMalloc1(merge->nsend+1,&s_waits)); 4700 for (proc=0,k=0; proc<size; proc++) { 4701 if (!len_s[proc]) continue; 4702 i = owners[proc]; 4703 PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k)); 4704 k++; 4705 } 4706 4707 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status)); 4708 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status)); 4709 PetscCall(PetscFree(status)); 4710 4711 PetscCall(PetscFree(s_waits)); 4712 PetscCall(PetscFree(r_waits)); 4713 4714 /* insert mat values of mpimat */ 4715 /*----------------------------*/ 4716 PetscCall(PetscMalloc1(N,&ba_i)); 4717 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4718 4719 for (k=0; k<merge->nrecv; k++) { 4720 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4721 nrows = *(buf_ri_k[k]); 4722 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4723 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4724 } 4725 4726 /* set values of ba */ 4727 m = merge->rowmap->n; 4728 for (i=0; i<m; i++) { 4729 arow = owners[rank] + i; 4730 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4731 bnzi = bi[i+1] - bi[i]; 4732 PetscCall(PetscArrayzero(ba_i,bnzi)); 4733 4734 /* add local non-zero vals of this proc's seqmat into ba */ 4735 anzi = ai[arow+1] - ai[arow]; 4736 aj = a->j + ai[arow]; 4737 aa = a_a + ai[arow]; 4738 nextaj = 0; 4739 for (j=0; nextaj<anzi; j++) { 4740 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4741 ba_i[j] += aa[nextaj++]; 4742 } 4743 } 4744 4745 /* add received vals into ba */ 4746 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4747 /* i-th row */ 4748 if (i == *nextrow[k]) { 4749 anzi = *(nextai[k]+1) - *nextai[k]; 4750 aj = buf_rj[k] + *(nextai[k]); 4751 aa = abuf_r[k] + *(nextai[k]); 4752 nextaj = 0; 4753 for (j=0; nextaj<anzi; j++) { 4754 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4755 ba_i[j] += aa[nextaj++]; 4756 } 4757 } 4758 nextrow[k]++; nextai[k]++; 4759 } 4760 } 4761 PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES)); 4762 } 4763 PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a)); 4764 PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY)); 4765 PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY)); 4766 4767 PetscCall(PetscFree(abuf_r[0])); 4768 PetscCall(PetscFree(abuf_r)); 4769 PetscCall(PetscFree(ba_i)); 4770 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4771 PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0)); 4772 PetscFunctionReturn(0); 4773 } 4774 4775 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4776 { 4777 Mat B_mpi; 4778 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4779 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4780 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4781 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4782 PetscInt len,proc,*dnz,*onz,bs,cbs; 4783 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi; 4784 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4785 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4786 MPI_Status *status; 4787 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4788 PetscBT lnkbt; 4789 Mat_Merge_SeqsToMPI *merge; 4790 PetscContainer container; 4791 4792 PetscFunctionBegin; 4793 PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0)); 4794 4795 /* make sure it is a PETSc comm */ 4796 PetscCall(PetscCommDuplicate(comm,&comm,NULL)); 4797 PetscCallMPI(MPI_Comm_size(comm,&size)); 4798 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4799 4800 PetscCall(PetscNew(&merge)); 4801 PetscCall(PetscMalloc1(size,&status)); 4802 4803 /* determine row ownership */ 4804 /*---------------------------------------------------------*/ 4805 PetscCall(PetscLayoutCreate(comm,&merge->rowmap)); 4806 PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m)); 4807 PetscCall(PetscLayoutSetSize(merge->rowmap,M)); 4808 PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1)); 4809 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4810 PetscCall(PetscMalloc1(size,&len_si)); 4811 PetscCall(PetscMalloc1(size,&merge->len_s)); 4812 4813 m = merge->rowmap->n; 4814 owners = merge->rowmap->range; 4815 4816 /* determine the number of messages to send, their lengths */ 4817 /*---------------------------------------------------------*/ 4818 len_s = merge->len_s; 4819 4820 len = 0; /* length of buf_si[] */ 4821 merge->nsend = 0; 4822 for (proc=0; proc<size; proc++) { 4823 len_si[proc] = 0; 4824 if (proc == rank) { 4825 len_s[proc] = 0; 4826 } else { 4827 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4828 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4829 } 4830 if (len_s[proc]) { 4831 merge->nsend++; 4832 nrows = 0; 4833 for (i=owners[proc]; i<owners[proc+1]; i++) { 4834 if (ai[i+1] > ai[i]) nrows++; 4835 } 4836 len_si[proc] = 2*(nrows+1); 4837 len += len_si[proc]; 4838 } 4839 } 4840 4841 /* determine the number and length of messages to receive for ij-structure */ 4842 /*-------------------------------------------------------------------------*/ 4843 PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv)); 4844 PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri)); 4845 4846 /* post the Irecv of j-structure */ 4847 /*-------------------------------*/ 4848 PetscCall(PetscCommGetNewTag(comm,&tagj)); 4849 PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits)); 4850 4851 /* post the Isend of j-structure */ 4852 /*--------------------------------*/ 4853 PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits)); 4854 4855 for (proc=0, k=0; proc<size; proc++) { 4856 if (!len_s[proc]) continue; 4857 i = owners[proc]; 4858 PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k)); 4859 k++; 4860 } 4861 4862 /* receives and sends of j-structure are complete */ 4863 /*------------------------------------------------*/ 4864 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status)); 4865 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status)); 4866 4867 /* send and recv i-structure */ 4868 /*---------------------------*/ 4869 PetscCall(PetscCommGetNewTag(comm,&tagi)); 4870 PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits)); 4871 4872 PetscCall(PetscMalloc1(len+1,&buf_s)); 4873 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4874 for (proc=0,k=0; proc<size; proc++) { 4875 if (!len_s[proc]) continue; 4876 /* form outgoing message for i-structure: 4877 buf_si[0]: nrows to be sent 4878 [1:nrows]: row index (global) 4879 [nrows+1:2*nrows+1]: i-structure index 4880 */ 4881 /*-------------------------------------------*/ 4882 nrows = len_si[proc]/2 - 1; 4883 buf_si_i = buf_si + nrows+1; 4884 buf_si[0] = nrows; 4885 buf_si_i[0] = 0; 4886 nrows = 0; 4887 for (i=owners[proc]; i<owners[proc+1]; i++) { 4888 anzi = ai[i+1] - ai[i]; 4889 if (anzi) { 4890 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4891 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4892 nrows++; 4893 } 4894 } 4895 PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k)); 4896 k++; 4897 buf_si += len_si[proc]; 4898 } 4899 4900 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status)); 4901 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status)); 4902 4903 PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv)); 4904 for (i=0; i<merge->nrecv; i++) { 4905 PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i])); 4906 } 4907 4908 PetscCall(PetscFree(len_si)); 4909 PetscCall(PetscFree(len_ri)); 4910 PetscCall(PetscFree(rj_waits)); 4911 PetscCall(PetscFree2(si_waits,sj_waits)); 4912 PetscCall(PetscFree(ri_waits)); 4913 PetscCall(PetscFree(buf_s)); 4914 PetscCall(PetscFree(status)); 4915 4916 /* compute a local seq matrix in each processor */ 4917 /*----------------------------------------------*/ 4918 /* allocate bi array and free space for accumulating nonzero column info */ 4919 PetscCall(PetscMalloc1(m+1,&bi)); 4920 bi[0] = 0; 4921 4922 /* create and initialize a linked list */ 4923 nlnk = N+1; 4924 PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt)); 4925 4926 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4927 len = ai[owners[rank+1]] - ai[owners[rank]]; 4928 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space)); 4929 4930 current_space = free_space; 4931 4932 /* determine symbolic info for each local row */ 4933 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4934 4935 for (k=0; k<merge->nrecv; k++) { 4936 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4937 nrows = *buf_ri_k[k]; 4938 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4939 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4940 } 4941 4942 MatPreallocateBegin(comm,m,n,dnz,onz); 4943 len = 0; 4944 for (i=0; i<m; i++) { 4945 bnzi = 0; 4946 /* add local non-zero cols of this proc's seqmat into lnk */ 4947 arow = owners[rank] + i; 4948 anzi = ai[arow+1] - ai[arow]; 4949 aj = a->j + ai[arow]; 4950 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4951 bnzi += nlnk; 4952 /* add received col data into lnk */ 4953 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4954 if (i == *nextrow[k]) { /* i-th row */ 4955 anzi = *(nextai[k]+1) - *nextai[k]; 4956 aj = buf_rj[k] + *nextai[k]; 4957 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4958 bnzi += nlnk; 4959 nextrow[k]++; nextai[k]++; 4960 } 4961 } 4962 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4963 4964 /* if free space is not available, make more free space */ 4965 if (current_space->local_remaining<bnzi) { 4966 PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space)); 4967 } 4968 /* copy data into free space, then initialize lnk */ 4969 PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt)); 4970 PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz)); 4971 4972 current_space->array += bnzi; 4973 current_space->local_used += bnzi; 4974 current_space->local_remaining -= bnzi; 4975 4976 bi[i+1] = bi[i] + bnzi; 4977 } 4978 4979 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4980 4981 PetscCall(PetscMalloc1(bi[m]+1,&bj)); 4982 PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); 4983 PetscCall(PetscLLDestroy(lnk,lnkbt)); 4984 4985 /* create symbolic parallel matrix B_mpi */ 4986 /*---------------------------------------*/ 4987 PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs)); 4988 PetscCall(MatCreate(comm,&B_mpi)); 4989 if (n==PETSC_DECIDE) { 4990 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N)); 4991 } else { 4992 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4993 } 4994 PetscCall(MatSetBlockSizes(B_mpi,bs,cbs)); 4995 PetscCall(MatSetType(B_mpi,MATMPIAIJ)); 4996 PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz)); 4997 MatPreallocateEnd(dnz,onz); 4998 PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 4999 5000 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5001 B_mpi->assembled = PETSC_FALSE; 5002 merge->bi = bi; 5003 merge->bj = bj; 5004 merge->buf_ri = buf_ri; 5005 merge->buf_rj = buf_rj; 5006 merge->coi = NULL; 5007 merge->coj = NULL; 5008 merge->owners_co = NULL; 5009 5010 PetscCall(PetscCommDestroy(&comm)); 5011 5012 /* attach the supporting struct to B_mpi for reuse */ 5013 PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container)); 5014 PetscCall(PetscContainerSetPointer(container,merge)); 5015 PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI)); 5016 PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container)); 5017 PetscCall(PetscContainerDestroy(&container)); 5018 *mpimat = B_mpi; 5019 5020 PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0)); 5021 PetscFunctionReturn(0); 5022 } 5023 5024 /*@C 5025 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 5026 matrices from each processor 5027 5028 Collective 5029 5030 Input Parameters: 5031 + comm - the communicators the parallel matrix will live on 5032 . seqmat - the input sequential matrices 5033 . m - number of local rows (or PETSC_DECIDE) 5034 . n - number of local columns (or PETSC_DECIDE) 5035 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5036 5037 Output Parameter: 5038 . mpimat - the parallel matrix generated 5039 5040 Level: advanced 5041 5042 Notes: 5043 The dimensions of the sequential matrix in each processor MUST be the same. 5044 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5045 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5046 @*/ 5047 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5048 { 5049 PetscMPIInt size; 5050 5051 PetscFunctionBegin; 5052 PetscCallMPI(MPI_Comm_size(comm,&size)); 5053 if (size == 1) { 5054 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 5055 if (scall == MAT_INITIAL_MATRIX) { 5056 PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat)); 5057 } else { 5058 PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN)); 5059 } 5060 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 5061 PetscFunctionReturn(0); 5062 } 5063 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 5064 if (scall == MAT_INITIAL_MATRIX) { 5065 PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat)); 5066 } 5067 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat)); 5068 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 5069 PetscFunctionReturn(0); 5070 } 5071 5072 /*@ 5073 MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5074 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5075 with MatGetSize() 5076 5077 Not Collective 5078 5079 Input Parameters: 5080 + A - the matrix 5081 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5082 5083 Output Parameter: 5084 . A_loc - the local sequential matrix generated 5085 5086 Level: developer 5087 5088 Notes: 5089 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 5090 5091 Destroy the matrix with MatDestroy() 5092 5093 .seealso: MatMPIAIJGetLocalMat() 5094 5095 @*/ 5096 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc) 5097 { 5098 PetscBool mpi; 5099 5100 PetscFunctionBegin; 5101 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi)); 5102 if (mpi) { 5103 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc)); 5104 } else { 5105 *A_loc = A; 5106 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5107 } 5108 PetscFunctionReturn(0); 5109 } 5110 5111 /*@ 5112 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5113 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5114 with MatGetSize() 5115 5116 Not Collective 5117 5118 Input Parameters: 5119 + A - the matrix 5120 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5121 5122 Output Parameter: 5123 . A_loc - the local sequential matrix generated 5124 5125 Level: developer 5126 5127 Notes: 5128 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 5129 5130 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5131 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5132 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5133 modify the values of the returned A_loc. 5134 5135 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5136 @*/ 5137 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5138 { 5139 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5140 Mat_SeqAIJ *mat,*a,*b; 5141 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5142 const PetscScalar *aa,*ba,*aav,*bav; 5143 PetscScalar *ca,*cam; 5144 PetscMPIInt size; 5145 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5146 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5147 PetscBool match; 5148 5149 PetscFunctionBegin; 5150 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match)); 5151 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5152 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5153 if (size == 1) { 5154 if (scall == MAT_INITIAL_MATRIX) { 5155 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5156 *A_loc = mpimat->A; 5157 } else if (scall == MAT_REUSE_MATRIX) { 5158 PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN)); 5159 } 5160 PetscFunctionReturn(0); 5161 } 5162 5163 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5164 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5165 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5166 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5167 PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav)); 5168 PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav)); 5169 aa = aav; 5170 ba = bav; 5171 if (scall == MAT_INITIAL_MATRIX) { 5172 PetscCall(PetscMalloc1(1+am,&ci)); 5173 ci[0] = 0; 5174 for (i=0; i<am; i++) { 5175 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5176 } 5177 PetscCall(PetscMalloc1(1+ci[am],&cj)); 5178 PetscCall(PetscMalloc1(1+ci[am],&ca)); 5179 k = 0; 5180 for (i=0; i<am; i++) { 5181 ncols_o = bi[i+1] - bi[i]; 5182 ncols_d = ai[i+1] - ai[i]; 5183 /* off-diagonal portion of A */ 5184 for (jo=0; jo<ncols_o; jo++) { 5185 col = cmap[*bj]; 5186 if (col >= cstart) break; 5187 cj[k] = col; bj++; 5188 ca[k++] = *ba++; 5189 } 5190 /* diagonal portion of A */ 5191 for (j=0; j<ncols_d; j++) { 5192 cj[k] = cstart + *aj++; 5193 ca[k++] = *aa++; 5194 } 5195 /* off-diagonal portion of A */ 5196 for (j=jo; j<ncols_o; j++) { 5197 cj[k] = cmap[*bj++]; 5198 ca[k++] = *ba++; 5199 } 5200 } 5201 /* put together the new matrix */ 5202 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc)); 5203 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5204 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5205 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5206 mat->free_a = PETSC_TRUE; 5207 mat->free_ij = PETSC_TRUE; 5208 mat->nonew = 0; 5209 } else if (scall == MAT_REUSE_MATRIX) { 5210 mat =(Mat_SeqAIJ*)(*A_loc)->data; 5211 ci = mat->i; 5212 cj = mat->j; 5213 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam)); 5214 for (i=0; i<am; i++) { 5215 /* off-diagonal portion of A */ 5216 ncols_o = bi[i+1] - bi[i]; 5217 for (jo=0; jo<ncols_o; jo++) { 5218 col = cmap[*bj]; 5219 if (col >= cstart) break; 5220 *cam++ = *ba++; bj++; 5221 } 5222 /* diagonal portion of A */ 5223 ncols_d = ai[i+1] - ai[i]; 5224 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5225 /* off-diagonal portion of A */ 5226 for (j=jo; j<ncols_o; j++) { 5227 *cam++ = *ba++; bj++; 5228 } 5229 } 5230 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam)); 5231 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5232 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav)); 5233 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav)); 5234 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5235 PetscFunctionReturn(0); 5236 } 5237 5238 /*@ 5239 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5240 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5241 5242 Not Collective 5243 5244 Input Parameters: 5245 + A - the matrix 5246 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5247 5248 Output Parameters: 5249 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5250 - A_loc - the local sequential matrix generated 5251 5252 Level: developer 5253 5254 Notes: 5255 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5256 5257 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5258 5259 @*/ 5260 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5261 { 5262 Mat Ao,Ad; 5263 const PetscInt *cmap; 5264 PetscMPIInt size; 5265 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5266 5267 PetscFunctionBegin; 5268 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap)); 5269 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5270 if (size == 1) { 5271 if (scall == MAT_INITIAL_MATRIX) { 5272 PetscCall(PetscObjectReference((PetscObject)Ad)); 5273 *A_loc = Ad; 5274 } else if (scall == MAT_REUSE_MATRIX) { 5275 PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN)); 5276 } 5277 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob)); 5278 PetscFunctionReturn(0); 5279 } 5280 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f)); 5281 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5282 if (f) { 5283 PetscCall((*f)(A,scall,glob,A_loc)); 5284 } else { 5285 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5286 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5287 Mat_SeqAIJ *c; 5288 PetscInt *ai = a->i, *aj = a->j; 5289 PetscInt *bi = b->i, *bj = b->j; 5290 PetscInt *ci,*cj; 5291 const PetscScalar *aa,*ba; 5292 PetscScalar *ca; 5293 PetscInt i,j,am,dn,on; 5294 5295 PetscCall(MatGetLocalSize(Ad,&am,&dn)); 5296 PetscCall(MatGetLocalSize(Ao,NULL,&on)); 5297 PetscCall(MatSeqAIJGetArrayRead(Ad,&aa)); 5298 PetscCall(MatSeqAIJGetArrayRead(Ao,&ba)); 5299 if (scall == MAT_INITIAL_MATRIX) { 5300 PetscInt k; 5301 PetscCall(PetscMalloc1(1+am,&ci)); 5302 PetscCall(PetscMalloc1(ai[am]+bi[am],&cj)); 5303 PetscCall(PetscMalloc1(ai[am]+bi[am],&ca)); 5304 ci[0] = 0; 5305 for (i=0,k=0; i<am; i++) { 5306 const PetscInt ncols_o = bi[i+1] - bi[i]; 5307 const PetscInt ncols_d = ai[i+1] - ai[i]; 5308 ci[i+1] = ci[i] + ncols_o + ncols_d; 5309 /* diagonal portion of A */ 5310 for (j=0; j<ncols_d; j++,k++) { 5311 cj[k] = *aj++; 5312 ca[k] = *aa++; 5313 } 5314 /* off-diagonal portion of A */ 5315 for (j=0; j<ncols_o; j++,k++) { 5316 cj[k] = dn + *bj++; 5317 ca[k] = *ba++; 5318 } 5319 } 5320 /* put together the new matrix */ 5321 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc)); 5322 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5323 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5324 c = (Mat_SeqAIJ*)(*A_loc)->data; 5325 c->free_a = PETSC_TRUE; 5326 c->free_ij = PETSC_TRUE; 5327 c->nonew = 0; 5328 PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name)); 5329 } else if (scall == MAT_REUSE_MATRIX) { 5330 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca)); 5331 for (i=0; i<am; i++) { 5332 const PetscInt ncols_d = ai[i+1] - ai[i]; 5333 const PetscInt ncols_o = bi[i+1] - bi[i]; 5334 /* diagonal portion of A */ 5335 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5336 /* off-diagonal portion of A */ 5337 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5338 } 5339 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca)); 5340 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5341 PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa)); 5342 PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa)); 5343 if (glob) { 5344 PetscInt cst, *gidx; 5345 5346 PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL)); 5347 PetscCall(PetscMalloc1(dn+on,&gidx)); 5348 for (i=0; i<dn; i++) gidx[i] = cst + i; 5349 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5350 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob)); 5351 } 5352 } 5353 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5354 PetscFunctionReturn(0); 5355 } 5356 5357 /*@C 5358 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5359 5360 Not Collective 5361 5362 Input Parameters: 5363 + A - the matrix 5364 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5365 - row, col - index sets of rows and columns to extract (or NULL) 5366 5367 Output Parameter: 5368 . A_loc - the local sequential matrix generated 5369 5370 Level: developer 5371 5372 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5373 5374 @*/ 5375 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5376 { 5377 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5378 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5379 IS isrowa,iscola; 5380 Mat *aloc; 5381 PetscBool match; 5382 5383 PetscFunctionBegin; 5384 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match)); 5385 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5386 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0)); 5387 if (!row) { 5388 start = A->rmap->rstart; end = A->rmap->rend; 5389 PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa)); 5390 } else { 5391 isrowa = *row; 5392 } 5393 if (!col) { 5394 start = A->cmap->rstart; 5395 cmap = a->garray; 5396 nzA = a->A->cmap->n; 5397 nzB = a->B->cmap->n; 5398 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5399 ncols = 0; 5400 for (i=0; i<nzB; i++) { 5401 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5402 else break; 5403 } 5404 imark = i; 5405 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5406 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5407 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola)); 5408 } else { 5409 iscola = *col; 5410 } 5411 if (scall != MAT_INITIAL_MATRIX) { 5412 PetscCall(PetscMalloc1(1,&aloc)); 5413 aloc[0] = *A_loc; 5414 } 5415 PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc)); 5416 if (!col) { /* attach global id of condensed columns */ 5417 PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola)); 5418 } 5419 *A_loc = aloc[0]; 5420 PetscCall(PetscFree(aloc)); 5421 if (!row) { 5422 PetscCall(ISDestroy(&isrowa)); 5423 } 5424 if (!col) { 5425 PetscCall(ISDestroy(&iscola)); 5426 } 5427 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0)); 5428 PetscFunctionReturn(0); 5429 } 5430 5431 /* 5432 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5433 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5434 * on a global size. 5435 * */ 5436 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5437 { 5438 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5439 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5440 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5441 PetscMPIInt owner; 5442 PetscSFNode *iremote,*oiremote; 5443 const PetscInt *lrowindices; 5444 PetscSF sf,osf; 5445 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5446 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5447 MPI_Comm comm; 5448 ISLocalToGlobalMapping mapping; 5449 const PetscScalar *pd_a,*po_a; 5450 5451 PetscFunctionBegin; 5452 PetscCall(PetscObjectGetComm((PetscObject)P,&comm)); 5453 /* plocalsize is the number of roots 5454 * nrows is the number of leaves 5455 * */ 5456 PetscCall(MatGetLocalSize(P,&plocalsize,NULL)); 5457 PetscCall(ISGetLocalSize(rows,&nrows)); 5458 PetscCall(PetscCalloc1(nrows,&iremote)); 5459 PetscCall(ISGetIndices(rows,&lrowindices)); 5460 for (i=0;i<nrows;i++) { 5461 /* Find a remote index and an owner for a row 5462 * The row could be local or remote 5463 * */ 5464 owner = 0; 5465 lidx = 0; 5466 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx)); 5467 iremote[i].index = lidx; 5468 iremote[i].rank = owner; 5469 } 5470 /* Create SF to communicate how many nonzero columns for each row */ 5471 PetscCall(PetscSFCreate(comm,&sf)); 5472 /* SF will figure out the number of nonzero colunms for each row, and their 5473 * offsets 5474 * */ 5475 PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5476 PetscCall(PetscSFSetFromOptions(sf)); 5477 PetscCall(PetscSFSetUp(sf)); 5478 5479 PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets)); 5480 PetscCall(PetscCalloc1(2*plocalsize,&nrcols)); 5481 PetscCall(PetscCalloc1(nrows,&pnnz)); 5482 roffsets[0] = 0; 5483 roffsets[1] = 0; 5484 for (i=0;i<plocalsize;i++) { 5485 /* diag */ 5486 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5487 /* off diag */ 5488 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5489 /* compute offsets so that we relative location for each row */ 5490 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5491 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5492 } 5493 PetscCall(PetscCalloc1(2*nrows,&nlcols)); 5494 PetscCall(PetscCalloc1(2*nrows,&loffsets)); 5495 /* 'r' means root, and 'l' means leaf */ 5496 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5497 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5498 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5499 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5500 PetscCall(PetscSFDestroy(&sf)); 5501 PetscCall(PetscFree(roffsets)); 5502 PetscCall(PetscFree(nrcols)); 5503 dntotalcols = 0; 5504 ontotalcols = 0; 5505 ncol = 0; 5506 for (i=0;i<nrows;i++) { 5507 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5508 ncol = PetscMax(pnnz[i],ncol); 5509 /* diag */ 5510 dntotalcols += nlcols[i*2+0]; 5511 /* off diag */ 5512 ontotalcols += nlcols[i*2+1]; 5513 } 5514 /* We do not need to figure the right number of columns 5515 * since all the calculations will be done by going through the raw data 5516 * */ 5517 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth)); 5518 PetscCall(MatSetUp(*P_oth)); 5519 PetscCall(PetscFree(pnnz)); 5520 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5521 /* diag */ 5522 PetscCall(PetscCalloc1(dntotalcols,&iremote)); 5523 /* off diag */ 5524 PetscCall(PetscCalloc1(ontotalcols,&oiremote)); 5525 /* diag */ 5526 PetscCall(PetscCalloc1(dntotalcols,&ilocal)); 5527 /* off diag */ 5528 PetscCall(PetscCalloc1(ontotalcols,&oilocal)); 5529 dntotalcols = 0; 5530 ontotalcols = 0; 5531 ntotalcols = 0; 5532 for (i=0;i<nrows;i++) { 5533 owner = 0; 5534 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL)); 5535 /* Set iremote for diag matrix */ 5536 for (j=0;j<nlcols[i*2+0];j++) { 5537 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5538 iremote[dntotalcols].rank = owner; 5539 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5540 ilocal[dntotalcols++] = ntotalcols++; 5541 } 5542 /* off diag */ 5543 for (j=0;j<nlcols[i*2+1];j++) { 5544 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5545 oiremote[ontotalcols].rank = owner; 5546 oilocal[ontotalcols++] = ntotalcols++; 5547 } 5548 } 5549 PetscCall(ISRestoreIndices(rows,&lrowindices)); 5550 PetscCall(PetscFree(loffsets)); 5551 PetscCall(PetscFree(nlcols)); 5552 PetscCall(PetscSFCreate(comm,&sf)); 5553 /* P serves as roots and P_oth is leaves 5554 * Diag matrix 5555 * */ 5556 PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5557 PetscCall(PetscSFSetFromOptions(sf)); 5558 PetscCall(PetscSFSetUp(sf)); 5559 5560 PetscCall(PetscSFCreate(comm,&osf)); 5561 /* Off diag */ 5562 PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER)); 5563 PetscCall(PetscSFSetFromOptions(osf)); 5564 PetscCall(PetscSFSetUp(osf)); 5565 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5566 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5567 /* We operate on the matrix internal data for saving memory */ 5568 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5569 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5570 PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL)); 5571 /* Convert to global indices for diag matrix */ 5572 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5573 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5574 /* We want P_oth store global indices */ 5575 PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping)); 5576 /* Use memory scalable approach */ 5577 PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH)); 5578 PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j)); 5579 PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5580 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5581 /* Convert back to local indices */ 5582 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5583 PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5584 nout = 0; 5585 PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j)); 5586 PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout); 5587 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5588 /* Exchange values */ 5589 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5590 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5591 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5592 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5593 /* Stop PETSc from shrinking memory */ 5594 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5595 PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY)); 5596 PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY)); 5597 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5598 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf)); 5599 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf)); 5600 PetscCall(PetscSFDestroy(&sf)); 5601 PetscCall(PetscSFDestroy(&osf)); 5602 PetscFunctionReturn(0); 5603 } 5604 5605 /* 5606 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5607 * This supports MPIAIJ and MAIJ 5608 * */ 5609 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5610 { 5611 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5612 Mat_SeqAIJ *p_oth; 5613 IS rows,map; 5614 PetscHMapI hamp; 5615 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5616 MPI_Comm comm; 5617 PetscSF sf,osf; 5618 PetscBool has; 5619 5620 PetscFunctionBegin; 5621 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5622 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0)); 5623 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5624 * and then create a submatrix (that often is an overlapping matrix) 5625 * */ 5626 if (reuse == MAT_INITIAL_MATRIX) { 5627 /* Use a hash table to figure out unique keys */ 5628 PetscCall(PetscHMapICreate(&hamp)); 5629 PetscCall(PetscHMapIResize(hamp,a->B->cmap->n)); 5630 PetscCall(PetscCalloc1(a->B->cmap->n,&mapping)); 5631 count = 0; 5632 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5633 for (i=0;i<a->B->cmap->n;i++) { 5634 key = a->garray[i]/dof; 5635 PetscCall(PetscHMapIHas(hamp,key,&has)); 5636 if (!has) { 5637 mapping[i] = count; 5638 PetscCall(PetscHMapISet(hamp,key,count++)); 5639 } else { 5640 /* Current 'i' has the same value the previous step */ 5641 mapping[i] = count-1; 5642 } 5643 } 5644 PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map)); 5645 PetscCall(PetscHMapIGetSize(hamp,&htsize)); 5646 PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count); 5647 PetscCall(PetscCalloc1(htsize,&rowindices)); 5648 off = 0; 5649 PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices)); 5650 PetscCall(PetscHMapIDestroy(&hamp)); 5651 PetscCall(PetscSortInt(htsize,rowindices)); 5652 PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows)); 5653 /* In case, the matrix was already created but users want to recreate the matrix */ 5654 PetscCall(MatDestroy(P_oth)); 5655 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth)); 5656 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map)); 5657 PetscCall(ISDestroy(&map)); 5658 PetscCall(ISDestroy(&rows)); 5659 } else if (reuse == MAT_REUSE_MATRIX) { 5660 /* If matrix was already created, we simply update values using SF objects 5661 * that as attached to the matrix ealier. 5662 */ 5663 const PetscScalar *pd_a,*po_a; 5664 5665 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf)); 5666 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf)); 5667 PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5668 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5669 /* Update values in place */ 5670 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5671 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5672 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5673 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5674 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5675 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5676 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5677 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5678 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5679 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0)); 5680 PetscFunctionReturn(0); 5681 } 5682 5683 /*@C 5684 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5685 5686 Collective on Mat 5687 5688 Input Parameters: 5689 + A - the first matrix in mpiaij format 5690 . B - the second matrix in mpiaij format 5691 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5692 5693 Output Parameters: 5694 + rowb - On input index sets of rows of B to extract (or NULL), modified on output 5695 . colb - On input index sets of columns of B to extract (or NULL), modified on output 5696 - B_seq - the sequential matrix generated 5697 5698 Level: developer 5699 5700 @*/ 5701 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5702 { 5703 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5704 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5705 IS isrowb,iscolb; 5706 Mat *bseq=NULL; 5707 5708 PetscFunctionBegin; 5709 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5710 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5711 } 5712 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0)); 5713 5714 if (scall == MAT_INITIAL_MATRIX) { 5715 start = A->cmap->rstart; 5716 cmap = a->garray; 5717 nzA = a->A->cmap->n; 5718 nzB = a->B->cmap->n; 5719 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5720 ncols = 0; 5721 for (i=0; i<nzB; i++) { /* row < local row index */ 5722 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5723 else break; 5724 } 5725 imark = i; 5726 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5727 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5728 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb)); 5729 PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb)); 5730 } else { 5731 PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5732 isrowb = *rowb; iscolb = *colb; 5733 PetscCall(PetscMalloc1(1,&bseq)); 5734 bseq[0] = *B_seq; 5735 } 5736 PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq)); 5737 *B_seq = bseq[0]; 5738 PetscCall(PetscFree(bseq)); 5739 if (!rowb) { 5740 PetscCall(ISDestroy(&isrowb)); 5741 } else { 5742 *rowb = isrowb; 5743 } 5744 if (!colb) { 5745 PetscCall(ISDestroy(&iscolb)); 5746 } else { 5747 *colb = iscolb; 5748 } 5749 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0)); 5750 PetscFunctionReturn(0); 5751 } 5752 5753 /* 5754 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5755 of the OFF-DIAGONAL portion of local A 5756 5757 Collective on Mat 5758 5759 Input Parameters: 5760 + A,B - the matrices in mpiaij format 5761 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5762 5763 Output Parameter: 5764 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5765 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5766 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5767 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5768 5769 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5770 for this matrix. This is not desirable.. 5771 5772 Level: developer 5773 5774 */ 5775 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5776 { 5777 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5778 Mat_SeqAIJ *b_oth; 5779 VecScatter ctx; 5780 MPI_Comm comm; 5781 const PetscMPIInt *rprocs,*sprocs; 5782 const PetscInt *srow,*rstarts,*sstarts; 5783 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5784 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5785 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5786 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5787 PetscMPIInt size,tag,rank,nreqs; 5788 5789 PetscFunctionBegin; 5790 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5791 PetscCallMPI(MPI_Comm_size(comm,&size)); 5792 5793 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5794 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5795 } 5796 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0)); 5797 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 5798 5799 if (size == 1) { 5800 startsj_s = NULL; 5801 bufa_ptr = NULL; 5802 *B_oth = NULL; 5803 PetscFunctionReturn(0); 5804 } 5805 5806 ctx = a->Mvctx; 5807 tag = ((PetscObject)ctx)->tag; 5808 5809 PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5810 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5811 PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs)); 5812 PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs)); 5813 PetscCall(PetscMalloc1(nreqs,&reqs)); 5814 rwaits = reqs; 5815 swaits = reqs + nrecvs; 5816 5817 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5818 if (scall == MAT_INITIAL_MATRIX) { 5819 /* i-array */ 5820 /*---------*/ 5821 /* post receives */ 5822 if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5823 for (i=0; i<nrecvs; i++) { 5824 rowlen = rvalues + rstarts[i]*rbs; 5825 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5826 PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5827 } 5828 5829 /* pack the outgoing message */ 5830 PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj)); 5831 5832 sstartsj[0] = 0; 5833 rstartsj[0] = 0; 5834 len = 0; /* total length of j or a array to be sent */ 5835 if (nsends) { 5836 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5837 PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues)); 5838 } 5839 for (i=0; i<nsends; i++) { 5840 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5841 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5842 for (j=0; j<nrows; j++) { 5843 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5844 for (l=0; l<sbs; l++) { 5845 PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */ 5846 5847 rowlen[j*sbs+l] = ncols; 5848 5849 len += ncols; 5850 PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); 5851 } 5852 k++; 5853 } 5854 PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5855 5856 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5857 } 5858 /* recvs and sends of i-array are completed */ 5859 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5860 PetscCall(PetscFree(svalues)); 5861 5862 /* allocate buffers for sending j and a arrays */ 5863 PetscCall(PetscMalloc1(len+1,&bufj)); 5864 PetscCall(PetscMalloc1(len+1,&bufa)); 5865 5866 /* create i-array of B_oth */ 5867 PetscCall(PetscMalloc1(aBn+2,&b_othi)); 5868 5869 b_othi[0] = 0; 5870 len = 0; /* total length of j or a array to be received */ 5871 k = 0; 5872 for (i=0; i<nrecvs; i++) { 5873 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5874 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5875 for (j=0; j<nrows; j++) { 5876 b_othi[k+1] = b_othi[k] + rowlen[j]; 5877 PetscCall(PetscIntSumError(rowlen[j],len,&len)); 5878 k++; 5879 } 5880 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5881 } 5882 PetscCall(PetscFree(rvalues)); 5883 5884 /* allocate space for j and a arrays of B_oth */ 5885 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj)); 5886 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha)); 5887 5888 /* j-array */ 5889 /*---------*/ 5890 /* post receives of j-array */ 5891 for (i=0; i<nrecvs; i++) { 5892 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5893 PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5894 } 5895 5896 /* pack the outgoing message j-array */ 5897 if (nsends) k = sstarts[0]; 5898 for (i=0; i<nsends; i++) { 5899 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5900 bufJ = bufj+sstartsj[i]; 5901 for (j=0; j<nrows; j++) { 5902 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5903 for (ll=0; ll<sbs; ll++) { 5904 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5905 for (l=0; l<ncols; l++) { 5906 *bufJ++ = cols[l]; 5907 } 5908 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5909 } 5910 } 5911 PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5912 } 5913 5914 /* recvs and sends of j-array are completed */ 5915 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5916 } else if (scall == MAT_REUSE_MATRIX) { 5917 sstartsj = *startsj_s; 5918 rstartsj = *startsj_r; 5919 bufa = *bufa_ptr; 5920 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5921 PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha)); 5922 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5923 5924 /* a-array */ 5925 /*---------*/ 5926 /* post receives of a-array */ 5927 for (i=0; i<nrecvs; i++) { 5928 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5929 PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i)); 5930 } 5931 5932 /* pack the outgoing message a-array */ 5933 if (nsends) k = sstarts[0]; 5934 for (i=0; i<nsends; i++) { 5935 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5936 bufA = bufa+sstartsj[i]; 5937 for (j=0; j<nrows; j++) { 5938 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5939 for (ll=0; ll<sbs; ll++) { 5940 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5941 for (l=0; l<ncols; l++) { 5942 *bufA++ = vals[l]; 5943 } 5944 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5945 } 5946 } 5947 PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i)); 5948 } 5949 /* recvs and sends of a-array are completed */ 5950 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5951 PetscCall(PetscFree(reqs)); 5952 5953 if (scall == MAT_INITIAL_MATRIX) { 5954 /* put together the new matrix */ 5955 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth)); 5956 5957 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5958 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5959 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5960 b_oth->free_a = PETSC_TRUE; 5961 b_oth->free_ij = PETSC_TRUE; 5962 b_oth->nonew = 0; 5963 5964 PetscCall(PetscFree(bufj)); 5965 if (!startsj_s || !bufa_ptr) { 5966 PetscCall(PetscFree2(sstartsj,rstartsj)); 5967 PetscCall(PetscFree(bufa_ptr)); 5968 } else { 5969 *startsj_s = sstartsj; 5970 *startsj_r = rstartsj; 5971 *bufa_ptr = bufa; 5972 } 5973 } else if (scall == MAT_REUSE_MATRIX) { 5974 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha)); 5975 } 5976 5977 PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5978 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs)); 5979 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0)); 5980 PetscFunctionReturn(0); 5981 } 5982 5983 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5984 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5985 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5986 #if defined(PETSC_HAVE_MKL_SPARSE) 5987 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5988 #endif 5989 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5990 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5991 #if defined(PETSC_HAVE_ELEMENTAL) 5992 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5993 #endif 5994 #if defined(PETSC_HAVE_SCALAPACK) 5995 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5996 #endif 5997 #if defined(PETSC_HAVE_HYPRE) 5998 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5999 #endif 6000 #if defined(PETSC_HAVE_CUDA) 6001 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 6002 #endif 6003 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6004 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 6005 #endif 6006 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 6007 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 6008 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6009 6010 /* 6011 Computes (B'*A')' since computing B*A directly is untenable 6012 6013 n p p 6014 [ ] [ ] [ ] 6015 m [ A ] * n [ B ] = m [ C ] 6016 [ ] [ ] [ ] 6017 6018 */ 6019 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 6020 { 6021 Mat At,Bt,Ct; 6022 6023 PetscFunctionBegin; 6024 PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At)); 6025 PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt)); 6026 PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct)); 6027 PetscCall(MatDestroy(&At)); 6028 PetscCall(MatDestroy(&Bt)); 6029 PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C)); 6030 PetscCall(MatDestroy(&Ct)); 6031 PetscFunctionReturn(0); 6032 } 6033 6034 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 6035 { 6036 PetscBool cisdense; 6037 6038 PetscFunctionBegin; 6039 PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n); 6040 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N)); 6041 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 6042 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 6043 if (!cisdense) { 6044 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 6045 } 6046 PetscCall(MatSetUp(C)); 6047 6048 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6049 PetscFunctionReturn(0); 6050 } 6051 6052 /* ----------------------------------------------------------------*/ 6053 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6054 { 6055 Mat_Product *product = C->product; 6056 Mat A = product->A,B=product->B; 6057 6058 PetscFunctionBegin; 6059 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6060 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6061 6062 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6063 C->ops->productsymbolic = MatProductSymbolic_AB; 6064 PetscFunctionReturn(0); 6065 } 6066 6067 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6068 { 6069 Mat_Product *product = C->product; 6070 6071 PetscFunctionBegin; 6072 if (product->type == MATPRODUCT_AB) { 6073 PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6074 } 6075 PetscFunctionReturn(0); 6076 } 6077 6078 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6079 6080 Input Parameters: 6081 6082 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6083 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6084 6085 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6086 6087 For Set1, j1[] contains column indices of the nonzeros. 6088 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6089 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6090 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6091 6092 Similar for Set2. 6093 6094 This routine merges the two sets of nonzeros row by row and removes repeats. 6095 6096 Output Parameters: (memory is allocated by the caller) 6097 6098 i[],j[]: the CSR of the merged matrix, which has m rows. 6099 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6100 imap2[]: similar to imap1[], but for Set2. 6101 Note we order nonzeros row-by-row and from left to right. 6102 */ 6103 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[], 6104 const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[], 6105 PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[]) 6106 { 6107 PetscInt r,m; /* Row index of mat */ 6108 PetscCount t,t1,t2,b1,e1,b2,e2; 6109 6110 PetscFunctionBegin; 6111 PetscCall(MatGetLocalSize(mat,&m,NULL)); 6112 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6113 i[0] = 0; 6114 for (r=0; r<m; r++) { /* Do row by row merging */ 6115 b1 = rowBegin1[r]; 6116 e1 = rowEnd1[r]; 6117 b2 = rowBegin2[r]; 6118 e2 = rowEnd2[r]; 6119 while (b1 < e1 && b2 < e2) { 6120 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6121 j[t] = j1[b1]; 6122 imap1[t1] = t; 6123 imap2[t2] = t; 6124 b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6125 b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6126 t1++; t2++; t++; 6127 } else if (j1[b1] < j2[b2]) { 6128 j[t] = j1[b1]; 6129 imap1[t1] = t; 6130 b1 += jmap1[t1+1] - jmap1[t1]; 6131 t1++; t++; 6132 } else { 6133 j[t] = j2[b2]; 6134 imap2[t2] = t; 6135 b2 += jmap2[t2+1] - jmap2[t2]; 6136 t2++; t++; 6137 } 6138 } 6139 /* Merge the remaining in either j1[] or j2[] */ 6140 while (b1 < e1) { 6141 j[t] = j1[b1]; 6142 imap1[t1] = t; 6143 b1 += jmap1[t1+1] - jmap1[t1]; 6144 t1++; t++; 6145 } 6146 while (b2 < e2) { 6147 j[t] = j2[b2]; 6148 imap2[t2] = t; 6149 b2 += jmap2[t2+1] - jmap2[t2]; 6150 t2++; t++; 6151 } 6152 i[r+1] = t; 6153 } 6154 PetscFunctionReturn(0); 6155 } 6156 6157 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6158 6159 Input Parameters: 6160 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6161 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6162 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6163 6164 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6165 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6166 6167 Output Parameters: 6168 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6169 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6170 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6171 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6172 6173 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6174 Atot: number of entries belonging to the diagonal block. 6175 Annz: number of unique nonzeros belonging to the diagonal block. 6176 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6177 repeats (i.e., same 'i,j' pair). 6178 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6179 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6180 6181 Atot: number of entries belonging to the diagonal block 6182 Annz: number of unique nonzeros belonging to the diagonal block. 6183 6184 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6185 6186 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6187 */ 6188 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[], 6189 PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[], 6190 PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_, 6191 PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_) 6192 { 6193 PetscInt cstart,cend,rstart,rend,row,col; 6194 PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6195 PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6196 PetscCount k,m,p,q,r,s,mid; 6197 PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap; 6198 6199 PetscFunctionBegin; 6200 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6201 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6202 m = rend - rstart; 6203 6204 for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */ 6205 6206 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6207 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6208 */ 6209 while (k<n) { 6210 row = i[k]; 6211 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6212 for (s=k; s<n; s++) if (i[s] != row) break; 6213 for (p=k; p<s; p++) { 6214 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6215 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); 6216 } 6217 PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k)); 6218 PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6219 rowBegin[row-rstart] = k; 6220 rowMid[row-rstart] = mid; 6221 rowEnd[row-rstart] = s; 6222 6223 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6224 Atot += mid - k; 6225 Btot += s - mid; 6226 6227 /* Count unique nonzeros of this diag/offdiag row */ 6228 for (p=k; p<mid;) { 6229 col = j[p]; 6230 do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */ 6231 Annz++; 6232 } 6233 6234 for (p=mid; p<s;) { 6235 col = j[p]; 6236 do {p++;} while (p<s && j[p] == col); 6237 Bnnz++; 6238 } 6239 k = s; 6240 } 6241 6242 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6243 PetscCall(PetscMalloc1(Atot,&Aperm)); 6244 PetscCall(PetscMalloc1(Btot,&Bperm)); 6245 PetscCall(PetscMalloc1(Annz+1,&Ajmap)); 6246 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap)); 6247 6248 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6249 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6250 for (r=0; r<m; r++) { 6251 k = rowBegin[r]; 6252 mid = rowMid[r]; 6253 s = rowEnd[r]; 6254 PetscCall(PetscArraycpy(Aperm+Atot,perm+k, mid-k)); 6255 PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid)); 6256 Atot += mid - k; 6257 Btot += s - mid; 6258 6259 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6260 for (p=k; p<mid;) { 6261 col = j[p]; 6262 q = p; 6263 do {p++;} while (p<mid && j[p] == col); 6264 Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6265 Annz++; 6266 } 6267 6268 for (p=mid; p<s;) { 6269 col = j[p]; 6270 q = p; 6271 do {p++;} while (p<s && j[p] == col); 6272 Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6273 Bnnz++; 6274 } 6275 } 6276 /* Output */ 6277 *Aperm_ = Aperm; 6278 *Annz_ = Annz; 6279 *Atot_ = Atot; 6280 *Ajmap_ = Ajmap; 6281 *Bperm_ = Bperm; 6282 *Bnnz_ = Bnnz; 6283 *Btot_ = Btot; 6284 *Bjmap_ = Bjmap; 6285 PetscFunctionReturn(0); 6286 } 6287 6288 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6289 6290 Input Parameters: 6291 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6292 nnz: number of unique nonzeros in the merged matrix 6293 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6294 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6295 6296 Output Parameter: (memory is allocated by the caller) 6297 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6298 6299 Example: 6300 nnz1 = 4 6301 nnz = 6 6302 imap = [1,3,4,5] 6303 jmap = [0,3,5,6,7] 6304 then, 6305 jmap_new = [0,0,3,3,5,6,7] 6306 */ 6307 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[]) 6308 { 6309 PetscCount k,p; 6310 6311 PetscFunctionBegin; 6312 jmap_new[0] = 0; 6313 p = nnz; /* p loops over jmap_new[] backwards */ 6314 for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */ 6315 for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1]; 6316 } 6317 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6318 PetscFunctionReturn(0); 6319 } 6320 6321 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6322 { 6323 MPI_Comm comm; 6324 PetscMPIInt rank,size; 6325 PetscInt m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6326 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6327 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6328 6329 PetscFunctionBegin; 6330 PetscCall(PetscFree(mpiaij->garray)); 6331 PetscCall(VecDestroy(&mpiaij->lvec)); 6332 #if defined(PETSC_USE_CTABLE) 6333 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6334 #else 6335 PetscCall(PetscFree(mpiaij->colmap)); 6336 #endif 6337 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6338 mat->assembled = PETSC_FALSE; 6339 mat->was_assembled = PETSC_FALSE; 6340 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6341 6342 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 6343 PetscCallMPI(MPI_Comm_size(comm,&size)); 6344 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 6345 PetscCall(PetscLayoutSetUp(mat->rmap)); 6346 PetscCall(PetscLayoutSetUp(mat->cmap)); 6347 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6348 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6349 PetscCall(MatGetLocalSize(mat,&m,&n)); 6350 PetscCall(MatGetSize(mat,&M,&N)); 6351 6352 /* ---------------------------------------------------------------------------*/ 6353 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6354 /* entries come first, then local rows, then remote rows. */ 6355 /* ---------------------------------------------------------------------------*/ 6356 PetscCount n1 = coo_n,*perm1; 6357 PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 6358 PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1)); 6359 PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */ 6360 PetscCall(PetscArraycpy(j1,coo_j,n1)); 6361 for (k=0; k<n1; k++) perm1[k] = k; 6362 6363 /* Manipulate indices so that entries with negative row or col indices will have smallest 6364 row indices, local entries will have greater but negative row indices, and remote entries 6365 will have positive row indices. 6366 */ 6367 for (k=0; k<n1; k++) { 6368 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6369 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6370 else { 6371 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6372 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6373 } 6374 } 6375 6376 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6377 PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1)); 6378 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */ 6379 PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */ 6380 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6381 6382 /* ---------------------------------------------------------------------------*/ 6383 /* Split local rows into diag/offdiag portions */ 6384 /* ---------------------------------------------------------------------------*/ 6385 PetscCount *rowBegin1,*rowMid1,*rowEnd1; 6386 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1; 6387 PetscCount Annz1,Bnnz1,Atot1,Btot1; 6388 6389 PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1)); 6390 PetscCall(PetscMalloc1(n1-rem,&Cperm1)); 6391 PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1)); 6392 6393 /* ---------------------------------------------------------------------------*/ 6394 /* Send remote rows to their owner */ 6395 /* ---------------------------------------------------------------------------*/ 6396 /* Find which rows should be sent to which remote ranks*/ 6397 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6398 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6399 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6400 const PetscInt *ranges; 6401 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6402 6403 PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges)); 6404 PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries)); 6405 for (k=rem; k<n1;) { 6406 PetscMPIInt owner; 6407 PetscInt firstRow,lastRow; 6408 6409 /* Locate a row range */ 6410 firstRow = i1[k]; /* first row of this owner */ 6411 PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner)); 6412 lastRow = ranges[owner+1]-1; /* last row of this owner */ 6413 6414 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6415 PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p)); 6416 6417 /* All entries in [k,p) belong to this remote owner */ 6418 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6419 PetscMPIInt *sendto2; 6420 PetscInt *nentries2; 6421 PetscInt maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size; 6422 6423 PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2)); 6424 PetscCall(PetscArraycpy(sendto2,sendto,maxNsend)); 6425 PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1)); 6426 PetscCall(PetscFree2(sendto,nentries2)); 6427 sendto = sendto2; 6428 nentries = nentries2; 6429 maxNsend = maxNsend2; 6430 } 6431 sendto[nsend] = owner; 6432 nentries[nsend] = p - k; 6433 PetscCall(PetscCountCast(p-k,&nentries[nsend])); 6434 nsend++; 6435 k = p; 6436 } 6437 6438 /* Build 1st SF to know offsets on remote to send data */ 6439 PetscSF sf1; 6440 PetscInt nroots = 1,nroots2 = 0; 6441 PetscInt nleaves = nsend,nleaves2 = 0; 6442 PetscInt *offsets; 6443 PetscSFNode *iremote; 6444 6445 PetscCall(PetscSFCreate(comm,&sf1)); 6446 PetscCall(PetscMalloc1(nsend,&iremote)); 6447 PetscCall(PetscMalloc1(nsend,&offsets)); 6448 for (k=0; k<nsend; k++) { 6449 iremote[k].rank = sendto[k]; 6450 iremote[k].index = 0; 6451 nleaves2 += nentries[k]; 6452 PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt"); 6453 } 6454 PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6455 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM)); 6456 PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6457 PetscCall(PetscSFDestroy(&sf1)); 6458 PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem); 6459 6460 /* Build 2nd SF to send remote COOs to their owner */ 6461 PetscSF sf2; 6462 nroots = nroots2; 6463 nleaves = nleaves2; 6464 PetscCall(PetscSFCreate(comm,&sf2)); 6465 PetscCall(PetscSFSetFromOptions(sf2)); 6466 PetscCall(PetscMalloc1(nleaves,&iremote)); 6467 p = 0; 6468 for (k=0; k<nsend; k++) { 6469 PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt"); 6470 for (q=0; q<nentries[k]; q++,p++) { 6471 iremote[p].rank = sendto[k]; 6472 iremote[p].index = offsets[k] + q; 6473 } 6474 } 6475 PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6476 6477 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6478 PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem)); 6479 6480 /* Send the remote COOs to their owner */ 6481 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6482 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6483 PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2)); 6484 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE)); 6485 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE)); 6486 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE)); 6487 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE)); 6488 6489 PetscCall(PetscFree(offsets)); 6490 PetscCall(PetscFree2(sendto,nentries)); 6491 6492 /* ---------------------------------------------------------------*/ 6493 /* Sort received COOs by row along with the permutation array */ 6494 /* ---------------------------------------------------------------*/ 6495 for (k=0; k<n2; k++) perm2[k] = k; 6496 PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2)); 6497 6498 /* ---------------------------------------------------------------*/ 6499 /* Split received COOs into diag/offdiag portions */ 6500 /* ---------------------------------------------------------------*/ 6501 PetscCount *rowBegin2,*rowMid2,*rowEnd2; 6502 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6503 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6504 6505 PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2)); 6506 PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2)); 6507 6508 /* --------------------------------------------------------------------------*/ 6509 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6510 /* --------------------------------------------------------------------------*/ 6511 PetscInt *Ai,*Bi; 6512 PetscInt *Aj,*Bj; 6513 6514 PetscCall(PetscMalloc1(m+1,&Ai)); 6515 PetscCall(PetscMalloc1(m+1,&Bi)); 6516 PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6517 PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj)); 6518 6519 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6520 PetscCall(PetscMalloc1(Annz1,&Aimap1)); 6521 PetscCall(PetscMalloc1(Bnnz1,&Bimap1)); 6522 PetscCall(PetscMalloc1(Annz2,&Aimap2)); 6523 PetscCall(PetscMalloc1(Bnnz2,&Bimap2)); 6524 6525 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj)); 6526 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj)); 6527 6528 /* --------------------------------------------------------------------------*/ 6529 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6530 /* expect nonzeros in A/B most likely have local contributing entries */ 6531 /* --------------------------------------------------------------------------*/ 6532 PetscInt Annz = Ai[m]; 6533 PetscInt Bnnz = Bi[m]; 6534 PetscCount *Ajmap1_new,*Bjmap1_new; 6535 6536 PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new)); 6537 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new)); 6538 6539 PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new)); 6540 PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new)); 6541 6542 PetscCall(PetscFree(Aimap1)); 6543 PetscCall(PetscFree(Ajmap1)); 6544 PetscCall(PetscFree(Bimap1)); 6545 PetscCall(PetscFree(Bjmap1)); 6546 PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1)); 6547 PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2)); 6548 PetscCall(PetscFree3(i1,j1,perm1)); 6549 PetscCall(PetscFree3(i2,j2,perm2)); 6550 6551 Ajmap1 = Ajmap1_new; 6552 Bjmap1 = Bjmap1_new; 6553 6554 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6555 if (Annz < Annz1 + Annz2) { 6556 PetscInt *Aj_new; 6557 PetscCall(PetscMalloc1(Annz,&Aj_new)); 6558 PetscCall(PetscArraycpy(Aj_new,Aj,Annz)); 6559 PetscCall(PetscFree(Aj)); 6560 Aj = Aj_new; 6561 } 6562 6563 if (Bnnz < Bnnz1 + Bnnz2) { 6564 PetscInt *Bj_new; 6565 PetscCall(PetscMalloc1(Bnnz,&Bj_new)); 6566 PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz)); 6567 PetscCall(PetscFree(Bj)); 6568 Bj = Bj_new; 6569 } 6570 6571 /* --------------------------------------------------------------------------------*/ 6572 /* Create new submatrices for on-process and off-process coupling */ 6573 /* --------------------------------------------------------------------------------*/ 6574 PetscScalar *Aa,*Ba; 6575 MatType rtype; 6576 Mat_SeqAIJ *a,*b; 6577 PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */ 6578 PetscCall(PetscCalloc1(Bnnz,&Ba)); 6579 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6580 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6581 PetscCall(MatDestroy(&mpiaij->A)); 6582 PetscCall(MatDestroy(&mpiaij->B)); 6583 PetscCall(MatGetRootType_Private(mat,&rtype)); 6584 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A)); 6585 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B)); 6586 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6587 6588 a = (Mat_SeqAIJ*)mpiaij->A->data; 6589 b = (Mat_SeqAIJ*)mpiaij->B->data; 6590 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6591 a->free_a = b->free_a = PETSC_TRUE; 6592 a->free_ij = b->free_ij = PETSC_TRUE; 6593 6594 /* conversion must happen AFTER multiply setup */ 6595 PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A)); 6596 PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B)); 6597 PetscCall(VecDestroy(&mpiaij->lvec)); 6598 PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL)); 6599 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec)); 6600 6601 mpiaij->coo_n = coo_n; 6602 mpiaij->coo_sf = sf2; 6603 mpiaij->sendlen = nleaves; 6604 mpiaij->recvlen = nroots; 6605 6606 mpiaij->Annz = Annz; 6607 mpiaij->Bnnz = Bnnz; 6608 6609 mpiaij->Annz2 = Annz2; 6610 mpiaij->Bnnz2 = Bnnz2; 6611 6612 mpiaij->Atot1 = Atot1; 6613 mpiaij->Atot2 = Atot2; 6614 mpiaij->Btot1 = Btot1; 6615 mpiaij->Btot2 = Btot2; 6616 6617 mpiaij->Ajmap1 = Ajmap1; 6618 mpiaij->Aperm1 = Aperm1; 6619 6620 mpiaij->Bjmap1 = Bjmap1; 6621 mpiaij->Bperm1 = Bperm1; 6622 6623 mpiaij->Aimap2 = Aimap2; 6624 mpiaij->Ajmap2 = Ajmap2; 6625 mpiaij->Aperm2 = Aperm2; 6626 6627 mpiaij->Bimap2 = Bimap2; 6628 mpiaij->Bjmap2 = Bjmap2; 6629 mpiaij->Bperm2 = Bperm2; 6630 6631 mpiaij->Cperm1 = Cperm1; 6632 6633 /* Allocate in preallocation. If not used, it has zero cost on host */ 6634 PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf)); 6635 PetscFunctionReturn(0); 6636 } 6637 6638 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode) 6639 { 6640 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6641 Mat A = mpiaij->A,B = mpiaij->B; 6642 PetscCount Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2; 6643 PetscScalar *Aa,*Ba; 6644 PetscScalar *sendbuf = mpiaij->sendbuf; 6645 PetscScalar *recvbuf = mpiaij->recvbuf; 6646 const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2; 6647 const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2; 6648 const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2; 6649 const PetscCount *Cperm1 = mpiaij->Cperm1; 6650 6651 PetscFunctionBegin; 6652 PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */ 6653 PetscCall(MatSeqAIJGetArray(B,&Ba)); 6654 6655 /* Pack entries to be sent to remote */ 6656 for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6657 6658 /* Send remote entries to their owner and overlap the communication with local computation */ 6659 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE)); 6660 /* Add local entries to A and B */ 6661 for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6662 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stablility */ 6663 for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]]; 6664 Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum; 6665 } 6666 for (PetscCount i=0; i<Bnnz; i++) { 6667 PetscScalar sum = 0.0; 6668 for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]]; 6669 Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum; 6670 } 6671 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE)); 6672 6673 /* Add received remote entries to A and B */ 6674 for (PetscCount i=0; i<Annz2; i++) { 6675 for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6676 } 6677 for (PetscCount i=0; i<Bnnz2; i++) { 6678 for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6679 } 6680 PetscCall(MatSeqAIJRestoreArray(A,&Aa)); 6681 PetscCall(MatSeqAIJRestoreArray(B,&Ba)); 6682 PetscFunctionReturn(0); 6683 } 6684 6685 /* ----------------------------------------------------------------*/ 6686 6687 /*MC 6688 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6689 6690 Options Database Keys: 6691 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6692 6693 Level: beginner 6694 6695 Notes: 6696 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6697 in this case the values associated with the rows and columns one passes in are set to zero 6698 in the matrix 6699 6700 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 6701 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6702 6703 .seealso: `MatCreateAIJ()` 6704 M*/ 6705 6706 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6707 { 6708 Mat_MPIAIJ *b; 6709 PetscMPIInt size; 6710 6711 PetscFunctionBegin; 6712 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 6713 6714 PetscCall(PetscNewLog(B,&b)); 6715 B->data = (void*)b; 6716 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 6717 B->assembled = PETSC_FALSE; 6718 B->insertmode = NOT_SET_VALUES; 6719 b->size = size; 6720 6721 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 6722 6723 /* build cache for off array entries formed */ 6724 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 6725 6726 b->donotstash = PETSC_FALSE; 6727 b->colmap = NULL; 6728 b->garray = NULL; 6729 b->roworiented = PETSC_TRUE; 6730 6731 /* stuff used for matrix vector multiply */ 6732 b->lvec = NULL; 6733 b->Mvctx = NULL; 6734 6735 /* stuff for MatGetRow() */ 6736 b->rowindices = NULL; 6737 b->rowvalues = NULL; 6738 b->getrowactive = PETSC_FALSE; 6739 6740 /* flexible pointer used in CUSPARSE classes */ 6741 b->spptr = NULL; 6742 6743 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6744 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ)); 6745 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ)); 6746 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ)); 6747 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ)); 6748 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ)); 6749 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6750 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ)); 6751 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM)); 6752 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL)); 6753 #if defined(PETSC_HAVE_CUDA) 6754 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6755 #endif 6756 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6757 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos)); 6758 #endif 6759 #if defined(PETSC_HAVE_MKL_SPARSE) 6760 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL)); 6761 #endif 6762 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL)); 6763 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ)); 6764 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ)); 6765 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense)); 6766 #if defined(PETSC_HAVE_ELEMENTAL) 6767 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental)); 6768 #endif 6769 #if defined(PETSC_HAVE_SCALAPACK) 6770 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK)); 6771 #endif 6772 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS)); 6773 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL)); 6774 #if defined(PETSC_HAVE_HYPRE) 6775 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE)); 6776 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6777 #endif 6778 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ)); 6779 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ)); 6780 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ)); 6781 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ)); 6782 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ)); 6783 PetscFunctionReturn(0); 6784 } 6785 6786 /*@C 6787 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6788 and "off-diagonal" part of the matrix in CSR format. 6789 6790 Collective 6791 6792 Input Parameters: 6793 + comm - MPI communicator 6794 . m - number of local rows (Cannot be PETSC_DECIDE) 6795 . n - This value should be the same as the local size used in creating the 6796 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6797 calculated if N is given) For square matrices n is almost always m. 6798 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6799 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6800 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6801 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6802 . a - matrix values 6803 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6804 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6805 - oa - matrix values 6806 6807 Output Parameter: 6808 . mat - the matrix 6809 6810 Level: advanced 6811 6812 Notes: 6813 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6814 must free the arrays once the matrix has been destroyed and not before. 6815 6816 The i and j indices are 0 based 6817 6818 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6819 6820 This sets local rows and cannot be used to set off-processor values. 6821 6822 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6823 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6824 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6825 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6826 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6827 communication if it is known that only local entries will be set. 6828 6829 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6830 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6831 @*/ 6832 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6833 { 6834 Mat_MPIAIJ *maij; 6835 6836 PetscFunctionBegin; 6837 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6838 PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6839 PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6840 PetscCall(MatCreate(comm,mat)); 6841 PetscCall(MatSetSizes(*mat,m,n,M,N)); 6842 PetscCall(MatSetType(*mat,MATMPIAIJ)); 6843 maij = (Mat_MPIAIJ*) (*mat)->data; 6844 6845 (*mat)->preallocated = PETSC_TRUE; 6846 6847 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6848 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6849 6850 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A)); 6851 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B)); 6852 6853 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 6854 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 6855 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 6856 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 6857 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 6858 PetscFunctionReturn(0); 6859 } 6860 6861 typedef struct { 6862 Mat *mp; /* intermediate products */ 6863 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6864 PetscInt cp; /* number of intermediate products */ 6865 6866 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6867 PetscInt *startsj_s,*startsj_r; 6868 PetscScalar *bufa; 6869 Mat P_oth; 6870 6871 /* may take advantage of merging product->B */ 6872 Mat Bloc; /* B-local by merging diag and off-diag */ 6873 6874 /* cusparse does not have support to split between symbolic and numeric phases. 6875 When api_user is true, we don't need to update the numerical values 6876 of the temporary storage */ 6877 PetscBool reusesym; 6878 6879 /* support for COO values insertion */ 6880 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6881 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6882 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6883 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6884 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6885 PetscMemType mtype; 6886 6887 /* customization */ 6888 PetscBool abmerge; 6889 PetscBool P_oth_bind; 6890 } MatMatMPIAIJBACKEND; 6891 6892 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6893 { 6894 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6895 PetscInt i; 6896 6897 PetscFunctionBegin; 6898 PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r)); 6899 PetscCall(PetscFree(mmdata->bufa)); 6900 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v)); 6901 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w)); 6902 PetscCall(MatDestroy(&mmdata->P_oth)); 6903 PetscCall(MatDestroy(&mmdata->Bloc)); 6904 PetscCall(PetscSFDestroy(&mmdata->sf)); 6905 for (i = 0; i < mmdata->cp; i++) { 6906 PetscCall(MatDestroy(&mmdata->mp[i])); 6907 } 6908 PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp)); 6909 PetscCall(PetscFree(mmdata->own[0])); 6910 PetscCall(PetscFree(mmdata->own)); 6911 PetscCall(PetscFree(mmdata->off[0])); 6912 PetscCall(PetscFree(mmdata->off)); 6913 PetscCall(PetscFree(mmdata)); 6914 PetscFunctionReturn(0); 6915 } 6916 6917 /* Copy selected n entries with indices in idx[] of A to v[]. 6918 If idx is NULL, copy the whole data array of A to v[] 6919 */ 6920 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6921 { 6922 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6923 6924 PetscFunctionBegin; 6925 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f)); 6926 if (f) { 6927 PetscCall((*f)(A,n,idx,v)); 6928 } else { 6929 const PetscScalar *vv; 6930 6931 PetscCall(MatSeqAIJGetArrayRead(A,&vv)); 6932 if (n && idx) { 6933 PetscScalar *w = v; 6934 const PetscInt *oi = idx; 6935 PetscInt j; 6936 6937 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6938 } else { 6939 PetscCall(PetscArraycpy(v,vv,n)); 6940 } 6941 PetscCall(MatSeqAIJRestoreArrayRead(A,&vv)); 6942 } 6943 PetscFunctionReturn(0); 6944 } 6945 6946 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6947 { 6948 MatMatMPIAIJBACKEND *mmdata; 6949 PetscInt i,n_d,n_o; 6950 6951 PetscFunctionBegin; 6952 MatCheckProduct(C,1); 6953 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6954 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6955 if (!mmdata->reusesym) { /* update temporary matrices */ 6956 if (mmdata->P_oth) { 6957 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 6958 } 6959 if (mmdata->Bloc) { 6960 PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc)); 6961 } 6962 } 6963 mmdata->reusesym = PETSC_FALSE; 6964 6965 for (i = 0; i < mmdata->cp; i++) { 6966 PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6967 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 6968 } 6969 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6970 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6971 6972 if (mmdata->mptmp[i]) continue; 6973 if (noff) { 6974 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6975 6976 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o)); 6977 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d)); 6978 n_o += noff; 6979 n_d += nown; 6980 } else { 6981 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6982 6983 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d)); 6984 n_d += mm->nz; 6985 } 6986 } 6987 if (mmdata->hasoffproc) { /* offprocess insertion */ 6988 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6989 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6990 } 6991 PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES)); 6992 PetscFunctionReturn(0); 6993 } 6994 6995 /* Support for Pt * A, A * P, or Pt * A * P */ 6996 #define MAX_NUMBER_INTERMEDIATE 4 6997 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6998 { 6999 Mat_Product *product = C->product; 7000 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7001 Mat_MPIAIJ *a,*p; 7002 MatMatMPIAIJBACKEND *mmdata; 7003 ISLocalToGlobalMapping P_oth_l2g = NULL; 7004 IS glob = NULL; 7005 const char *prefix; 7006 char pprefix[256]; 7007 const PetscInt *globidx,*P_oth_idx; 7008 PetscInt i,j,cp,m,n,M,N,*coo_i,*coo_j; 7009 PetscCount ncoo,ncoo_d,ncoo_o,ncoo_oown; 7010 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7011 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7012 /* a base offset; type-2: sparse with a local to global map table */ 7013 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7014 7015 MatProductType ptype; 7016 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 7017 PetscMPIInt size; 7018 7019 PetscFunctionBegin; 7020 MatCheckProduct(C,1); 7021 PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 7022 ptype = product->type; 7023 if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 7024 ptype = MATPRODUCT_AB; 7025 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7026 } 7027 switch (ptype) { 7028 case MATPRODUCT_AB: 7029 A = product->A; 7030 P = product->B; 7031 m = A->rmap->n; 7032 n = P->cmap->n; 7033 M = A->rmap->N; 7034 N = P->cmap->N; 7035 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7036 break; 7037 case MATPRODUCT_AtB: 7038 P = product->A; 7039 A = product->B; 7040 m = P->cmap->n; 7041 n = A->cmap->n; 7042 M = P->cmap->N; 7043 N = A->cmap->N; 7044 hasoffproc = PETSC_TRUE; 7045 break; 7046 case MATPRODUCT_PtAP: 7047 A = product->A; 7048 P = product->B; 7049 m = P->cmap->n; 7050 n = P->cmap->n; 7051 M = P->cmap->N; 7052 N = P->cmap->N; 7053 hasoffproc = PETSC_TRUE; 7054 break; 7055 default: 7056 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7057 } 7058 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size)); 7059 if (size == 1) hasoffproc = PETSC_FALSE; 7060 7061 /* defaults */ 7062 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 7063 mp[i] = NULL; 7064 mptmp[i] = PETSC_FALSE; 7065 rmapt[i] = -1; 7066 cmapt[i] = -1; 7067 rmapa[i] = NULL; 7068 cmapa[i] = NULL; 7069 } 7070 7071 /* customization */ 7072 PetscCall(PetscNew(&mmdata)); 7073 mmdata->reusesym = product->api_user; 7074 if (ptype == MATPRODUCT_AB) { 7075 if (product->api_user) { 7076 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat"); 7077 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 7078 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7079 PetscOptionsEnd(); 7080 } else { 7081 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat"); 7082 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 7083 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7084 PetscOptionsEnd(); 7085 } 7086 } else if (ptype == MATPRODUCT_PtAP) { 7087 if (product->api_user) { 7088 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat"); 7089 PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7090 PetscOptionsEnd(); 7091 } else { 7092 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 7093 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7094 PetscOptionsEnd(); 7095 } 7096 } 7097 a = (Mat_MPIAIJ*)A->data; 7098 p = (Mat_MPIAIJ*)P->data; 7099 PetscCall(MatSetSizes(C,m,n,M,N)); 7100 PetscCall(PetscLayoutSetUp(C->rmap)); 7101 PetscCall(PetscLayoutSetUp(C->cmap)); 7102 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 7103 PetscCall(MatGetOptionsPrefix(C,&prefix)); 7104 7105 cp = 0; 7106 switch (ptype) { 7107 case MATPRODUCT_AB: /* A * P */ 7108 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7109 7110 /* A_diag * P_local (merged or not) */ 7111 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7112 /* P is product->B */ 7113 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7114 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7115 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7116 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7117 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7118 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7119 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7120 mp[cp]->product->api_user = product->api_user; 7121 PetscCall(MatProductSetFromOptions(mp[cp])); 7122 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7123 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7124 PetscCall(ISGetIndices(glob,&globidx)); 7125 rmapt[cp] = 1; 7126 cmapt[cp] = 2; 7127 cmapa[cp] = globidx; 7128 mptmp[cp] = PETSC_FALSE; 7129 cp++; 7130 } else { /* A_diag * P_diag and A_diag * P_off */ 7131 PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp])); 7132 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7133 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7134 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7135 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7136 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7137 mp[cp]->product->api_user = product->api_user; 7138 PetscCall(MatProductSetFromOptions(mp[cp])); 7139 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7140 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7141 rmapt[cp] = 1; 7142 cmapt[cp] = 1; 7143 mptmp[cp] = PETSC_FALSE; 7144 cp++; 7145 PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp])); 7146 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7147 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7148 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7149 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7150 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7151 mp[cp]->product->api_user = product->api_user; 7152 PetscCall(MatProductSetFromOptions(mp[cp])); 7153 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7154 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7155 rmapt[cp] = 1; 7156 cmapt[cp] = 2; 7157 cmapa[cp] = p->garray; 7158 mptmp[cp] = PETSC_FALSE; 7159 cp++; 7160 } 7161 7162 /* A_off * P_other */ 7163 if (mmdata->P_oth) { 7164 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */ 7165 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7166 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7167 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7168 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7169 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7170 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7171 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7172 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7173 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7174 mp[cp]->product->api_user = product->api_user; 7175 PetscCall(MatProductSetFromOptions(mp[cp])); 7176 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7177 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7178 rmapt[cp] = 1; 7179 cmapt[cp] = 2; 7180 cmapa[cp] = P_oth_idx; 7181 mptmp[cp] = PETSC_FALSE; 7182 cp++; 7183 } 7184 break; 7185 7186 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7187 /* A is product->B */ 7188 PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7189 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7190 PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp])); 7191 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7192 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7193 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7194 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7195 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7196 mp[cp]->product->api_user = product->api_user; 7197 PetscCall(MatProductSetFromOptions(mp[cp])); 7198 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7199 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7200 PetscCall(ISGetIndices(glob,&globidx)); 7201 rmapt[cp] = 2; 7202 rmapa[cp] = globidx; 7203 cmapt[cp] = 2; 7204 cmapa[cp] = globidx; 7205 mptmp[cp] = PETSC_FALSE; 7206 cp++; 7207 } else { 7208 PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp])); 7209 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7210 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7211 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7212 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7213 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7214 mp[cp]->product->api_user = product->api_user; 7215 PetscCall(MatProductSetFromOptions(mp[cp])); 7216 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7217 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7218 PetscCall(ISGetIndices(glob,&globidx)); 7219 rmapt[cp] = 1; 7220 cmapt[cp] = 2; 7221 cmapa[cp] = globidx; 7222 mptmp[cp] = PETSC_FALSE; 7223 cp++; 7224 PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp])); 7225 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7226 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7227 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7228 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7229 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7230 mp[cp]->product->api_user = product->api_user; 7231 PetscCall(MatProductSetFromOptions(mp[cp])); 7232 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7233 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7234 rmapt[cp] = 2; 7235 rmapa[cp] = p->garray; 7236 cmapt[cp] = 2; 7237 cmapa[cp] = globidx; 7238 mptmp[cp] = PETSC_FALSE; 7239 cp++; 7240 } 7241 break; 7242 case MATPRODUCT_PtAP: 7243 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7244 /* P is product->B */ 7245 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7246 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7247 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP)); 7248 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7249 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7250 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7251 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7252 mp[cp]->product->api_user = product->api_user; 7253 PetscCall(MatProductSetFromOptions(mp[cp])); 7254 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7255 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7256 PetscCall(ISGetIndices(glob,&globidx)); 7257 rmapt[cp] = 2; 7258 rmapa[cp] = globidx; 7259 cmapt[cp] = 2; 7260 cmapa[cp] = globidx; 7261 mptmp[cp] = PETSC_FALSE; 7262 cp++; 7263 if (mmdata->P_oth) { 7264 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); 7265 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7266 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7267 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7268 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7269 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7270 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7271 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7272 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7273 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7274 mp[cp]->product->api_user = product->api_user; 7275 PetscCall(MatProductSetFromOptions(mp[cp])); 7276 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7277 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7278 mptmp[cp] = PETSC_TRUE; 7279 cp++; 7280 PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp])); 7281 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7282 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7283 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7284 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7285 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7286 mp[cp]->product->api_user = product->api_user; 7287 PetscCall(MatProductSetFromOptions(mp[cp])); 7288 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7289 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7290 rmapt[cp] = 2; 7291 rmapa[cp] = globidx; 7292 cmapt[cp] = 2; 7293 cmapa[cp] = P_oth_idx; 7294 mptmp[cp] = PETSC_FALSE; 7295 cp++; 7296 } 7297 break; 7298 default: 7299 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7300 } 7301 /* sanity check */ 7302 if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i); 7303 7304 PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp)); 7305 for (i = 0; i < cp; i++) { 7306 mmdata->mp[i] = mp[i]; 7307 mmdata->mptmp[i] = mptmp[i]; 7308 } 7309 mmdata->cp = cp; 7310 C->product->data = mmdata; 7311 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7312 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7313 7314 /* memory type */ 7315 mmdata->mtype = PETSC_MEMTYPE_HOST; 7316 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"")); 7317 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"")); 7318 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7319 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7320 7321 /* prepare coo coordinates for values insertion */ 7322 7323 /* count total nonzeros of those intermediate seqaij Mats 7324 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7325 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7326 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7327 */ 7328 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7329 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7330 if (mptmp[cp]) continue; 7331 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7332 const PetscInt *rmap = rmapa[cp]; 7333 const PetscInt mr = mp[cp]->rmap->n; 7334 const PetscInt rs = C->rmap->rstart; 7335 const PetscInt re = C->rmap->rend; 7336 const PetscInt *ii = mm->i; 7337 for (i = 0; i < mr; i++) { 7338 const PetscInt gr = rmap[i]; 7339 const PetscInt nz = ii[i+1] - ii[i]; 7340 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7341 else ncoo_oown += nz; /* this row is local */ 7342 } 7343 } else ncoo_d += mm->nz; 7344 } 7345 7346 /* 7347 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7348 7349 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7350 7351 off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 7352 7353 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7354 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7355 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7356 7357 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7358 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 7359 */ 7360 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */ 7361 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own)); 7362 7363 /* gather (i,j) of nonzeros inserted by remote procs */ 7364 if (hasoffproc) { 7365 PetscSF msf; 7366 PetscInt ncoo2,*coo_i2,*coo_j2; 7367 7368 PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0])); 7369 PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0])); 7370 PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */ 7371 7372 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7373 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7374 PetscInt *idxoff = mmdata->off[cp]; 7375 PetscInt *idxown = mmdata->own[cp]; 7376 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7377 const PetscInt *rmap = rmapa[cp]; 7378 const PetscInt *cmap = cmapa[cp]; 7379 const PetscInt *ii = mm->i; 7380 PetscInt *coi = coo_i + ncoo_o; 7381 PetscInt *coj = coo_j + ncoo_o; 7382 const PetscInt mr = mp[cp]->rmap->n; 7383 const PetscInt rs = C->rmap->rstart; 7384 const PetscInt re = C->rmap->rend; 7385 const PetscInt cs = C->cmap->rstart; 7386 for (i = 0; i < mr; i++) { 7387 const PetscInt *jj = mm->j + ii[i]; 7388 const PetscInt gr = rmap[i]; 7389 const PetscInt nz = ii[i+1] - ii[i]; 7390 if (gr < rs || gr >= re) { /* this is an offproc row */ 7391 for (j = ii[i]; j < ii[i+1]; j++) { 7392 *coi++ = gr; 7393 *idxoff++ = j; 7394 } 7395 if (!cmapt[cp]) { /* already global */ 7396 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7397 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7398 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7399 } else { /* offdiag */ 7400 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7401 } 7402 ncoo_o += nz; 7403 } else { /* this is a local row */ 7404 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 7405 } 7406 } 7407 } 7408 mmdata->off[cp + 1] = idxoff; 7409 mmdata->own[cp + 1] = idxown; 7410 } 7411 7412 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7413 PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i)); 7414 PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf)); 7415 PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL)); 7416 ncoo = ncoo_d + ncoo_oown + ncoo2; 7417 PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2)); 7418 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7419 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); 7420 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7421 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7422 PetscCall(PetscFree2(coo_i,coo_j)); 7423 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7424 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w)); 7425 coo_i = coo_i2; 7426 coo_j = coo_j2; 7427 } else { /* no offproc values insertion */ 7428 ncoo = ncoo_d; 7429 PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j)); 7430 7431 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7432 PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER)); 7433 PetscCall(PetscSFSetUp(mmdata->sf)); 7434 } 7435 mmdata->hasoffproc = hasoffproc; 7436 7437 /* gather (i,j) of nonzeros inserted locally */ 7438 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7439 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7440 PetscInt *coi = coo_i + ncoo_d; 7441 PetscInt *coj = coo_j + ncoo_d; 7442 const PetscInt *jj = mm->j; 7443 const PetscInt *ii = mm->i; 7444 const PetscInt *cmap = cmapa[cp]; 7445 const PetscInt *rmap = rmapa[cp]; 7446 const PetscInt mr = mp[cp]->rmap->n; 7447 const PetscInt rs = C->rmap->rstart; 7448 const PetscInt re = C->rmap->rend; 7449 const PetscInt cs = C->cmap->rstart; 7450 7451 if (mptmp[cp]) continue; 7452 if (rmapt[cp] == 1) { /* consecutive rows */ 7453 /* fill coo_i */ 7454 for (i = 0; i < mr; i++) { 7455 const PetscInt gr = i + rs; 7456 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 7457 } 7458 /* fill coo_j */ 7459 if (!cmapt[cp]) { /* type-0, already global */ 7460 PetscCall(PetscArraycpy(coj,jj,mm->nz)); 7461 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7462 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7463 } else { /* type-2, local to global for sparse columns */ 7464 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7465 } 7466 ncoo_d += mm->nz; 7467 } else if (rmapt[cp] == 2) { /* sparse rows */ 7468 for (i = 0; i < mr; i++) { 7469 const PetscInt *jj = mm->j + ii[i]; 7470 const PetscInt gr = rmap[i]; 7471 const PetscInt nz = ii[i+1] - ii[i]; 7472 if (gr >= rs && gr < re) { /* local rows */ 7473 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 7474 if (!cmapt[cp]) { /* type-0, already global */ 7475 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7476 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7477 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7478 } else { /* type-2, local to global for sparse columns */ 7479 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7480 } 7481 ncoo_d += nz; 7482 } 7483 } 7484 } 7485 } 7486 if (glob) { 7487 PetscCall(ISRestoreIndices(glob,&globidx)); 7488 } 7489 PetscCall(ISDestroy(&glob)); 7490 if (P_oth_l2g) { 7491 PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx)); 7492 } 7493 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7494 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7495 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v)); 7496 7497 /* preallocate with COO data */ 7498 PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j)); 7499 PetscCall(PetscFree2(coo_i,coo_j)); 7500 PetscFunctionReturn(0); 7501 } 7502 7503 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7504 { 7505 Mat_Product *product = mat->product; 7506 #if defined(PETSC_HAVE_DEVICE) 7507 PetscBool match = PETSC_FALSE; 7508 PetscBool usecpu = PETSC_FALSE; 7509 #else 7510 PetscBool match = PETSC_TRUE; 7511 #endif 7512 7513 PetscFunctionBegin; 7514 MatCheckProduct(mat,1); 7515 #if defined(PETSC_HAVE_DEVICE) 7516 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7517 PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match)); 7518 } 7519 if (match) { /* we can always fallback to the CPU if requested */ 7520 switch (product->type) { 7521 case MATPRODUCT_AB: 7522 if (product->api_user) { 7523 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat"); 7524 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7525 PetscOptionsEnd(); 7526 } else { 7527 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat"); 7528 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7529 PetscOptionsEnd(); 7530 } 7531 break; 7532 case MATPRODUCT_AtB: 7533 if (product->api_user) { 7534 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat"); 7535 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7536 PetscOptionsEnd(); 7537 } else { 7538 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat"); 7539 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7540 PetscOptionsEnd(); 7541 } 7542 break; 7543 case MATPRODUCT_PtAP: 7544 if (product->api_user) { 7545 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat"); 7546 PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7547 PetscOptionsEnd(); 7548 } else { 7549 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat"); 7550 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7551 PetscOptionsEnd(); 7552 } 7553 break; 7554 default: 7555 break; 7556 } 7557 match = (PetscBool)!usecpu; 7558 } 7559 #endif 7560 if (match) { 7561 switch (product->type) { 7562 case MATPRODUCT_AB: 7563 case MATPRODUCT_AtB: 7564 case MATPRODUCT_PtAP: 7565 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7566 break; 7567 default: 7568 break; 7569 } 7570 } 7571 /* fallback to MPIAIJ ops */ 7572 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7573 PetscFunctionReturn(0); 7574 } 7575 7576 /* 7577 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7578 7579 n - the number of block indices in cc[] 7580 cc - the block indices (must be large enough to contain the indices) 7581 */ 7582 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc) 7583 { 7584 PetscInt cnt = -1,nidx,j; 7585 const PetscInt *idx; 7586 7587 PetscFunctionBegin; 7588 PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL)); 7589 if (nidx) { 7590 cnt = 0; 7591 cc[cnt] = idx[0]/bs; 7592 for (j=1; j<nidx; j++) { 7593 if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs; 7594 } 7595 } 7596 PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL)); 7597 *n = cnt+1; 7598 PetscFunctionReturn(0); 7599 } 7600 7601 /* 7602 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7603 7604 ncollapsed - the number of block indices 7605 collapsed - the block indices (must be large enough to contain the indices) 7606 */ 7607 static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed) 7608 { 7609 PetscInt i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp; 7610 7611 PetscFunctionBegin; 7612 PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev)); 7613 for (i=start+1; i<start+bs; i++) { 7614 PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur)); 7615 PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged)); 7616 cprevtmp = cprev; cprev = merged; merged = cprevtmp; 7617 } 7618 *ncollapsed = nprev; 7619 if (collapsed) *collapsed = cprev; 7620 PetscFunctionReturn(0); 7621 } 7622 7623 /* -------------------------------------------------------------------------- */ 7624 /* 7625 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7626 7627 Input Parameter: 7628 . Amat - matrix 7629 - symmetrize - make the result symmetric 7630 + scale - scale with diagonal 7631 7632 Output Parameter: 7633 . a_Gmat - output scalar graph >= 0 7634 7635 */ 7636 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat) 7637 { 7638 PetscInt Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs; 7639 MPI_Comm comm; 7640 Mat Gmat; 7641 PetscBool ismpiaij,isseqaij; 7642 Mat a, b, c; 7643 MatType jtype; 7644 7645 PetscFunctionBegin; 7646 PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm)); 7647 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7648 PetscCall(MatGetSize(Amat, &MM, &NN)); 7649 PetscCall(MatGetBlockSize(Amat, &bs)); 7650 nloc = (Iend-Istart)/bs; 7651 7652 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij)); 7653 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij)); 7654 PetscCheck(isseqaij || ismpiaij,comm,PETSC_ERR_USER,"Require (MPI)AIJ matrix type"); 7655 7656 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7657 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7658 implementation */ 7659 if (bs > 1) { 7660 PetscCall(MatGetType(Amat,&jtype)); 7661 PetscCall(MatCreate(comm, &Gmat)); 7662 PetscCall(MatSetType(Gmat, jtype)); 7663 PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE)); 7664 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7665 if (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray) { 7666 PetscInt *d_nnz, *o_nnz; 7667 MatScalar *aa,val,AA[4096]; 7668 PetscInt *aj,*ai,AJ[4096],nc; 7669 if (isseqaij) { a = Amat; b = NULL; } 7670 else { 7671 Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data; 7672 a = d->A; b = d->B; 7673 } 7674 PetscCall(PetscInfo(Amat,"New bs>1 Graph. nloc=%" PetscInt_FMT "\n",nloc)); 7675 PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz)); 7676 for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ 7677 PetscInt *nnz = (c==a) ? d_nnz : o_nnz, nmax=0; 7678 const PetscInt *cols; 7679 for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows 7680 PetscCall(MatGetRow(c,brow,&jj,&cols,NULL)); 7681 nnz[brow/bs] = jj/bs; 7682 if (jj%bs) ok = 0; 7683 if (cols) j0 = cols[0]; 7684 else j0 = -1; 7685 PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL)); 7686 if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs]; 7687 for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks 7688 PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL)); 7689 if (jj%bs) ok = 0; 7690 if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; 7691 if (nnz[brow/bs] != jj/bs) ok = 0; 7692 PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL)); 7693 } 7694 if (!ok) { 7695 PetscCall(PetscFree2(d_nnz,o_nnz)); 7696 goto old_bs; 7697 } 7698 } 7699 PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %" PetscInt_FMT " too small 4096.",nmax); 7700 } 7701 PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz)); 7702 PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz)); 7703 PetscCall(PetscFree2(d_nnz,o_nnz)); 7704 // diag 7705 for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows 7706 Mat_SeqAIJ *aseq = (Mat_SeqAIJ*)a->data; 7707 ai = aseq->i; 7708 n = ai[brow+1] - ai[brow]; 7709 aj = aseq->j + ai[brow]; 7710 for (int k=0; k<n; k += bs) { // block columns 7711 AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart) 7712 val = 0; 7713 for (int ii=0; ii<bs; ii++) { // rows in block 7714 aa = aseq->a + ai[brow+ii] + k; 7715 for (int jj=0; jj<bs; jj++) { // columns in block 7716 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7717 } 7718 } 7719 AA[k/bs] = val; 7720 } 7721 grow = Istart/bs + brow/bs; 7722 PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES)); 7723 } 7724 // off-diag 7725 if (ismpiaij) { 7726 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)Amat->data; 7727 const PetscScalar *vals; 7728 const PetscInt *cols, *garray = aij->garray; 7729 PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?"); 7730 for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows 7731 PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL)); 7732 for (int k=0,cidx=0 ; k < ncols ; k += bs, cidx++) { 7733 AA[k/bs] = 0; 7734 AJ[cidx] = garray[cols[k]]/bs; 7735 } 7736 nc = ncols/bs; 7737 PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL)); 7738 for (int ii=0; ii<bs; ii++) { // rows in block 7739 PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals)); 7740 for (int k=0; k<ncols; k += bs) { 7741 for (int jj=0; jj<bs; jj++) { // cols in block 7742 AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj])); 7743 } 7744 } 7745 PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals)); 7746 } 7747 grow = Istart/bs + brow/bs; 7748 PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES)); 7749 } 7750 } 7751 PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY)); 7752 PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY)); 7753 } else { 7754 const PetscScalar *vals; 7755 const PetscInt *idx; 7756 PetscInt *d_nnz, *o_nnz,*w0,*w1,*w2; 7757 old_bs: 7758 /* 7759 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7760 */ 7761 PetscCall(PetscInfo(Amat,"OLD bs>1 CreateGraph\n")); 7762 PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz)); 7763 if (isseqaij) { 7764 PetscInt max_d_nnz; 7765 /* 7766 Determine exact preallocation count for (sequential) scalar matrix 7767 */ 7768 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz)); 7769 max_d_nnz = PetscMin(nloc,bs*max_d_nnz); 7770 PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2)); 7771 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) { 7772 PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL)); 7773 } 7774 PetscCall(PetscFree3(w0,w1,w2)); 7775 } else if (ismpiaij) { 7776 Mat Daij,Oaij; 7777 const PetscInt *garray; 7778 PetscInt max_d_nnz; 7779 PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray)); 7780 /* 7781 Determine exact preallocation count for diagonal block portion of scalar matrix 7782 */ 7783 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz)); 7784 max_d_nnz = PetscMin(nloc,bs*max_d_nnz); 7785 PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2)); 7786 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7787 PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL)); 7788 } 7789 PetscCall(PetscFree3(w0,w1,w2)); 7790 /* 7791 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7792 */ 7793 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7794 o_nnz[jj] = 0; 7795 for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */ 7796 PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL)); 7797 o_nnz[jj] += ncols; 7798 PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL)); 7799 } 7800 if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc; 7801 } 7802 } else SETERRQ(comm,PETSC_ERR_USER,"Require AIJ matrix type"); 7803 /* get scalar copy (norms) of matrix */ 7804 PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz)); 7805 PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz)); 7806 PetscCall(PetscFree2(d_nnz,o_nnz)); 7807 for (Ii = Istart; Ii < Iend; Ii++) { 7808 PetscInt dest_row = Ii/bs; 7809 PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals)); 7810 for (jj=0; jj<ncols; jj++) { 7811 PetscInt dest_col = idx[jj]/bs; 7812 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7813 PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES)); 7814 } 7815 PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals)); 7816 } 7817 PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY)); 7818 PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY)); 7819 } 7820 } else { 7821 /* TODO GPU: optimization proposal, each class provides fast implementation of this 7822 procedure via MatAbs API */ 7823 /* just copy scalar matrix & abs() */ 7824 PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7825 if (isseqaij) { a = Gmat; b = NULL; } 7826 else { 7827 Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data; 7828 a = d->A; b = d->B; 7829 } 7830 /* abs */ 7831 for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ 7832 MatInfo info; 7833 PetscScalar *avals; 7834 PetscCall(MatGetInfo(c,MAT_LOCAL,&info)); 7835 PetscCall(MatSeqAIJGetArray(c,&avals)); 7836 for (int jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7837 PetscCall(MatSeqAIJRestoreArray(c,&avals)); 7838 } 7839 } 7840 if (symmetrize) { 7841 PetscBool issym; 7842 PetscCall(MatGetOption(Amat,MAT_SYMMETRIC,&issym)); 7843 if (!issym) { 7844 Mat matTrans; 7845 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 7846 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 7847 PetscCall(MatDestroy(&matTrans)); 7848 } 7849 PetscCall(MatSetOption(Gmat,MAT_SYMMETRIC,PETSC_TRUE)); 7850 } else { 7851 PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 7852 } 7853 if (scale) { 7854 /* scale c for all diagonal values = 1 or -1 */ 7855 Vec diag; 7856 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 7857 PetscCall(MatGetDiagonal(Gmat, diag)); 7858 PetscCall(VecReciprocal(diag)); 7859 PetscCall(VecSqrtAbs(diag)); 7860 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 7861 PetscCall(VecDestroy(&diag)); 7862 } 7863 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 7864 *a_Gmat = Gmat; 7865 PetscFunctionReturn(0); 7866 } 7867 7868 /* -------------------------------------------------------------------------- */ 7869 /*@C 7870 MatFilter_AIJ - filter values with small absolute values 7871 With vfilter < 0 does nothing so should not be called. 7872 7873 Collective on Mat 7874 7875 Input Parameters: 7876 + Gmat - the graph 7877 . vfilter - threshold parameter [0,1) 7878 7879 Output Parameter: 7880 . filteredG - output filtered scalar graph 7881 7882 Level: developer 7883 7884 Notes: 7885 This is called before graph coarsers are called. 7886 This could go into Mat, move 'symm' to GAMG 7887 7888 .seealso: `PCGAMGSetThreshold()` 7889 @*/ 7890 PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat,PetscReal vfilter, Mat *filteredG) 7891 { 7892 PetscInt Istart,Iend,ncols,nnz0,nnz1, NN, MM, nloc; 7893 Mat tGmat; 7894 MPI_Comm comm; 7895 const PetscScalar *vals; 7896 const PetscInt *idx; 7897 PetscInt *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols=0; 7898 MatScalar *AA; // this is checked in graph 7899 PetscBool isseqaij; 7900 Mat a, b, c; 7901 MatType jtype; 7902 7903 PetscFunctionBegin; 7904 PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm)); 7905 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isseqaij)); 7906 PetscCall(MatGetType(Gmat,&jtype)); 7907 PetscCall(MatCreate(comm, &tGmat)); 7908 PetscCall(MatSetType(tGmat, jtype)); 7909 7910 /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold? 7911 Also, if the matrix is symmetric, can we skip this 7912 operation? It can be very expensive on large matrices. */ 7913 7914 // global sizes 7915 PetscCall(MatGetSize(Gmat, &MM, &NN)); 7916 PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); 7917 nloc = Iend - Istart; 7918 PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz)); 7919 if (isseqaij) { a = Gmat; b = NULL; } 7920 else { 7921 Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data; 7922 a = d->A; b = d->B; 7923 garray = d->garray; 7924 } 7925 /* Determine upper bound on non-zeros needed in new filtered matrix */ 7926 for (PetscInt row=0; row < nloc; row++) { 7927 PetscCall(MatGetRow(a,row,&ncols,NULL,NULL)); 7928 d_nnz[row] = ncols; 7929 if (ncols>maxcols) maxcols=ncols; 7930 PetscCall(MatRestoreRow(a,row,&ncols,NULL,NULL)); 7931 } 7932 if (b) { 7933 for (PetscInt row=0; row < nloc; row++) { 7934 PetscCall(MatGetRow(b,row,&ncols,NULL,NULL)); 7935 o_nnz[row] = ncols; 7936 if (ncols>maxcols) maxcols=ncols; 7937 PetscCall(MatRestoreRow(b,row,&ncols,NULL,NULL)); 7938 } 7939 } 7940 PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM)); 7941 PetscCall(MatSetBlockSizes(tGmat, 1, 1)); 7942 PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz)); 7943 PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz)); 7944 PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 7945 PetscCall(PetscFree2(d_nnz,o_nnz)); 7946 // 7947 PetscCall(PetscMalloc2(maxcols, &AA,maxcols, &AJ)); 7948 nnz0 = nnz1 = 0; 7949 for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ 7950 for (PetscInt row=0, grow=Istart, ncol_row, jj ; row < nloc; row++,grow++) { 7951 PetscCall(MatGetRow(c,row,&ncols,&idx,&vals)); 7952 for (ncol_row=jj=0; jj<ncols; jj++,nnz0++) { 7953 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7954 if (PetscRealPart(sv) > vfilter) { 7955 nnz1++; 7956 PetscInt cid = idx[jj] + Istart; //diag 7957 if (c!=a) cid = garray[idx[jj]]; 7958 AA[ncol_row] = vals[jj]; 7959 AJ[ncol_row] = cid; 7960 ncol_row++; 7961 } 7962 } 7963 PetscCall(MatRestoreRow(c,row,&ncols,&idx,&vals)); 7964 PetscCall(MatSetValues(tGmat,1,&grow,ncol_row,AJ,AA,INSERT_VALUES)); 7965 } 7966 } 7967 PetscCall(PetscFree2(AA,AJ)); 7968 PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY)); 7969 PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY)); 7970 PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); /* Normal Mat options are not relevant ? */ 7971 7972 PetscCall(PetscInfo(tGmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", 7973 (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, (double)vfilter, 7974 (!nloc) ? 1. : (double)nnz0/(double)nloc,MM,(int)maxcols)); 7975 7976 *filteredG = tGmat; 7977 PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view")); 7978 PetscFunctionReturn(0); 7979 } 7980 7981 /* 7982 Special version for direct calls from Fortran 7983 */ 7984 #include <petsc/private/fortranimpl.h> 7985 7986 /* Change these macros so can be used in void function */ 7987 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 7988 #undef PetscCall 7989 #define PetscCall(...) do { \ 7990 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 7991 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 7992 *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \ 7993 return; \ 7994 } \ 7995 } while (0) 7996 7997 #undef SETERRQ 7998 #define SETERRQ(comm,ierr,...) do { \ 7999 *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \ 8000 return; \ 8001 } while (0) 8002 8003 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8004 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8005 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8006 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8007 #else 8008 #endif 8009 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 8010 { 8011 Mat mat = *mmat; 8012 PetscInt m = *mm, n = *mn; 8013 InsertMode addv = *maddv; 8014 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 8015 PetscScalar value; 8016 8017 MatCheckPreallocated(mat,1); 8018 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8019 else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 8020 { 8021 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 8022 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 8023 PetscBool roworiented = aij->roworiented; 8024 8025 /* Some Variables required in the macro */ 8026 Mat A = aij->A; 8027 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 8028 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 8029 MatScalar *aa; 8030 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8031 Mat B = aij->B; 8032 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 8033 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 8034 MatScalar *ba; 8035 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8036 * cannot use "#if defined" inside a macro. */ 8037 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8038 8039 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 8040 PetscInt nonew = a->nonew; 8041 MatScalar *ap1,*ap2; 8042 8043 PetscFunctionBegin; 8044 PetscCall(MatSeqAIJGetArray(A,&aa)); 8045 PetscCall(MatSeqAIJGetArray(B,&ba)); 8046 for (i=0; i<m; i++) { 8047 if (im[i] < 0) continue; 8048 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 8049 if (im[i] >= rstart && im[i] < rend) { 8050 row = im[i] - rstart; 8051 lastcol1 = -1; 8052 rp1 = aj + ai[row]; 8053 ap1 = aa + ai[row]; 8054 rmax1 = aimax[row]; 8055 nrow1 = ailen[row]; 8056 low1 = 0; 8057 high1 = nrow1; 8058 lastcol2 = -1; 8059 rp2 = bj + bi[row]; 8060 ap2 = ba + bi[row]; 8061 rmax2 = bimax[row]; 8062 nrow2 = bilen[row]; 8063 low2 = 0; 8064 high2 = nrow2; 8065 8066 for (j=0; j<n; j++) { 8067 if (roworiented) value = v[i*n+j]; 8068 else value = v[i+j*m]; 8069 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8070 if (in[j] >= cstart && in[j] < cend) { 8071 col = in[j] - cstart; 8072 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 8073 } else if (in[j] < 0) continue; 8074 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8075 /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */ 8076 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 8077 } else { 8078 if (mat->was_assembled) { 8079 if (!aij->colmap) { 8080 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8081 } 8082 #if defined(PETSC_USE_CTABLE) 8083 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); 8084 col--; 8085 #else 8086 col = aij->colmap[in[j]] - 1; 8087 #endif 8088 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 8089 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8090 col = in[j]; 8091 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8092 B = aij->B; 8093 b = (Mat_SeqAIJ*)B->data; 8094 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 8095 rp2 = bj + bi[row]; 8096 ap2 = ba + bi[row]; 8097 rmax2 = bimax[row]; 8098 nrow2 = bilen[row]; 8099 low2 = 0; 8100 high2 = nrow2; 8101 bm = aij->B->rmap->n; 8102 ba = b->a; 8103 inserted = PETSC_FALSE; 8104 } 8105 } else col = in[j]; 8106 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 8107 } 8108 } 8109 } else if (!aij->donotstash) { 8110 if (roworiented) { 8111 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8112 } else { 8113 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8114 } 8115 } 8116 } 8117 PetscCall(MatSeqAIJRestoreArray(A,&aa)); 8118 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 8119 } 8120 PetscFunctionReturnVoid(); 8121 } 8122 8123 /* Undefining these here since they were redefined from their original definition above! No 8124 * other PETSc functions should be defined past this point, as it is impossible to recover the 8125 * original definitions */ 8126 #undef PetscCall 8127 #undef SETERRQ 8128