#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B));
  PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B));
  PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B));
  PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
   Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A,flg));
  if (a->B) PetscCall(MatBindToCPU(a->B,flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
*/ 83 if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg)); 84 if (a->diag) PetscCall(VecBindToCPU(a->diag,flg)); 85 86 PetscFunctionReturn(0); 87 } 88 89 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 90 { 91 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 92 93 PetscFunctionBegin; 94 if (mat->A) { 95 PetscCall(MatSetBlockSizes(mat->A,rbs,cbs)); 96 PetscCall(MatSetBlockSizes(mat->B,rbs,1)); 97 } 98 PetscFunctionReturn(0); 99 } 100 101 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 102 { 103 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 104 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 105 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 106 const PetscInt *ia,*ib; 107 const MatScalar *aa,*bb,*aav,*bav; 108 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 109 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 110 111 PetscFunctionBegin; 112 *keptrows = NULL; 113 114 ia = a->i; 115 ib = b->i; 116 PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav)); 117 PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav)); 118 for (i=0; i<m; i++) { 119 na = ia[i+1] - ia[i]; 120 nb = ib[i+1] - ib[i]; 121 if (!na && !nb) { 122 cnt++; 123 goto ok1; 124 } 125 aa = aav + ia[i]; 126 for (j=0; j<na; j++) { 127 if (aa[j] != 0.0) goto ok1; 128 } 129 bb = bav + ib[i]; 130 for (j=0; j <nb; j++) { 131 if (bb[j] != 0.0) goto ok1; 132 } 133 cnt++; 134 ok1:; 135 } 136 PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M))); 137 if (!n0rows) { 138 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 139 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 140 PetscFunctionReturn(0); 141 } 142 PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows)); 143 cnt = 0; 144 for (i=0; i<m; i++) { 145 na = ia[i+1] - ia[i]; 146 nb = ib[i+1] - ib[i]; 147 if (!na && !nb) continue; 148 aa = aav + ia[i]; 149 for (j=0; j<na;j++) { 150 if (aa[j] != 0.0) { 151 rows[cnt++] = rstart + i; 152 goto ok2; 153 } 154 } 155 bb = bav + ib[i]; 156 for (j=0; j<nb; j++) { 157 if (bb[j] != 0.0) { 158 rows[cnt++] = rstart + i; 159 goto ok2; 160 } 161 } 162 ok2:; 163 } 164 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows)); 165 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 166 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 167 PetscFunctionReturn(0); 168 } 169 170 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 171 { 172 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 173 PetscBool cong; 174 175 PetscFunctionBegin; 176 PetscCall(MatHasCongruentLayouts(Y,&cong)); 177 if (Y->assembled && cong) { 178 PetscCall(MatDiagonalSet(aij->A,D,is)); 179 } else { 180 PetscCall(MatDiagonalSet_Default(Y,D,is)); 181 } 182 PetscFunctionReturn(0); 183 } 184 185 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 188 PetscInt i,rstart,nrows,*rows; 189 190 PetscFunctionBegin; 191 *zrows = NULL; 192 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows)); 193 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 194 for (i=0; i<nrows; i++) rows[i] += rstart; 195 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows)); 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions) 200 { 201 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 202 PetscInt i,m,n,*garray = aij->garray; 203 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 204 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 205 PetscReal *work; 206 const PetscScalar *dummy; 207 208 
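  /* Note: the MatSeqAIJGetArrayRead()/MatSeqAIJRestoreArrayRead() pairs on `dummy` below look like no-ops;
     they are presumably there to make sure any device-resident values of aij->A and aij->B are up to date on the host
     before the raw arrays a_aij->a and b_aij->a are read directly. */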
PetscFunctionBegin; 209 PetscCall(MatGetSize(A,&m,&n)); 210 PetscCall(PetscCalloc1(n,&work)); 211 PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy)); 212 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy)); 213 PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy)); 214 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy)); 215 if (type == NORM_2) { 216 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 217 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 218 } 219 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 220 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 221 } 222 } else if (type == NORM_1) { 223 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 224 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 225 } 226 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 227 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 228 } 229 } else if (type == NORM_INFINITY) { 230 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 231 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 232 } 233 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 234 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 235 } 236 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 237 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 238 work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 239 } 240 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 241 work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 242 } 243 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 244 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 245 work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 246 } 247 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 248 work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 249 } 250 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 251 if (type == NORM_INFINITY) { 252 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 253 } else { 254 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 255 } 256 PetscCall(PetscFree(work)); 257 if (type == NORM_2) { 258 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 259 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 260 for (i=0; i<n; i++) reductions[i] /= m; 261 } 262 PetscFunctionReturn(0); 263 } 264 265 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 266 { 267 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 268 IS sis,gis; 269 const PetscInt *isis,*igis; 270 PetscInt n,*iis,nsis,ngis,rstart,i; 271 272 PetscFunctionBegin; 273 PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis)); 274 PetscCall(MatFindNonzeroRows(a->B,&gis)); 275 PetscCall(ISGetSize(gis,&ngis)); 276 PetscCall(ISGetSize(sis,&nsis)); 277 PetscCall(ISGetIndices(sis,&isis)); 278 PetscCall(ISGetIndices(gis,&igis)); 279 280 PetscCall(PetscMalloc1(ngis+nsis,&iis)); 281 PetscCall(PetscArraycpy(iis,igis,ngis)); 282 PetscCall(PetscArraycpy(iis+ngis,isis,nsis)); 283 n = ngis + nsis; 284 PetscCall(PetscSortRemoveDupsInt(&n,iis)); 285 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 286 for (i=0; i<n; i++) iis[i] += rstart; 287 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is)); 288 289 PetscCall(ISRestoreIndices(sis,&isis)); 290 
  PetscCall(ISRestoreIndices(gis,&igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each processor
  has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt   n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
  for (i=0; i<n; i++) {
    PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
  }
#else
  PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
384 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 385 N = nrow2++ - 1; b->nz++; high2++; \ 386 /* shift up all the later entries in this row */ \ 387 PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\ 388 PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\ 389 rp2[_i] = col; \ 390 ap2[_i] = value; \ 391 B->nonzerostate++; \ 392 b_noinsert: ; \ 393 bilen[row] = nrow2; \ 394 } 395 396 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 397 { 398 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 399 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 400 PetscInt l,*garray = mat->garray,diag; 401 PetscScalar *aa,*ba; 402 403 PetscFunctionBegin; 404 /* code only works for square matrices A */ 405 406 /* find size of row to the left of the diagonal part */ 407 PetscCall(MatGetOwnershipRange(A,&diag,NULL)); 408 row = row - diag; 409 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 410 if (garray[b->j[b->i[row]+l]] > diag) break; 411 } 412 if (l) { 413 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 414 PetscCall(PetscArraycpy(ba+b->i[row],v,l)); 415 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 416 } 417 418 /* diagonal part */ 419 if (a->i[row+1]-a->i[row]) { 420 PetscCall(MatSeqAIJGetArray(mat->A,&aa)); 421 PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]))); 422 PetscCall(MatSeqAIJRestoreArray(mat->A,&aa)); 423 } 424 425 /* right of diagonal part */ 426 if (b->i[row+1]-b->i[row]-l) { 427 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 428 PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l)); 429 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 430 } 431 PetscFunctionReturn(0); 432 } 433 434 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 435 { 436 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 437 PetscScalar value = 0.0; 438 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 439 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 440 PetscBool roworiented = aij->roworiented; 441 442 /* Some Variables required in the macro */ 443 Mat A = aij->A; 444 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 445 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 446 PetscBool ignorezeroentries = a->ignorezeroentries; 447 Mat B = aij->B; 448 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 449 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 450 MatScalar *aa,*ba; 451 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 452 PetscInt nonew; 453 MatScalar *ap1,*ap2; 454 455 PetscFunctionBegin; 456 PetscCall(MatSeqAIJGetArray(A,&aa)); 457 PetscCall(MatSeqAIJGetArray(B,&ba)); 458 for (i=0; i<m; i++) { 459 if (im[i] < 0) continue; 460 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 461 if (im[i] >= rstart && im[i] < rend) { 462 row = im[i] - rstart; 463 lastcol1 = -1; 464 rp1 = aj + ai[row]; 465 ap1 = aa + ai[row]; 466 rmax1 = aimax[row]; 467 nrow1 = ailen[row]; 468 low1 = 0; 469 high1 = nrow1; 470 lastcol2 = -1; 471 rp2 = bj + bi[row]; 472 ap2 = ba + bi[row]; 473 rmax2 = bimax[row]; 474 nrow2 = bilen[row]; 475 low2 = 0; 476 high2 = nrow2; 477 478 for (j=0; j<n; j++) { 479 if (v) value = roworiented ? 
v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          if (mat->was_assembled) {
            if (!aij->colmap) {
              PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
            }
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
              } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, ba might have been freed due to reallocation above, but we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B,&ba));
  PetscFunctionReturn(0);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ*)mat->data;
  Mat        A     = aij->A; /* diagonal part of the matrix */
  Mat        B     = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ*)mat->data;
  Mat        A      = aij->A; /* diagonal part of the matrix */
  Mat        B      = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd  = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
*/ 601 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 602 PetscScalar *aa = a->a,*ba = b->a; 603 604 PetscFunctionBegin; 605 /* Iterate over all rows of the matrix */ 606 for (j=0; j<am; j++) { 607 dnz_row = onz_row = 0; 608 rowstart_offd = full_offd_i[j]; 609 rowstart_diag = full_diag_i[j]; 610 /* Iterate over all non-zero columns of the current row */ 611 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 612 /* If column is in the diagonal */ 613 if (mat_j[col] >= cstart && mat_j[col] < cend) { 614 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 615 aa[rowstart_diag+dnz_row] = mat_a[col]; 616 dnz_row++; 617 } else { /* off-diagonal entries */ 618 bj[rowstart_offd+onz_row] = mat_j[col]; 619 ba[rowstart_offd+onz_row] = mat_a[col]; 620 onz_row++; 621 } 622 } 623 ailen[j] = dnz_row; 624 bilen[j] = onz_row; 625 } 626 PetscFunctionReturn(0); 627 } 628 629 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 630 { 631 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 632 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 633 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 634 635 PetscFunctionBegin; 636 for (i=0; i<m; i++) { 637 if (idxm[i] < 0) continue; /* negative row */ 638 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 639 if (idxm[i] >= rstart && idxm[i] < rend) { 640 row = idxm[i] - rstart; 641 for (j=0; j<n; j++) { 642 if (idxn[j] < 0) continue; /* negative column */ 643 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 644 if (idxn[j] >= cstart && idxn[j] < cend) { 645 col = idxn[j] - cstart; 646 PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j)); 647 } else { 648 if (!aij->colmap) { 649 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 650 } 651 #if defined(PETSC_USE_CTABLE) 652 PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col)); 653 col--; 654 #else 655 col = aij->colmap[idxn[j]] - 1; 656 #endif 657 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 658 else { 659 PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j)); 660 } 661 } 662 } 663 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 664 } 665 PetscFunctionReturn(0); 666 } 667 668 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 669 { 670 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 671 PetscInt nstash,reallocs; 672 673 PetscFunctionBegin; 674 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 675 676 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 677 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 678 PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 679 PetscFunctionReturn(0); 680 } 681 682 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 683 { 684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 685 PetscMPIInt n; 686 PetscInt i,j,rstart,ncols,flg; 687 PetscInt *row,*col; 688 PetscBool other_disassembled; 689 PetscScalar *val; 690 691 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 692 693 PetscFunctionBegin; 694 if (!aij->donotstash && !mat->nooffprocentries) { 695 while (1) { 696 
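      /* Receive the next batch of stashed off-process (row, col, val) entries; flg is set to zero once no messages remain. */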
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A,mode));
  PetscCall(MatAssemblyEnd(aij->A,mode));

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves so that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  }
  PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B,mode));
  PetscCall(MatAssemblyEnd(aij->B,mode));

  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed */
if (x && b) { 787 const PetscScalar *xx; 788 PetscScalar *bb; 789 790 PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 791 PetscCall(VecGetArrayRead(x, &xx)); 792 PetscCall(VecGetArray(b, &bb)); 793 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 794 PetscCall(VecRestoreArrayRead(x, &xx)); 795 PetscCall(VecRestoreArray(b, &bb)); 796 } 797 798 sA = mat->A->nonzerostate; 799 sB = mat->B->nonzerostate; 800 801 if (diag != 0.0 && cong) { 802 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 803 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 804 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 805 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 806 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 807 PetscInt nnwA, nnwB; 808 PetscBool nnzA, nnzB; 809 810 nnwA = aijA->nonew; 811 nnwB = aijB->nonew; 812 nnzA = aijA->keepnonzeropattern; 813 nnzB = aijB->keepnonzeropattern; 814 if (!nnzA) { 815 PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 816 aijA->nonew = 0; 817 } 818 if (!nnzB) { 819 PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 820 aijB->nonew = 0; 821 } 822 /* Must zero here before the next loop */ 823 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 824 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 825 for (r = 0; r < len; ++r) { 826 const PetscInt row = lrows[r] + A->rmap->rstart; 827 if (row >= A->cmap->N) continue; 828 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 829 } 830 aijA->nonew = nnwA; 831 aijB->nonew = nnwB; 832 } else { 833 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 834 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 835 } 836 PetscCall(PetscFree(lrows)); 837 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 838 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 839 840 /* reduce nonzerostate */ 841 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 842 PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A))); 843 if (gch) A->nonzerostate++; 844 PetscFunctionReturn(0); 845 } 846 847 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 848 { 849 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 850 PetscMPIInt n = A->rmap->n; 851 PetscInt i,j,r,m,len = 0; 852 PetscInt *lrows,*owners = A->rmap->range; 853 PetscMPIInt p = 0; 854 PetscSFNode *rrows; 855 PetscSF sf; 856 const PetscScalar *xx; 857 PetscScalar *bb,*mask,*aij_a; 858 Vec xmask,lmask; 859 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 860 const PetscInt *aj, *ii,*ridx; 861 PetscScalar *aa; 862 863 PetscFunctionBegin; 864 /* Create SF where leaves are input rows and roots are owned rows */ 865 PetscCall(PetscMalloc1(n, &lrows)); 866 for (r = 0; r < n; ++r) lrows[r] = -1; 867 PetscCall(PetscMalloc1(N, &rrows)); 868 for (r = 0; r < N; ++r) { 869 const PetscInt idx = rows[r]; 870 PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 871 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 872 PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p)); 873 } 
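    /* record the owning rank and the owner-local row index of this requested row in the SF leaf data */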
874 rrows[r].rank = p; 875 rrows[r].index = rows[r] - owners[p]; 876 } 877 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf)); 878 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 879 /* Collect flags for rows to be zeroed */ 880 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 881 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 882 PetscCall(PetscSFDestroy(&sf)); 883 /* Compress and put in row numbers */ 884 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 885 /* zero diagonal part of matrix */ 886 PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b)); 887 /* handle off diagonal part of matrix */ 888 PetscCall(MatCreateVecs(A,&xmask,NULL)); 889 PetscCall(VecDuplicate(l->lvec,&lmask)); 890 PetscCall(VecGetArray(xmask,&bb)); 891 for (i=0; i<len; i++) bb[lrows[i]] = 1; 892 PetscCall(VecRestoreArray(xmask,&bb)); 893 PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 894 PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 895 PetscCall(VecDestroy(&xmask)); 896 if (x && b) { /* this code is buggy when the row and column layout don't match */ 897 PetscBool cong; 898 899 PetscCall(MatHasCongruentLayouts(A,&cong)); 900 PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 901 PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 902 PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 903 PetscCall(VecGetArrayRead(l->lvec,&xx)); 904 PetscCall(VecGetArray(b,&bb)); 905 } 906 PetscCall(VecGetArray(lmask,&mask)); 907 /* remove zeroed rows of off diagonal matrix */ 908 PetscCall(MatSeqAIJGetArray(l->B,&aij_a)); 909 ii = aij->i; 910 for (i=0; i<len; i++) { 911 PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]])); 912 } 913 /* loop over all elements of off process part of matrix zeroing removed columns*/ 914 if (aij->compressedrow.use) { 915 m = aij->compressedrow.nrows; 916 ii = aij->compressedrow.i; 917 ridx = aij->compressedrow.rindex; 918 for (i=0; i<m; i++) { 919 n = ii[i+1] - ii[i]; 920 aj = aij->j + ii[i]; 921 aa = aij_a + ii[i]; 922 923 for (j=0; j<n; j++) { 924 if (PetscAbsScalar(mask[*aj])) { 925 if (b) bb[*ridx] -= *aa*xx[*aj]; 926 *aa = 0.0; 927 } 928 aa++; 929 aj++; 930 } 931 ridx++; 932 } 933 } else { /* do not use compressed row format */ 934 m = l->B->rmap->n; 935 for (i=0; i<m; i++) { 936 n = ii[i+1] - ii[i]; 937 aj = aij->j + ii[i]; 938 aa = aij_a + ii[i]; 939 for (j=0; j<n; j++) { 940 if (PetscAbsScalar(mask[*aj])) { 941 if (b) bb[i] -= *aa*xx[*aj]; 942 *aa = 0.0; 943 } 944 aa++; 945 aj++; 946 } 947 } 948 } 949 if (x && b) { 950 PetscCall(VecRestoreArray(b,&bb)); 951 PetscCall(VecRestoreArrayRead(l->lvec,&xx)); 952 } 953 PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a)); 954 PetscCall(VecRestoreArray(lmask,&mask)); 955 PetscCall(VecDestroy(&lmask)); 956 PetscCall(PetscFree(lrows)); 957 958 /* only change matrix nonzero state if pattern was allowed to be changed */ 959 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 960 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 961 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 962 } 963 PetscFunctionReturn(0); 964 } 965 966 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 967 { 968 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 969 PetscInt nt; 970 VecScatter Mvctx = a->Mvctx; 971 972 
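  /* y = A_diag*x_local + A_offdiag*x_ghost: the scatter that gathers the ghost values of xx into a->lvec
     is overlapped with the multiply by the local diagonal block a->A */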
PetscFunctionBegin; 973 PetscCall(VecGetLocalSize(xx,&nt)); 974 PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt); 975 PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 976 PetscCall((*a->A->ops->mult)(a->A,xx,yy)); 977 PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 978 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy)); 979 PetscFunctionReturn(0); 980 } 981 982 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 983 { 984 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 985 986 PetscFunctionBegin; 987 PetscCall(MatMultDiagonalBlock(a->A,bb,xx)); 988 PetscFunctionReturn(0); 989 } 990 991 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 992 { 993 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 994 VecScatter Mvctx = a->Mvctx; 995 996 PetscFunctionBegin; 997 PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 998 PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz)); 999 PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1000 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz)); 1001 PetscFunctionReturn(0); 1002 } 1003 1004 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1005 { 1006 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1007 1008 PetscFunctionBegin; 1009 /* do nondiagonal part */ 1010 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1011 /* do local part */ 1012 PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy)); 1013 /* add partial results together */ 1014 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1015 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1016 PetscFunctionReturn(0); 1017 } 1018 1019 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1020 { 1021 MPI_Comm comm; 1022 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1023 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1024 IS Me,Notme; 1025 PetscInt M,N,first,last,*notme,i; 1026 PetscBool lf; 1027 PetscMPIInt size; 1028 1029 PetscFunctionBegin; 1030 /* Easy test: symmetric diagonal block */ 1031 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1032 PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf)); 1033 PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat))); 1034 if (!*f) PetscFunctionReturn(0); 1035 PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm)); 1036 PetscCallMPI(MPI_Comm_size(comm,&size)); 1037 if (size == 1) PetscFunctionReturn(0); 1038 1039 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
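     Each process extracts A(local rows, all non-local columns) and B(all non-local rows, local columns) with
     MatCreateSubMatrices() and then checks whether those two sequential blocks are transposes of each other.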
*/
  PetscCall(MatGetSize(Amat,&M,&N));
  PetscCall(MatGetOwnershipRange(Amat,&first,&last));
  PetscCall(PetscMalloc1(N-last+first,&notme));
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
  PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
  PetscCall(MatDestroyMatrices(1,&Aoffs));
  PetscCall(MatDestroyMatrices(1,&Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A,v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A,aa));
  PetscCall(MatScale(a->B,aa));
  PetscFunctionReturn(0);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
#if
defined(PETSC_USE_LOG) 1137 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 1138 #endif 1139 PetscCall(MatStashDestroy_Private(&mat->stash)); 1140 PetscCall(VecDestroy(&aij->diag)); 1141 PetscCall(MatDestroy(&aij->A)); 1142 PetscCall(MatDestroy(&aij->B)); 1143 #if defined(PETSC_USE_CTABLE) 1144 PetscCall(PetscTableDestroy(&aij->colmap)); 1145 #else 1146 PetscCall(PetscFree(aij->colmap)); 1147 #endif 1148 PetscCall(PetscFree(aij->garray)); 1149 PetscCall(VecDestroy(&aij->lvec)); 1150 PetscCall(VecScatterDestroy(&aij->Mvctx)); 1151 PetscCall(PetscFree2(aij->rowvalues,aij->rowindices)); 1152 PetscCall(PetscFree(aij->ld)); 1153 1154 /* Free COO */ 1155 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 1156 1157 PetscCall(PetscFree(mat->data)); 1158 1159 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1160 PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL)); 1161 1162 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 1163 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL)); 1164 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL)); 1165 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL)); 1166 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL)); 1167 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL)); 1168 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL)); 1169 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL)); 1170 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL)); 1171 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL)); 1172 #if defined(PETSC_HAVE_CUDA) 1173 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL)); 1174 #endif 1175 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1176 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL)); 1177 #endif 1178 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL)); 1179 #if defined(PETSC_HAVE_ELEMENTAL) 1180 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL)); 1181 #endif 1182 #if defined(PETSC_HAVE_SCALAPACK) 1183 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL)); 1184 #endif 1185 #if defined(PETSC_HAVE_HYPRE) 1186 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL)); 1187 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL)); 1188 #endif 1189 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 1190 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL)); 1191 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL)); 1192 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL)); 1193 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL)); 1194 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL)); 1195 #if defined(PETSC_HAVE_MKL_SPARSE) 1196 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL)); 1197 #endif 1198 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL)); 1199 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 1200 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL)); 1201 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL)); 1202 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL)); 1203 PetscFunctionReturn(0); 1204 } 1205 1206 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1207 { 1208 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1209 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1210 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1211 const PetscInt *garray = aij->garray; 1212 const PetscScalar *aa,*ba; 1213 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1214 PetscInt *rowlens; 1215 PetscInt *colidxs; 1216 PetscScalar *matvals; 1217 1218 PetscFunctionBegin; 1219 PetscCall(PetscViewerSetUp(viewer)); 1220 1221 M = mat->rmap->N; 1222 N = mat->cmap->N; 1223 m = mat->rmap->n; 1224 rs = mat->rmap->rstart; 1225 cs = mat->cmap->rstart; 1226 nz = A->nz + B->nz; 1227 1228 /* write matrix header */ 1229 header[0] = MAT_FILE_CLASSID; 1230 header[1] = M; header[2] = N; header[3] = nz; 1231 PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat))); 1232 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1233 1234 /* fill in and store row lengths */ 1235 PetscCall(PetscMalloc1(m,&rowlens)); 1236 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1237 PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT)); 1238 PetscCall(PetscFree(rowlens)); 1239 1240 /* fill in and store column indices */ 1241 PetscCall(PetscMalloc1(nz,&colidxs)); 1242 for (cnt=0, i=0; i<m; i++) { 1243 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1244 if (garray[B->j[jb]] > cs) break; 1245 colidxs[cnt++] = garray[B->j[jb]]; 1246 } 1247 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1248 colidxs[cnt++] = A->j[ja] + cs; 1249 for (; jb<B->i[i+1]; jb++) 1250 colidxs[cnt++] = garray[B->j[jb]]; 1251 } 1252 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1253 PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 1254 PetscCall(PetscFree(colidxs)); 1255 1256 /* fill in and store nonzero values */ 1257 PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa)); 1258 PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba)); 1259 PetscCall(PetscMalloc1(nz,&matvals)); 1260 for (cnt=0, i=0; i<m; i++) { 1261 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1262 if (garray[B->j[jb]] > cs) break; 1263 matvals[cnt++] = ba[jb]; 1264 } 1265 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1266 matvals[cnt++] = aa[ja]; 1267 for (; jb<B->i[i+1]; jb++) 1268 matvals[cnt++] = ba[jb]; 1269 } 1270 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa)); 1271 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba)); 1272 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1273 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 1274 PetscCall(PetscFree(matvals)); 1275 1276 /* write block size option to the viewer's .info file */ 1277 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1278 PetscFunctionReturn(0); 
1279 } 1280 1281 #include <petscdraw.h> 1282 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1283 { 1284 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1285 PetscMPIInt rank = aij->rank,size = aij->size; 1286 PetscBool isdraw,iascii,isbinary; 1287 PetscViewer sviewer; 1288 PetscViewerFormat format; 1289 1290 PetscFunctionBegin; 1291 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1292 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1293 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1294 if (iascii) { 1295 PetscCall(PetscViewerGetFormat(viewer,&format)); 1296 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1297 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1298 PetscCall(PetscMalloc1(size,&nz)); 1299 PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 1300 for (i=0; i<(PetscInt)size; i++) { 1301 nmax = PetscMax(nmax,nz[i]); 1302 nmin = PetscMin(nmin,nz[i]); 1303 navg += nz[i]; 1304 } 1305 PetscCall(PetscFree(nz)); 1306 navg = navg/size; 1307 PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax)); 1308 PetscFunctionReturn(0); 1309 } 1310 PetscCall(PetscViewerGetFormat(viewer,&format)); 1311 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1312 MatInfo info; 1313 PetscInt *inodes=NULL; 1314 1315 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 1316 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 1317 PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL)); 1318 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1319 if (!inodes) { 1320 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", 1321 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1322 } else { 1323 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", 1324 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1325 } 1326 PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info)); 1327 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1328 PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info)); 1329 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1330 PetscCall(PetscViewerFlush(viewer)); 1331 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1332 PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 1333 PetscCall(VecScatterView(aij->Mvctx,viewer)); 1334 PetscFunctionReturn(0); 1335 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1336 PetscInt inodecount,inodelimit,*inodes; 1337 PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit)); 1338 if (inodes) { 1339 PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit)); 1340 } else { 1341 PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n")); 1342 } 
1343 PetscFunctionReturn(0); 1344 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1345 PetscFunctionReturn(0); 1346 } 1347 } else if (isbinary) { 1348 if (size == 1) { 1349 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1350 PetscCall(MatView(aij->A,viewer)); 1351 } else { 1352 PetscCall(MatView_MPIAIJ_Binary(mat,viewer)); 1353 } 1354 PetscFunctionReturn(0); 1355 } else if (iascii && size == 1) { 1356 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1357 PetscCall(MatView(aij->A,viewer)); 1358 PetscFunctionReturn(0); 1359 } else if (isdraw) { 1360 PetscDraw draw; 1361 PetscBool isnull; 1362 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1363 PetscCall(PetscDrawIsNull(draw,&isnull)); 1364 if (isnull) PetscFunctionReturn(0); 1365 } 1366 1367 { /* assemble the entire matrix onto first processor */ 1368 Mat A = NULL, Av; 1369 IS isrow,iscol; 1370 1371 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1372 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1373 PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A)); 1374 PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL)); 1375 /* The commented code uses MatCreateSubMatrices instead */ 1376 /* 1377 Mat *AA, A = NULL, Av; 1378 IS isrow,iscol; 1379 1380 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1381 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1382 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1383 if (rank == 0) { 1384 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1385 A = AA[0]; 1386 Av = AA[0]; 1387 } 1388 PetscCall(MatDestroySubMatrices(1,&AA)); 1389 */ 1390 PetscCall(ISDestroy(&iscol)); 1391 PetscCall(ISDestroy(&isrow)); 1392 /* 1393 Everyone has to call to draw the matrix since the graphics waits are 1394 synchronized across all processors that share the PetscDraw object 1395 */ 1396 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1397 if (rank == 0) { 1398 if (((PetscObject)mat)->name) { 1399 PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name)); 1400 } 1401 PetscCall(MatView_SeqAIJ(Av,sviewer)); 1402 } 1403 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1404 PetscCall(PetscViewerFlush(viewer)); 1405 PetscCall(MatDestroy(&A)); 1406 } 1407 PetscFunctionReturn(0); 1408 } 1409 1410 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1411 { 1412 PetscBool iascii,isdraw,issocket,isbinary; 1413 1414 PetscFunctionBegin; 1415 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1416 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1417 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1418 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 1419 if (iascii || isdraw || isbinary || issocket) { 1420 PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer)); 1421 } 1422 PetscFunctionReturn(0); 1423 } 1424 1425 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1426 { 1427 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1428 Vec bb1 = NULL; 1429 PetscBool hasop; 1430 1431 PetscFunctionBegin; 1432 if (flag == SOR_APPLY_UPPER) { 1433 
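    /* delegate directly to the diagonal block's SOR kernel; the off-diagonal block plays no role for this flag */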
PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1438 PetscCall(VecDuplicate(bb,&bb1)); 1439 } 1440 1441 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1442 if (flag & SOR_ZERO_INITIAL_GUESS) { 1443 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1444 its--; 1445 } 1446 1447 while (its--) { 1448 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1449 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1450 1451 /* update rhs: bb1 = bb - B*x */ 1452 PetscCall(VecScale(mat->lvec,-1.0)); 1453 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1454 1455 /* local sweep */ 1456 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 1457 } 1458 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1459 if (flag & SOR_ZERO_INITIAL_GUESS) { 1460 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1461 its--; 1462 } 1463 while (its--) { 1464 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1465 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1466 1467 /* update rhs: bb1 = bb - B*x */ 1468 PetscCall(VecScale(mat->lvec,-1.0)); 1469 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1470 1471 /* local sweep */ 1472 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 1473 } 1474 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1475 if (flag & SOR_ZERO_INITIAL_GUESS) { 1476 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1477 its--; 1478 } 1479 while (its--) { 1480 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1481 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1482 1483 /* update rhs: bb1 = bb - B*x */ 1484 PetscCall(VecScale(mat->lvec,-1.0)); 1485 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1486 1487 /* local sweep */ 1488 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 1489 } 1490 } else if (flag & SOR_EISENSTAT) { 1491 Vec xx1; 1492 1493 PetscCall(VecDuplicate(bb,&xx1)); 1494 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx)); 1495 1496 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1497 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1498 if (!mat->diag) { 1499 PetscCall(MatCreateVecs(matin,&mat->diag,NULL)); 1500 PetscCall(MatGetDiagonal(matin,mat->diag)); 1501 } 1502 PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop)); 1503 if (hasop) { 1504 PetscCall(MatMultDiagonalBlock(matin,xx,bb1)); 1505 } else { 1506 PetscCall(VecPointwiseMult(bb1,mat->diag,xx)); 1507 } 1508 PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb)); 1509 1510 PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1)); 1511 1512 /* local sweep */ 1513 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1)); 1514 PetscCall(VecAXPY(xx,1.0,xx1)); 1515 PetscCall(VecDestroy(&xx1)); 1516 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1517 1518 PetscCall(VecDestroy(&bb1)); 1519 1520 matin->factorerrortype = 
mat->A->factorerrortype; 1521 PetscFunctionReturn(0); 1522 } 1523 1524 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1525 { 1526 Mat aA,aB,Aperm; 1527 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1528 PetscScalar *aa,*ba; 1529 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1530 PetscSF rowsf,sf; 1531 IS parcolp = NULL; 1532 PetscBool done; 1533 1534 PetscFunctionBegin; 1535 PetscCall(MatGetLocalSize(A,&m,&n)); 1536 PetscCall(ISGetIndices(rowp,&rwant)); 1537 PetscCall(ISGetIndices(colp,&cwant)); 1538 PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest)); 1539 1540 /* Invert row permutation to find out where my rows should go */ 1541 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf)); 1542 PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant)); 1543 PetscCall(PetscSFSetFromOptions(rowsf)); 1544 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1545 PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1546 PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1547 1548 /* Invert column permutation to find out where my columns should go */ 1549 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1550 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant)); 1551 PetscCall(PetscSFSetFromOptions(sf)); 1552 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1553 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1554 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1555 PetscCall(PetscSFDestroy(&sf)); 1556 1557 PetscCall(ISRestoreIndices(rowp,&rwant)); 1558 PetscCall(ISRestoreIndices(colp,&cwant)); 1559 PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols)); 1560 1561 /* Find out where my gcols should go */ 1562 PetscCall(MatGetSize(aB,NULL,&ng)); 1563 PetscCall(PetscMalloc1(ng,&gcdest)); 1564 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1565 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols)); 1566 PetscCall(PetscSFSetFromOptions(sf)); 1567 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1568 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1569 PetscCall(PetscSFDestroy(&sf)); 1570 1571 PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz)); 1572 PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1573 PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1574 for (i=0; i<m; i++) { 1575 PetscInt row = rdest[i]; 1576 PetscMPIInt rowner; 1577 PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner)); 1578 for (j=ai[i]; j<ai[i+1]; j++) { 1579 PetscInt col = cdest[aj[j]]; 1580 PetscMPIInt cowner; 1581 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */ 1582 if (rowner == cowner) dnnz[i]++; 1583 else onnz[i]++; 1584 } 1585 for (j=bi[i]; j<bi[i+1]; j++) { 1586 PetscInt col = gcdest[bj[j]]; 1587 PetscMPIInt cowner; 1588 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); 1589 if (rowner == cowner) dnnz[i]++; 1590 else onnz[i]++; 1591 } 1592 } 1593 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1594 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1595 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1596 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1597 PetscCall(PetscSFDestroy(&rowsf)); 1598 1599 
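  /* The counts gathered above give, for each local row of the permuted matrix, how many entries fall
     in its diagonal and off-diagonal blocks, so Aperm can be preallocated exactly before the
     MatSetValues() pass below. A minimal usage sketch of this routine (rowperm, colperm, and Aperm
     are illustrative names; ownership index sets give a trivial identity permutation):

         IS  rowperm,colperm;
         Mat Aperm;
         PetscCall(MatGetOwnershipIS(A,&rowperm,&colperm));
         PetscCall(MatPermute(A,rowperm,colperm,&Aperm));
  */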
PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm)); 1600 PetscCall(MatSeqAIJGetArray(aA,&aa)); 1601 PetscCall(MatSeqAIJGetArray(aB,&ba)); 1602 for (i=0; i<m; i++) { 1603 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1604 PetscInt j0,rowlen; 1605 rowlen = ai[i+1] - ai[i]; 1606 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1607 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1608 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES)); 1609 } 1610 rowlen = bi[i+1] - bi[i]; 1611 for (j0=j=0; j<rowlen; j0=j) { 1612 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1613 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES)); 1614 } 1615 } 1616 PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY)); 1617 PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY)); 1618 PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1619 PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1620 PetscCall(MatSeqAIJRestoreArray(aA,&aa)); 1621 PetscCall(MatSeqAIJRestoreArray(aB,&ba)); 1622 PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz)); 1623 PetscCall(PetscFree3(work,rdest,cdest)); 1624 PetscCall(PetscFree(gcdest)); 1625 if (parcolp) PetscCall(ISDestroy(&colp)); 1626 *B = Aperm; 1627 PetscFunctionReturn(0); 1628 } 1629 1630 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1631 { 1632 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1633 1634 PetscFunctionBegin; 1635 PetscCall(MatGetSize(aij->B,NULL,nghosts)); 1636 if (ghosts) *ghosts = aij->garray; 1637 PetscFunctionReturn(0); 1638 } 1639 1640 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1641 { 1642 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1643 Mat A = mat->A,B = mat->B; 1644 PetscLogDouble isend[5],irecv[5]; 1645 1646 PetscFunctionBegin; 1647 info->block_size = 1.0; 1648 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1649 1650 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1651 isend[3] = info->memory; isend[4] = info->mallocs; 1652 1653 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1654 1655 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1656 isend[3] += info->memory; isend[4] += info->mallocs; 1657 if (flag == MAT_LOCAL) { 1658 info->nz_used = isend[0]; 1659 info->nz_allocated = isend[1]; 1660 info->nz_unneeded = isend[2]; 1661 info->memory = isend[3]; 1662 info->mallocs = isend[4]; 1663 } else if (flag == MAT_GLOBAL_MAX) { 1664 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 1665 1666 info->nz_used = irecv[0]; 1667 info->nz_allocated = irecv[1]; 1668 info->nz_unneeded = irecv[2]; 1669 info->memory = irecv[3]; 1670 info->mallocs = irecv[4]; 1671 } else if (flag == MAT_GLOBAL_SUM) { 1672 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 1673 1674 info->nz_used = irecv[0]; 1675 info->nz_allocated = irecv[1]; 1676 info->nz_unneeded = irecv[2]; 1677 info->memory = irecv[3]; 1678 info->mallocs = irecv[4]; 1679 } 1680 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1681 info->fill_ratio_needed = 0; 1682 info->factor_mallocs = 0; 1683 PetscFunctionReturn(0); 1684 } 1685 1686 PetscErrorCode MatSetOption_MPIAIJ(Mat 
A,MatOption op,PetscBool flg) 1687 { 1688 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1689 1690 PetscFunctionBegin; 1691 switch (op) { 1692 case MAT_NEW_NONZERO_LOCATIONS: 1693 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1694 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1695 case MAT_KEEP_NONZERO_PATTERN: 1696 case MAT_NEW_NONZERO_LOCATION_ERR: 1697 case MAT_USE_INODES: 1698 case MAT_IGNORE_ZERO_ENTRIES: 1699 case MAT_FORM_EXPLICIT_TRANSPOSE: 1700 MatCheckPreallocated(A,1); 1701 PetscCall(MatSetOption(a->A,op,flg)); 1702 PetscCall(MatSetOption(a->B,op,flg)); 1703 break; 1704 case MAT_ROW_ORIENTED: 1705 MatCheckPreallocated(A,1); 1706 a->roworiented = flg; 1707 1708 PetscCall(MatSetOption(a->A,op,flg)); 1709 PetscCall(MatSetOption(a->B,op,flg)); 1710 break; 1711 case MAT_FORCE_DIAGONAL_ENTRIES: 1712 case MAT_SORTED_FULL: 1713 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1714 break; 1715 case MAT_IGNORE_OFF_PROC_ENTRIES: 1716 a->donotstash = flg; 1717 break; 1718 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1719 case MAT_SPD: 1720 case MAT_SYMMETRIC: 1721 case MAT_STRUCTURALLY_SYMMETRIC: 1722 case MAT_HERMITIAN: 1723 case MAT_SYMMETRY_ETERNAL: 1724 break; 1725 case MAT_SUBMAT_SINGLEIS: 1726 A->submat_singleis = flg; 1727 break; 1728 case MAT_STRUCTURE_ONLY: 1729 /* The option is handled directly by MatSetOption() */ 1730 break; 1731 default: 1732 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1733 } 1734 PetscFunctionReturn(0); 1735 } 1736 1737 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1738 { 1739 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1740 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1741 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1742 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1743 PetscInt *cmap,*idx_p; 1744 1745 PetscFunctionBegin; 1746 PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1747 mat->getrowactive = PETSC_TRUE; 1748 1749 if (!mat->rowvalues && (idx || v)) { 1750 /* 1751 allocate enough space to hold information from the longest row. 
1752 */ 1753 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1754 PetscInt max = 1,tmp; 1755 for (i=0; i<matin->rmap->n; i++) { 1756 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1757 if (max < tmp) max = tmp; 1758 } 1759 PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices)); 1760 } 1761 1762 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1763 lrow = row - rstart; 1764 1765 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1766 if (!v) {pvA = NULL; pvB = NULL;} 1767 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1768 PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 1769 PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1770 nztot = nzA + nzB; 1771 1772 cmap = mat->garray; 1773 if (v || idx) { 1774 if (nztot) { 1775 /* Sort by increasing column numbers, assuming A and B already sorted */ 1776 PetscInt imark = -1; 1777 if (v) { 1778 *v = v_p = mat->rowvalues; 1779 for (i=0; i<nzB; i++) { 1780 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1781 else break; 1782 } 1783 imark = i; 1784 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1785 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1786 } 1787 if (idx) { 1788 *idx = idx_p = mat->rowindices; 1789 if (imark > -1) { 1790 for (i=0; i<imark; i++) { 1791 idx_p[i] = cmap[cworkB[i]]; 1792 } 1793 } else { 1794 for (i=0; i<nzB; i++) { 1795 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1796 else break; 1797 } 1798 imark = i; 1799 } 1800 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1801 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1802 } 1803 } else { 1804 if (idx) *idx = NULL; 1805 if (v) *v = NULL; 1806 } 1807 } 1808 *nz = nztot; 1809 PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 1810 PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 1811 PetscFunctionReturn(0); 1812 } 1813 1814 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1815 { 1816 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1817 1818 PetscFunctionBegin; 1819 PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1820 aij->getrowactive = PETSC_FALSE; 1821 PetscFunctionReturn(0); 1822 } 1823 1824 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1825 { 1826 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1827 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1828 PetscInt i,j,cstart = mat->cmap->rstart; 1829 PetscReal sum = 0.0; 1830 const MatScalar *v,*amata,*bmata; 1831 1832 PetscFunctionBegin; 1833 if (aij->size == 1) { 1834 PetscCall(MatNorm(aij->A,type,norm)); 1835 } else { 1836 PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata)); 1837 PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata)); 1838 if (type == NORM_FROBENIUS) { 1839 v = amata; 1840 for (i=0; i<amat->nz; i++) { 1841 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1842 } 1843 v = bmata; 1844 for (i=0; i<bmat->nz; i++) { 1845 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1846 } 1847 PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1848 *norm = PetscSqrtReal(*norm); 1849 PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz)); 1850 } else if (type == NORM_1) { /* max column norm */ 1851 PetscReal *tmp,*tmp2; 1852 PetscInt *jj,*garray = aij->garray; 1853 PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp)); 1854 PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2)); 1855 
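      /* The 1-norm path uses two work arrays of global-column length (allocated just above), so it is
         simple but not memory-scalable for matrices with very many columns. A minimal usage sketch,
         assuming A is an assembled MATMPIAIJ:

             PetscReal n1,nf,ni;
             PetscCall(MatNorm(A,NORM_1,&n1));         // max column sum of |a_ij|
             PetscCall(MatNorm(A,NORM_FROBENIUS,&nf)); // sqrt of sum of |a_ij|^2
             PetscCall(MatNorm(A,NORM_INFINITY,&ni));  // max row sum of |a_ij|
      */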
*norm = 0.0; 1856 v = amata; jj = amat->j; 1857 for (j=0; j<amat->nz; j++) { 1858 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1859 } 1860 v = bmata; jj = bmat->j; 1861 for (j=0; j<bmat->nz; j++) { 1862 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1863 } 1864 PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1865 for (j=0; j<mat->cmap->N; j++) { 1866 if (tmp2[j] > *norm) *norm = tmp2[j]; 1867 } 1868 PetscCall(PetscFree(tmp)); 1869 PetscCall(PetscFree(tmp2)); 1870 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1871 } else if (type == NORM_INFINITY) { /* max row norm */ 1872 PetscReal ntemp = 0.0; 1873 for (j=0; j<aij->A->rmap->n; j++) { 1874 v = amata + amat->i[j]; 1875 sum = 0.0; 1876 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1877 sum += PetscAbsScalar(*v); v++; 1878 } 1879 v = bmata + bmat->i[j]; 1880 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1881 sum += PetscAbsScalar(*v); v++; 1882 } 1883 if (sum > ntemp) ntemp = sum; 1884 } 1885 PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 1886 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1887 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1888 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata)); 1889 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata)); 1890 } 1891 PetscFunctionReturn(0); 1892 } 1893 1894 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1895 { 1896 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1897 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1898 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1899 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1900 Mat B,A_diag,*B_diag; 1901 const MatScalar *pbv,*bv; 1902 1903 PetscFunctionBegin; 1904 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1905 ai = Aloc->i; aj = Aloc->j; 1906 bi = Bloc->i; bj = Bloc->j; 1907 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1908 PetscInt *d_nnz,*g_nnz,*o_nnz; 1909 PetscSFNode *oloc; 1910 PETSC_UNUSED PetscSF sf; 1911 1912 PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc)); 1913 /* compute d_nnz for preallocation */ 1914 PetscCall(PetscArrayzero(d_nnz,na)); 1915 for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++; 1916 /* compute local off-diagonal contributions */ 1917 PetscCall(PetscArrayzero(g_nnz,nb)); 1918 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1919 /* map those to global */ 1920 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1921 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray)); 1922 PetscCall(PetscSFSetFromOptions(sf)); 1923 PetscCall(PetscArrayzero(o_nnz,na)); 1924 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1925 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1926 PetscCall(PetscSFDestroy(&sf)); 1927 1928 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 1929 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 1930 PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs))); 1931 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 1932 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 1933 PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc)); 1934 } else { 1935 B = *matout; 1936 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 1937 } 1938 1939 b = (Mat_MPIAIJ*)B->data; 1940 A_diag = a->A; 1941 B_diag = &b->A; 
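  /* The transpose is assembled in two stages: the diagonal block is transposed locally into b->A
     below (no communication), while the off-diagonal block is inserted with MatSetValues() one
     original row at a time (each becoming a column of the transpose), which routes those entries to
     their owning ranks. A minimal usage sketch (At is an illustrative name):

         Mat At;
         PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
         // ... change values of A without altering its nonzero pattern ...
         PetscCall(MatTranspose(A,MAT_REUSE_MATRIX,&At)); // reuse requires At from a previous call
  */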
1942 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1943 A_diag_ncol = A_diag->cmap->N; 1944 B_diag_ilen = sub_B_diag->ilen; 1945 B_diag_i = sub_B_diag->i; 1946 1947 /* Set ilen for diagonal of B */ 1948 for (i=0; i<A_diag_ncol; i++) { 1949 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1950 } 1951 1952 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1953 very quickly (=without using MatSetValues), because all writes are local. */ 1954 PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag)); 1955 1956 /* copy over the B part */ 1957 PetscCall(PetscMalloc1(bi[mb],&cols)); 1958 PetscCall(MatSeqAIJGetArrayRead(a->B,&bv)); 1959 pbv = bv; 1960 row = A->rmap->rstart; 1961 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1962 cols_tmp = cols; 1963 for (i=0; i<mb; i++) { 1964 ncol = bi[i+1]-bi[i]; 1965 PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES)); 1966 row++; 1967 pbv += ncol; cols_tmp += ncol; 1968 } 1969 PetscCall(PetscFree(cols)); 1970 PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv)); 1971 1972 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1973 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1974 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1975 *matout = B; 1976 } else { 1977 PetscCall(MatHeaderMerge(A,&B)); 1978 } 1979 PetscFunctionReturn(0); 1980 } 1981 1982 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1983 { 1984 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1985 Mat a = aij->A,b = aij->B; 1986 PetscInt s1,s2,s3; 1987 1988 PetscFunctionBegin; 1989 PetscCall(MatGetLocalSize(mat,&s2,&s3)); 1990 if (rr) { 1991 PetscCall(VecGetLocalSize(rr,&s1)); 1992 PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1993 /* Overlap communication with computation. */ 1994 PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1995 } 1996 if (ll) { 1997 PetscCall(VecGetLocalSize(ll,&s1)); 1998 PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1999 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 2000 } 2001 /* scale the diagonal block */ 2002 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 2003 2004 if (rr) { 2005 /* Do a scatter end and then right scale the off-diagonal block */ 2006 PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2007 PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec)); 2008 } 2009 PetscFunctionReturn(0); 2010 } 2011 2012 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2013 { 2014 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2015 2016 PetscFunctionBegin; 2017 PetscCall(MatSetUnfactored(a->A)); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2022 { 2023 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2024 Mat a,b,c,d; 2025 PetscBool flg; 2026 2027 PetscFunctionBegin; 2028 a = matA->A; b = matA->B; 2029 c = matB->A; d = matB->B; 2030 2031 PetscCall(MatEqual(a,c,&flg)); 2032 if (flg) { 2033 PetscCall(MatEqual(b,d,&flg)); 2034 } 2035 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 2036 PetscFunctionReturn(0); 2037 } 2038 2039 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2040 { 2041 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2042 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2043 2044 PetscFunctionBegin; 2045 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2046 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2047 /* because of the column compression in the off-processor part of the matrix a->B, 2048 the number of columns in a->B and b->B may be different, hence we cannot call 2049 the MatCopy() directly on the two parts. If need be, we can provide a more 2050 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2051 then copying the submatrices */ 2052 PetscCall(MatCopy_Basic(A,B,str)); 2053 } else { 2054 PetscCall(MatCopy(a->A,b->A,str)); 2055 PetscCall(MatCopy(a->B,b->B,str)); 2056 } 2057 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2058 PetscFunctionReturn(0); 2059 } 2060 2061 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2062 { 2063 PetscFunctionBegin; 2064 PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 /* 2069 Computes the number of nonzeros per row needed for preallocation when X and Y 2070 have different nonzero structure. 2071 */ 2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2073 { 2074 PetscInt i,j,k,nzx,nzy; 2075 2076 PetscFunctionBegin; 2077 /* Set the number of nonzeros in the new matrix */ 2078 for (i=0; i<m; i++) { 2079 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2080 nzx = xi[i+1] - xi[i]; 2081 nzy = yi[i+1] - yi[i]; 2082 nnz[i] = 0; 2083 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2084 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2085 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2086 nnz[i]++; 2087 } 2088 for (; k<nzy; k++) nnz[i]++; 2089 } 2090 PetscFunctionReturn(0); 2091 } 2092 2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2095 { 2096 PetscInt m = Y->rmap->N; 2097 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2098 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2099 2100 PetscFunctionBegin; 2101 PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 2102 PetscFunctionReturn(0); 2103 } 2104 2105 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2106 { 2107 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2108 2109 PetscFunctionBegin; 2110 if (str == SAME_NONZERO_PATTERN) { 2111 PetscCall(MatAXPY(yy->A,a,xx->A,str)); 2112 PetscCall(MatAXPY(yy->B,a,xx->B,str)); 2113 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2114 PetscCall(MatAXPY_Basic(Y,a,X,str)); 2115 } else { 2116 Mat B; 2117 PetscInt *nnz_d,*nnz_o; 2118 2119 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 2120 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 2121 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 2122 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2123 PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap)); 2124 PetscCall(MatSetType(B,((PetscObject)Y)->type_name)); 2125 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d)); 2126 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 2127 PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o)); 2128 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2129 PetscCall(MatHeaderMerge(Y,&B)); 2130 
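    /* For unrelated sparsity patterns, a new matrix B with the union pattern is preallocated above,
       Y + a*X is formed into it, and B then replaces Y via MatHeaderMerge(). SAME_NONZERO_PATTERN is
       much cheaper because the diagonal and off-diagonal blocks can be combined in place. A minimal
       usage sketch (alpha is an illustrative value):

           PetscCall(MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN));
    */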
PetscCall(PetscFree(nnz_d)); 2131 PetscCall(PetscFree(nnz_o)); 2132 } 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2137 2138 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2139 { 2140 PetscFunctionBegin; 2141 if (PetscDefined(USE_COMPLEX)) { 2142 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2143 2144 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2145 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2146 } 2147 PetscFunctionReturn(0); 2148 } 2149 2150 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2151 { 2152 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2153 2154 PetscFunctionBegin; 2155 PetscCall(MatRealPart(a->A)); 2156 PetscCall(MatRealPart(a->B)); 2157 PetscFunctionReturn(0); 2158 } 2159 2160 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2161 { 2162 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2163 2164 PetscFunctionBegin; 2165 PetscCall(MatImaginaryPart(a->A)); 2166 PetscCall(MatImaginaryPart(a->B)); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2171 { 2172 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2173 PetscInt i,*idxb = NULL,m = A->rmap->n; 2174 PetscScalar *va,*vv; 2175 Vec vB,vA; 2176 const PetscScalar *vb; 2177 2178 PetscFunctionBegin; 2179 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 2180 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 2181 2182 PetscCall(VecGetArrayWrite(vA,&va)); 2183 if (idx) { 2184 for (i=0; i<m; i++) { 2185 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2186 } 2187 } 2188 2189 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 2190 PetscCall(PetscMalloc1(m,&idxb)); 2191 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 2192 2193 PetscCall(VecGetArrayWrite(v,&vv)); 2194 PetscCall(VecGetArrayRead(vB,&vb)); 2195 for (i=0; i<m; i++) { 2196 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2197 vv[i] = vb[i]; 2198 if (idx) idx[i] = a->garray[idxb[i]]; 2199 } else { 2200 vv[i] = va[i]; 2201 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2202 idx[i] = a->garray[idxb[i]]; 2203 } 2204 } 2205 PetscCall(VecRestoreArrayWrite(vA,&vv)); 2206 PetscCall(VecRestoreArrayWrite(vA,&va)); 2207 PetscCall(VecRestoreArrayRead(vB,&vb)); 2208 PetscCall(PetscFree(idxb)); 2209 PetscCall(VecDestroy(&vA)); 2210 PetscCall(VecDestroy(&vB)); 2211 PetscFunctionReturn(0); 2212 } 2213 2214 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2215 { 2216 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2217 PetscInt m = A->rmap->n,n = A->cmap->n; 2218 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2219 PetscInt *cmap = mat->garray; 2220 PetscInt *diagIdx, *offdiagIdx; 2221 Vec diagV, offdiagV; 2222 PetscScalar *a, *diagA, *offdiagA; 2223 const PetscScalar *ba,*bav; 2224 PetscInt r,j,col,ncols,*bi,*bj; 2225 Mat B = mat->B; 2226 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2227 2228 PetscFunctionBegin; 2229 /* When a process holds entire A and other processes have no entry */ 2230 if (A->cmap->N == n) { 2231 PetscCall(VecGetArrayWrite(v,&diagA)); 2232 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2233 PetscCall(MatGetRowMinAbs(mat->A,diagV,idx)); 2234 PetscCall(VecDestroy(&diagV)); 2235 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2236 PetscFunctionReturn(0); 2237 } else if (n == 0) { 2238 if (m) { 2239 PetscCall(VecGetArrayWrite(v,&a)); 2240 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2241 PetscCall(VecRestoreArrayWrite(v,&a)); 2242 } 2243 PetscFunctionReturn(0); 2244 } 2245 2246 
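  /* General case: the minimum absolute value in each row must also account for the implicit zeros
     that the compressed off-diagonal block B does not store, which is why the loop below searches for
     the first "hole" in the column map. A minimal usage sketch (rmin and loc are illustrative names;
     the index array is optional and may be NULL):

         Vec      rmin;
         PetscInt *loc,nlocal;
         PetscCall(MatGetLocalSize(A,&nlocal,NULL));
         PetscCall(PetscMalloc1(nlocal,&loc));
         PetscCall(MatCreateVecs(A,NULL,&rmin));
         PetscCall(MatGetRowMinAbs(A,rmin,loc));
  */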
PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2247 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2248 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2249 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2250 2251 /* Get offdiagIdx[] for implicit 0.0 */ 2252 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2253 ba = bav; 2254 bi = b->i; 2255 bj = b->j; 2256 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2257 for (r = 0; r < m; r++) { 2258 ncols = bi[r+1] - bi[r]; 2259 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2260 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2261 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2262 offdiagA[r] = 0.0; 2263 2264 /* Find first hole in the cmap */ 2265 for (j=0; j<ncols; j++) { 2266 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2267 if (col > j && j < cstart) { 2268 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2269 break; 2270 } else if (col > j + n && j >= cstart) { 2271 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2272 break; 2273 } 2274 } 2275 if (j == ncols && ncols < A->cmap->N - n) { 2276 /* a hole is outside compressed Bcols */ 2277 if (ncols == 0) { 2278 if (cstart) { 2279 offdiagIdx[r] = 0; 2280 } else offdiagIdx[r] = cend; 2281 } else { /* ncols > 0 */ 2282 offdiagIdx[r] = cmap[ncols-1] + 1; 2283 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2284 } 2285 } 2286 } 2287 2288 for (j=0; j<ncols; j++) { 2289 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2290 ba++; bj++; 2291 } 2292 } 2293 2294 PetscCall(VecGetArrayWrite(v, &a)); 2295 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2296 for (r = 0; r < m; ++r) { 2297 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2298 a[r] = diagA[r]; 2299 if (idx) idx[r] = cstart + diagIdx[r]; 2300 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2301 a[r] = diagA[r]; 2302 if (idx) { 2303 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2304 idx[r] = cstart + diagIdx[r]; 2305 } else idx[r] = offdiagIdx[r]; 2306 } 2307 } else { 2308 a[r] = offdiagA[r]; 2309 if (idx) idx[r] = offdiagIdx[r]; 2310 } 2311 } 2312 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2313 PetscCall(VecRestoreArrayWrite(v, &a)); 2314 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2315 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2316 PetscCall(VecDestroy(&diagV)); 2317 PetscCall(VecDestroy(&offdiagV)); 2318 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2319 PetscFunctionReturn(0); 2320 } 2321 2322 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2323 { 2324 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2325 PetscInt m = A->rmap->n,n = A->cmap->n; 2326 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2327 PetscInt *cmap = mat->garray; 2328 PetscInt *diagIdx, *offdiagIdx; 2329 Vec diagV, offdiagV; 2330 PetscScalar *a, *diagA, *offdiagA; 2331 const PetscScalar *ba,*bav; 2332 PetscInt r,j,col,ncols,*bi,*bj; 2333 Mat B = mat->B; 2334 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2335 2336 PetscFunctionBegin; 2337 /* When a process holds entire A and other processes have no entry */ 2338 if (A->cmap->N == n) { 2339 PetscCall(VecGetArrayWrite(v,&diagA)); 2340 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2341 PetscCall(MatGetRowMin(mat->A,diagV,idx)); 2342 PetscCall(VecDestroy(&diagV)); 2343 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2344 PetscFunctionReturn(0); 2345 } else if 
(n == 0) { 2346 if (m) { 2347 PetscCall(VecGetArrayWrite(v,&a)); 2348 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2349 PetscCall(VecRestoreArrayWrite(v,&a)); 2350 } 2351 PetscFunctionReturn(0); 2352 } 2353 2354 PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx)); 2355 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2356 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2357 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2358 2359 /* Get offdiagIdx[] for implicit 0.0 */ 2360 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2361 ba = bav; 2362 bi = b->i; 2363 bj = b->j; 2364 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2365 for (r = 0; r < m; r++) { 2366 ncols = bi[r+1] - bi[r]; 2367 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2368 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2369 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2370 offdiagA[r] = 0.0; 2371 2372 /* Find first hole in the cmap */ 2373 for (j=0; j<ncols; j++) { 2374 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2375 if (col > j && j < cstart) { 2376 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2377 break; 2378 } else if (col > j + n && j >= cstart) { 2379 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2380 break; 2381 } 2382 } 2383 if (j == ncols && ncols < A->cmap->N - n) { 2384 /* a hole is outside compressed Bcols */ 2385 if (ncols == 0) { 2386 if (cstart) { 2387 offdiagIdx[r] = 0; 2388 } else offdiagIdx[r] = cend; 2389 } else { /* ncols > 0 */ 2390 offdiagIdx[r] = cmap[ncols-1] + 1; 2391 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2392 } 2393 } 2394 } 2395 2396 for (j=0; j<ncols; j++) { 2397 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2398 ba++; bj++; 2399 } 2400 } 2401 2402 PetscCall(VecGetArrayWrite(v, &a)); 2403 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2404 for (r = 0; r < m; ++r) { 2405 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2406 a[r] = diagA[r]; 2407 if (idx) idx[r] = cstart + diagIdx[r]; 2408 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2409 a[r] = diagA[r]; 2410 if (idx) { 2411 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2412 idx[r] = cstart + diagIdx[r]; 2413 } else idx[r] = offdiagIdx[r]; 2414 } 2415 } else { 2416 a[r] = offdiagA[r]; 2417 if (idx) idx[r] = offdiagIdx[r]; 2418 } 2419 } 2420 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2421 PetscCall(VecRestoreArrayWrite(v, &a)); 2422 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2423 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2424 PetscCall(VecDestroy(&diagV)); 2425 PetscCall(VecDestroy(&offdiagV)); 2426 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2431 { 2432 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2433 PetscInt m = A->rmap->n,n = A->cmap->n; 2434 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2435 PetscInt *cmap = mat->garray; 2436 PetscInt *diagIdx, *offdiagIdx; 2437 Vec diagV, offdiagV; 2438 PetscScalar *a, *diagA, *offdiagA; 2439 const PetscScalar *ba,*bav; 2440 PetscInt r,j,col,ncols,*bi,*bj; 2441 Mat B = mat->B; 2442 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2443 2444 PetscFunctionBegin; 2445 /* When a process holds entire A and other processes have no entry */ 2446 if (A->cmap->N == n) { 2447 PetscCall(VecGetArrayWrite(v,&diagA)); 2448 
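    /* This rank owns every column, so the output array of v is wrapped in a temporary sequential
       vector and the reduction is delegated entirely to the local diagonal block mat->A. */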
PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2449 PetscCall(MatGetRowMax(mat->A,diagV,idx)); 2450 PetscCall(VecDestroy(&diagV)); 2451 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2452 PetscFunctionReturn(0); 2453 } else if (n == 0) { 2454 if (m) { 2455 PetscCall(VecGetArrayWrite(v,&a)); 2456 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2457 PetscCall(VecRestoreArrayWrite(v,&a)); 2458 } 2459 PetscFunctionReturn(0); 2460 } 2461 2462 PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2463 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2464 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2465 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2466 2467 /* Get offdiagIdx[] for implicit 0.0 */ 2468 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2469 ba = bav; 2470 bi = b->i; 2471 bj = b->j; 2472 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2473 for (r = 0; r < m; r++) { 2474 ncols = bi[r+1] - bi[r]; 2475 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2476 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2477 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2478 offdiagA[r] = 0.0; 2479 2480 /* Find first hole in the cmap */ 2481 for (j=0; j<ncols; j++) { 2482 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2483 if (col > j && j < cstart) { 2484 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2485 break; 2486 } else if (col > j + n && j >= cstart) { 2487 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2488 break; 2489 } 2490 } 2491 if (j == ncols && ncols < A->cmap->N - n) { 2492 /* a hole is outside compressed Bcols */ 2493 if (ncols == 0) { 2494 if (cstart) { 2495 offdiagIdx[r] = 0; 2496 } else offdiagIdx[r] = cend; 2497 } else { /* ncols > 0 */ 2498 offdiagIdx[r] = cmap[ncols-1] + 1; 2499 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2500 } 2501 } 2502 } 2503 2504 for (j=0; j<ncols; j++) { 2505 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2506 ba++; bj++; 2507 } 2508 } 2509 2510 PetscCall(VecGetArrayWrite(v, &a)); 2511 PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA)); 2512 for (r = 0; r < m; ++r) { 2513 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2514 a[r] = diagA[r]; 2515 if (idx) idx[r] = cstart + diagIdx[r]; 2516 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2517 a[r] = diagA[r]; 2518 if (idx) { 2519 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2520 idx[r] = cstart + diagIdx[r]; 2521 } else idx[r] = offdiagIdx[r]; 2522 } 2523 } else { 2524 a[r] = offdiagA[r]; 2525 if (idx) idx[r] = offdiagIdx[r]; 2526 } 2527 } 2528 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2529 PetscCall(VecRestoreArrayWrite(v, &a)); 2530 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2531 PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA)); 2532 PetscCall(VecDestroy(&diagV)); 2533 PetscCall(VecDestroy(&offdiagV)); 2534 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2535 PetscFunctionReturn(0); 2536 } 2537 2538 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2539 { 2540 Mat *dummy; 2541 2542 PetscFunctionBegin; 2543 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy)); 2544 *newmat = *dummy; 2545 PetscCall(PetscFree(dummy)); 2546 PetscFunctionReturn(0); 2547 } 2548 2549 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2550 { 2551 Mat_MPIAIJ *a = (Mat_MPIAIJ*) 
A->data; 2552 2553 PetscFunctionBegin; 2554 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2555 A->factorerrortype = a->A->factorerrortype; 2556 PetscFunctionReturn(0); 2557 } 2558 2559 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2560 { 2561 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2562 2563 PetscFunctionBegin; 2564 PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2565 PetscCall(MatSetRandom(aij->A,rctx)); 2566 if (x->assembled) { 2567 PetscCall(MatSetRandom(aij->B,rctx)); 2568 } else { 2569 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx)); 2570 } 2571 PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY)); 2572 PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY)); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2577 { 2578 PetscFunctionBegin; 2579 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2580 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2581 PetscFunctionReturn(0); 2582 } 2583 2584 /*@ 2585 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2586 2587 Not collective 2588 2589 Input Parameter: 2590 . A - the matrix 2591 2592 Output Parameter: 2593 . nz - the number of nonzeros 2594 2595 Level: advanced 2596 2597 @*/ 2598 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A,PetscCount *nz) 2599 { 2600 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)A->data; 2601 Mat_SeqAIJ *aaij = (Mat_SeqAIJ*)maij->A->data, *baij = (Mat_SeqAIJ*)maij->B->data; 2602 2603 PetscFunctionBegin; 2604 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2605 PetscFunctionReturn(0); 2606 } 2607 2608 /*@ 2609 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2610 2611 Collective on Mat 2612 2613 Input Parameters: 2614 + A - the matrix 2615 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2616 2617 Level: advanced 2618 2619 @*/ 2620 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2621 { 2622 PetscFunctionBegin; 2623 PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc)); 2624 PetscFunctionReturn(0); 2625 } 2626 2627 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2628 { 2629 PetscBool sc = PETSC_FALSE,flg; 2630 2631 PetscFunctionBegin; 2632 PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options"); 2633 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2634 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg)); 2635 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc)); 2636 PetscOptionsHeadEnd(); 2637 PetscFunctionReturn(0); 2638 } 2639 2640 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2641 { 2642 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2643 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2644 2645 PetscFunctionBegin; 2646 if (!Y->preallocated) { 2647 PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL)); 2648 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
*/ 2649 PetscInt nonew = aij->nonew; 2650 PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL)); 2651 aij->nonew = nonew; 2652 } 2653 PetscCall(MatShift_Basic(Y,a)); 2654 PetscFunctionReturn(0); 2655 } 2656 2657 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2658 { 2659 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2660 2661 PetscFunctionBegin; 2662 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2663 PetscCall(MatMissingDiagonal(a->A,missing,d)); 2664 if (d) { 2665 PetscInt rstart; 2666 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 2667 *d += rstart; 2668 2669 } 2670 PetscFunctionReturn(0); 2671 } 2672 2673 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2674 { 2675 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2676 2677 PetscFunctionBegin; 2678 PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag)); 2679 PetscFunctionReturn(0); 2680 } 2681 2682 /* -------------------------------------------------------------------*/ 2683 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2684 MatGetRow_MPIAIJ, 2685 MatRestoreRow_MPIAIJ, 2686 MatMult_MPIAIJ, 2687 /* 4*/ MatMultAdd_MPIAIJ, 2688 MatMultTranspose_MPIAIJ, 2689 MatMultTransposeAdd_MPIAIJ, 2690 NULL, 2691 NULL, 2692 NULL, 2693 /*10*/ NULL, 2694 NULL, 2695 NULL, 2696 MatSOR_MPIAIJ, 2697 MatTranspose_MPIAIJ, 2698 /*15*/ MatGetInfo_MPIAIJ, 2699 MatEqual_MPIAIJ, 2700 MatGetDiagonal_MPIAIJ, 2701 MatDiagonalScale_MPIAIJ, 2702 MatNorm_MPIAIJ, 2703 /*20*/ MatAssemblyBegin_MPIAIJ, 2704 MatAssemblyEnd_MPIAIJ, 2705 MatSetOption_MPIAIJ, 2706 MatZeroEntries_MPIAIJ, 2707 /*24*/ MatZeroRows_MPIAIJ, 2708 NULL, 2709 NULL, 2710 NULL, 2711 NULL, 2712 /*29*/ MatSetUp_MPIAIJ, 2713 NULL, 2714 NULL, 2715 MatGetDiagonalBlock_MPIAIJ, 2716 NULL, 2717 /*34*/ MatDuplicate_MPIAIJ, 2718 NULL, 2719 NULL, 2720 NULL, 2721 NULL, 2722 /*39*/ MatAXPY_MPIAIJ, 2723 MatCreateSubMatrices_MPIAIJ, 2724 MatIncreaseOverlap_MPIAIJ, 2725 MatGetValues_MPIAIJ, 2726 MatCopy_MPIAIJ, 2727 /*44*/ MatGetRowMax_MPIAIJ, 2728 MatScale_MPIAIJ, 2729 MatShift_MPIAIJ, 2730 MatDiagonalSet_MPIAIJ, 2731 MatZeroRowsColumns_MPIAIJ, 2732 /*49*/ MatSetRandom_MPIAIJ, 2733 MatGetRowIJ_MPIAIJ, 2734 MatRestoreRowIJ_MPIAIJ, 2735 NULL, 2736 NULL, 2737 /*54*/ MatFDColoringCreate_MPIXAIJ, 2738 NULL, 2739 MatSetUnfactored_MPIAIJ, 2740 MatPermute_MPIAIJ, 2741 NULL, 2742 /*59*/ MatCreateSubMatrix_MPIAIJ, 2743 MatDestroy_MPIAIJ, 2744 MatView_MPIAIJ, 2745 NULL, 2746 NULL, 2747 /*64*/ NULL, 2748 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2749 NULL, 2750 NULL, 2751 NULL, 2752 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2753 MatGetRowMinAbs_MPIAIJ, 2754 NULL, 2755 NULL, 2756 NULL, 2757 NULL, 2758 /*75*/ MatFDColoringApply_AIJ, 2759 MatSetFromOptions_MPIAIJ, 2760 NULL, 2761 NULL, 2762 MatFindZeroDiagonals_MPIAIJ, 2763 /*80*/ NULL, 2764 NULL, 2765 NULL, 2766 /*83*/ MatLoad_MPIAIJ, 2767 MatIsSymmetric_MPIAIJ, 2768 NULL, 2769 NULL, 2770 NULL, 2771 NULL, 2772 /*89*/ NULL, 2773 NULL, 2774 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2775 NULL, 2776 NULL, 2777 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2778 NULL, 2779 NULL, 2780 NULL, 2781 MatBindToCPU_MPIAIJ, 2782 /*99*/ MatProductSetFromOptions_MPIAIJ, 2783 NULL, 2784 NULL, 2785 MatConjugate_MPIAIJ, 2786 NULL, 2787 /*104*/MatSetValuesRow_MPIAIJ, 2788 MatRealPart_MPIAIJ, 2789 MatImaginaryPart_MPIAIJ, 2790 NULL, 2791 NULL, 2792 /*109*/NULL, 2793 NULL, 2794 MatGetRowMin_MPIAIJ, 2795 NULL, 2796 MatMissingDiagonal_MPIAIJ, 2797 
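                                /* The remaining slots continue this MatOperation-indexed dispatch
                                   table; a NULL entry means the operation is either unsupported for
                                   MPIAIJ or served by a type-independent default. Availability can be
                                   probed with, e.g., MatHasOperation(A,MATOP_MULT_DIAGONAL_BLOCK,&has). */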
/*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2798 NULL, 2799 MatGetGhosts_MPIAIJ, 2800 NULL, 2801 NULL, 2802 /*119*/MatMultDiagonalBlock_MPIAIJ, 2803 NULL, 2804 NULL, 2805 NULL, 2806 MatGetMultiProcBlock_MPIAIJ, 2807 /*124*/MatFindNonzeroRows_MPIAIJ, 2808 MatGetColumnReductions_MPIAIJ, 2809 MatInvertBlockDiagonal_MPIAIJ, 2810 MatInvertVariableBlockDiagonal_MPIAIJ, 2811 MatCreateSubMatricesMPI_MPIAIJ, 2812 /*129*/NULL, 2813 NULL, 2814 NULL, 2815 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2816 NULL, 2817 /*134*/NULL, 2818 NULL, 2819 NULL, 2820 NULL, 2821 NULL, 2822 /*139*/MatSetBlockSizes_MPIAIJ, 2823 NULL, 2824 NULL, 2825 MatFDColoringSetUp_MPIXAIJ, 2826 MatFindOffBlockDiagonalEntries_MPIAIJ, 2827 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2828 /*145*/NULL, 2829 NULL, 2830 NULL, 2831 MatCreateGraph_Simple_AIJ, 2832 MatFilter_AIJ 2833 }; 2834 2835 /* ----------------------------------------------------------------------------------------*/ 2836 2837 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2838 { 2839 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2840 2841 PetscFunctionBegin; 2842 PetscCall(MatStoreValues(aij->A)); 2843 PetscCall(MatStoreValues(aij->B)); 2844 PetscFunctionReturn(0); 2845 } 2846 2847 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2848 { 2849 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2850 2851 PetscFunctionBegin; 2852 PetscCall(MatRetrieveValues(aij->A)); 2853 PetscCall(MatRetrieveValues(aij->B)); 2854 PetscFunctionReturn(0); 2855 } 2856 2857 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2858 { 2859 Mat_MPIAIJ *b; 2860 PetscMPIInt size; 2861 2862 PetscFunctionBegin; 2863 PetscCall(PetscLayoutSetUp(B->rmap)); 2864 PetscCall(PetscLayoutSetUp(B->cmap)); 2865 b = (Mat_MPIAIJ*)B->data; 2866 2867 #if defined(PETSC_USE_CTABLE) 2868 PetscCall(PetscTableDestroy(&b->colmap)); 2869 #else 2870 PetscCall(PetscFree(b->colmap)); 2871 #endif 2872 PetscCall(PetscFree(b->garray)); 2873 PetscCall(VecDestroy(&b->lvec)); 2874 PetscCall(VecScatterDestroy(&b->Mvctx)); 2875 2876 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2877 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 2878 PetscCall(MatDestroy(&b->B)); 2879 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 2880 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0)); 2881 PetscCall(MatSetBlockSizesFromMats(b->B,B,B)); 2882 PetscCall(MatSetType(b->B,MATSEQAIJ)); 2883 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 2884 2885 if (!B->preallocated) { 2886 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 2887 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 2888 PetscCall(MatSetBlockSizesFromMats(b->A,B,B)); 2889 PetscCall(MatSetType(b->A,MATSEQAIJ)); 2890 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 2891 } 2892 2893 PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz)); 2894 PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz)); 2895 B->preallocated = PETSC_TRUE; 2896 B->was_assembled = PETSC_FALSE; 2897 B->assembled = PETSC_FALSE; 2898 PetscFunctionReturn(0); 2899 } 2900 2901 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2902 { 2903 Mat_MPIAIJ *b; 2904 2905 PetscFunctionBegin; 2906 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2907 PetscCall(PetscLayoutSetUp(B->rmap)); 2908 PetscCall(PetscLayoutSetUp(B->cmap)); 2909 b = (Mat_MPIAIJ*)B->data; 2910 2911 #if defined(PETSC_USE_CTABLE) 2912 PetscCall(PetscTableDestroy(&b->colmap)); 2913 #else 2914 PetscCall(PetscFree(b->colmap)); 2915 #endif 2916 PetscCall(PetscFree(b->garray)); 2917 PetscCall(VecDestroy(&b->lvec)); 2918 PetscCall(VecScatterDestroy(&b->Mvctx)); 2919 2920 PetscCall(MatResetPreallocation(b->A)); 2921 PetscCall(MatResetPreallocation(b->B)); 2922 B->preallocated = PETSC_TRUE; 2923 B->was_assembled = PETSC_FALSE; 2924 B->assembled = PETSC_FALSE; 2925 PetscFunctionReturn(0); 2926 } 2927 2928 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2929 { 2930 Mat mat; 2931 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2932 2933 PetscFunctionBegin; 2934 *newmat = NULL; 2935 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 2936 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 2937 PetscCall(MatSetBlockSizesFromMats(mat,matin,matin)); 2938 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 2939 a = (Mat_MPIAIJ*)mat->data; 2940 2941 mat->factortype = matin->factortype; 2942 mat->assembled = matin->assembled; 2943 mat->insertmode = NOT_SET_VALUES; 2944 mat->preallocated = matin->preallocated; 2945 2946 a->size = oldmat->size; 2947 a->rank = oldmat->rank; 2948 a->donotstash = oldmat->donotstash; 2949 a->roworiented = oldmat->roworiented; 2950 a->rowindices = NULL; 2951 a->rowvalues = NULL; 2952 a->getrowactive = PETSC_FALSE; 2953 2954 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 2955 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 2956 2957 if (oldmat->colmap) { 2958 #if defined(PETSC_USE_CTABLE) 2959 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 2960 #else 2961 PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap)); 2962 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt))); 2963 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N)); 2964 #endif 2965 } else a->colmap = NULL; 2966 if (oldmat->garray) { 2967 PetscInt len; 2968 len = oldmat->B->cmap->n; 2969 PetscCall(PetscMalloc1(len+1,&a->garray)); 2970 PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 2971 if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 2972 } else a->garray = NULL; 2973 2974 /* It may happen MatDuplicate is called with a non-assembled matrix 2975 In fact, MatDuplicate only requires the matrix to be preallocated 2976 This may happen inside a 
DMCreateMatrix_Shell */ 2977 if (oldmat->lvec) { 2978 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 2979 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 2980 } 2981 if (oldmat->Mvctx) { 2982 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 2983 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 2984 } 2985 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2986 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2987 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 2988 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 2989 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 2990 *newmat = mat; 2991 PetscFunctionReturn(0); 2992 } 2993 2994 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2995 { 2996 PetscBool isbinary, ishdf5; 2997 2998 PetscFunctionBegin; 2999 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3000 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3001 /* force binary viewer to load .info file if it has not yet done so */ 3002 PetscCall(PetscViewerSetUp(viewer)); 3003 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 3004 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5)); 3005 if (isbinary) { 3006 PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer)); 3007 } else if (ishdf5) { 3008 #if defined(PETSC_HAVE_HDF5) 3009 PetscCall(MatLoad_AIJ_HDF5(newMat,viewer)); 3010 #else 3011 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3012 #endif 3013 } else { 3014 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3015 } 3016 PetscFunctionReturn(0); 3017 } 3018 3019 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3020 { 3021 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3022 PetscInt *rowidxs,*colidxs; 3023 PetscScalar *matvals; 3024 3025 PetscFunctionBegin; 3026 PetscCall(PetscViewerSetUp(viewer)); 3027 3028 /* read in matrix header */ 3029 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3030 PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3031 M = header[1]; N = header[2]; nz = header[3]; 3032 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3033 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3034 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3035 3036 /* set block sizes from the viewer's .info file */ 3037 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3038 /* set global sizes if not set already */ 3039 if (mat->rmap->N < 0) mat->rmap->N = M; 3040 if (mat->cmap->N < 0) mat->cmap->N = N; 3041 PetscCall(PetscLayoutSetUp(mat->rmap)); 3042 PetscCall(PetscLayoutSetUp(mat->cmap)); 3043 3044 /* check if the matrix sizes are correct */ 3045 PetscCall(MatGetSize(mat,&rows,&cols)); 3046 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT 
", %" PetscInt_FMT ")",M,N,rows,cols); 3047 3048 /* read in row lengths and build row indices */ 3049 PetscCall(MatGetLocalSize(mat,&m,NULL)); 3050 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3051 PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT)); 3052 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3053 PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer))); 3054 PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3055 /* read in column indices and matrix values */ 3056 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals)); 3057 PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 3058 PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 3059 /* store matrix indices and values */ 3060 PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals)); 3061 PetscCall(PetscFree(rowidxs)); 3062 PetscCall(PetscFree2(colidxs,matvals)); 3063 PetscFunctionReturn(0); 3064 } 3065 3066 /* Not scalable because of ISAllGather() unless getting all columns. */ 3067 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3068 { 3069 IS iscol_local; 3070 PetscBool isstride; 3071 PetscMPIInt lisstride=0,gisstride; 3072 3073 PetscFunctionBegin; 3074 /* check if we are grabbing all columns*/ 3075 PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride)); 3076 3077 if (isstride) { 3078 PetscInt start,len,mstart,mlen; 3079 PetscCall(ISStrideGetInfo(iscol,&start,NULL)); 3080 PetscCall(ISGetLocalSize(iscol,&len)); 3081 PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen)); 3082 if (mstart == start && mlen-mstart == len) lisstride = 1; 3083 } 3084 3085 PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat))); 3086 if (gisstride) { 3087 PetscInt N; 3088 PetscCall(MatGetSize(mat,NULL,&N)); 3089 PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local)); 3090 PetscCall(ISSetIdentity(iscol_local)); 3091 PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3092 } else { 3093 PetscInt cbs; 3094 PetscCall(ISGetBlockSize(iscol,&cbs)); 3095 PetscCall(ISAllGather(iscol,&iscol_local)); 3096 PetscCall(ISSetBlockSize(iscol_local,cbs)); 3097 } 3098 3099 *isseq = iscol_local; 3100 PetscFunctionReturn(0); 3101 } 3102 3103 /* 3104 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3105 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3106 3107 Input Parameters: 3108 mat - matrix 3109 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3110 i.e., mat->rstart <= isrow[i] < mat->rend 3111 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3112 i.e., mat->cstart <= iscol[i] < mat->cend 3113 Output Parameter: 3114 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3115 iscol_o - sequential column index set for retrieving mat->B 3116 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3117 */ 3118 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3119 { 3120 Vec x,cmap; 3121 const PetscInt *is_idx; 3122 
PetscScalar *xarray,*cmaparray; 3123 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3124 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3125 Mat B=a->B; 3126 Vec lvec=a->lvec,lcmap; 3127 PetscInt i,cstart,cend,Bn=B->cmap->N; 3128 MPI_Comm comm; 3129 VecScatter Mvctx=a->Mvctx; 3130 3131 PetscFunctionBegin; 3132 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3133 PetscCall(ISGetLocalSize(iscol,&ncols)); 3134 3135 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3136 PetscCall(MatCreateVecs(mat,&x,NULL)); 3137 PetscCall(VecSet(x,-1.0)); 3138 PetscCall(VecDuplicate(x,&cmap)); 3139 PetscCall(VecSet(cmap,-1.0)); 3140 3141 /* Get start indices */ 3142 PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm)); 3143 isstart -= ncols; 3144 PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend)); 3145 3146 PetscCall(ISGetIndices(iscol,&is_idx)); 3147 PetscCall(VecGetArray(x,&xarray)); 3148 PetscCall(VecGetArray(cmap,&cmaparray)); 3149 PetscCall(PetscMalloc1(ncols,&idx)); 3150 for (i=0; i<ncols; i++) { 3151 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3152 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3153 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3154 } 3155 PetscCall(VecRestoreArray(x,&xarray)); 3156 PetscCall(VecRestoreArray(cmap,&cmaparray)); 3157 PetscCall(ISRestoreIndices(iscol,&is_idx)); 3158 3159 /* Get iscol_d */ 3160 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d)); 3161 PetscCall(ISGetBlockSize(iscol,&i)); 3162 PetscCall(ISSetBlockSize(*iscol_d,i)); 3163 3164 /* Get isrow_d */ 3165 PetscCall(ISGetLocalSize(isrow,&m)); 3166 rstart = mat->rmap->rstart; 3167 PetscCall(PetscMalloc1(m,&idx)); 3168 PetscCall(ISGetIndices(isrow,&is_idx)); 3169 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3170 PetscCall(ISRestoreIndices(isrow,&is_idx)); 3171 3172 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d)); 3173 PetscCall(ISGetBlockSize(isrow,&i)); 3174 PetscCall(ISSetBlockSize(*isrow_d,i)); 3175 3176 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3177 PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 3178 PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 3179 3180 PetscCall(VecDuplicate(lvec,&lcmap)); 3181 3182 PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 3183 PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 3184 3185 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3186 /* off-process column indices */ 3187 count = 0; 3188 PetscCall(PetscMalloc1(Bn,&idx)); 3189 PetscCall(PetscMalloc1(Bn,&cmap1)); 3190 3191 PetscCall(VecGetArray(lvec,&xarray)); 3192 PetscCall(VecGetArray(lcmap,&cmaparray)); 3193 for (i=0; i<Bn; i++) { 3194 if (PetscRealPart(xarray[i]) > -1.0) { 3195 idx[count] = i; /* local column index in off-diagonal part B */ 3196 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3197 count++; 3198 } 3199 } 3200 PetscCall(VecRestoreArray(lvec,&xarray)); 3201 PetscCall(VecRestoreArray(lcmap,&cmaparray)); 3202 3203 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o)); 3204 /* cannot ensure iscol_o has same blocksize as iscol! 
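     (iscol_o keeps only those columns of the off-diagonal block B whose global index appears in iscol, so its length need not be compatible with iscol's block size.)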
*/ 3205 3206 PetscCall(PetscFree(idx)); 3207 *garray = cmap1; 3208 3209 PetscCall(VecDestroy(&x)); 3210 PetscCall(VecDestroy(&cmap)); 3211 PetscCall(VecDestroy(&lcmap)); 3212 PetscFunctionReturn(0); 3213 } 3214 3215 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3216 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3217 { 3218 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3219 Mat M = NULL; 3220 MPI_Comm comm; 3221 IS iscol_d,isrow_d,iscol_o; 3222 Mat Asub = NULL,Bsub = NULL; 3223 PetscInt n; 3224 3225 PetscFunctionBegin; 3226 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3227 3228 if (call == MAT_REUSE_MATRIX) { 3229 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3230 PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d)); 3231 PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3232 3233 PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d)); 3234 PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3235 3236 PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o)); 3237 PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3238 3239 /* Update diagonal and off-diagonal portions of submat */ 3240 asub = (Mat_MPIAIJ*)(*submat)->data; 3241 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A)); 3242 PetscCall(ISGetLocalSize(iscol_o,&n)); 3243 if (n) { 3244 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B)); 3245 } 3246 PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY)); 3247 PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY)); 3248 3249 } else { /* call == MAT_INITIAL_MATRIX) */ 3250 const PetscInt *garray; 3251 PetscInt BsubN; 3252 3253 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3254 PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray)); 3255 3256 /* Create local submatrices Asub and Bsub */ 3257 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub)); 3258 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub)); 3259 3260 /* Create submatrix M */ 3261 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M)); 3262 3263 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3264 asub = (Mat_MPIAIJ*)M->data; 3265 3266 PetscCall(ISGetLocalSize(iscol_o,&BsubN)); 3267 n = asub->B->cmap->N; 3268 if (BsubN > n) { 3269 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3270 const PetscInt *idx; 3271 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3272 PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN)); 3273 3274 PetscCall(PetscMalloc1(n,&idx_new)); 3275 j = 0; 3276 PetscCall(ISGetIndices(iscol_o,&idx)); 3277 for (i=0; i<n; i++) { 3278 if (j >= BsubN) break; 3279 while (subgarray[i] > garray[j]) j++; 3280 3281 if (subgarray[i] == garray[j]) { 3282 idx_new[i] = idx[j++]; 3283 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]); 3284 } 3285 PetscCall(ISRestoreIndices(iscol_o,&idx)); 3286 3287 PetscCall(ISDestroy(&iscol_o)); 3288 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o)); 3289 3290 } else if (BsubN < n) { 3291 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N); 3292 } 3293 3294 PetscCall(PetscFree(garray)); 3295 *submat = M; 3296 3297 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3298 PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d)); 3299 PetscCall(ISDestroy(&isrow_d)); 3300 3301 PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d)); 3302 PetscCall(ISDestroy(&iscol_d)); 3303 3304 PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o)); 3305 PetscCall(ISDestroy(&iscol_o)); 3306 } 3307 PetscFunctionReturn(0); 3308 } 3309 3310 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3311 { 3312 IS iscol_local=NULL,isrow_d; 3313 PetscInt csize; 3314 PetscInt n,i,j,start,end; 3315 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3316 MPI_Comm comm; 3317 3318 PetscFunctionBegin; 3319 /* If isrow has same processor distribution as mat, 3320 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3321 if (call == MAT_REUSE_MATRIX) { 3322 PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d)); 3323 if (isrow_d) { 3324 sameRowDist = PETSC_TRUE; 3325 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3326 } else { 3327 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local)); 3328 if (iscol_local) { 3329 sameRowDist = PETSC_TRUE; 3330 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3331 } 3332 } 3333 } else { 3334 /* Check if isrow has same processor distribution as mat */ 3335 sameDist[0] = PETSC_FALSE; 3336 PetscCall(ISGetLocalSize(isrow,&n)); 3337 if (!n) { 3338 sameDist[0] = PETSC_TRUE; 3339 } 
else { 3340 PetscCall(ISGetMinMax(isrow,&i,&j)); 3341 PetscCall(MatGetOwnershipRange(mat,&start,&end)); 3342 if (i >= start && j < end) { 3343 sameDist[0] = PETSC_TRUE; 3344 } 3345 } 3346 3347 /* Check if iscol has same processor distribution as mat */ 3348 sameDist[1] = PETSC_FALSE; 3349 PetscCall(ISGetLocalSize(iscol,&n)); 3350 if (!n) { 3351 sameDist[1] = PETSC_TRUE; 3352 } else { 3353 PetscCall(ISGetMinMax(iscol,&i,&j)); 3354 PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end)); 3355 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3356 } 3357 3358 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3359 PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm)); 3360 sameRowDist = tsameDist[0]; 3361 } 3362 3363 if (sameRowDist) { 3364 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3365 /* isrow and iscol have same processor distribution as mat */ 3366 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat)); 3367 PetscFunctionReturn(0); 3368 } else { /* sameRowDist */ 3369 /* isrow has same processor distribution as mat */ 3370 if (call == MAT_INITIAL_MATRIX) { 3371 PetscBool sorted; 3372 PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 3373 PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */ 3374 PetscCall(ISGetSize(iscol,&i)); 3375 PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i); 3376 3377 PetscCall(ISSorted(iscol_local,&sorted)); 3378 if (sorted) { 3379 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3380 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat)); 3381 PetscFunctionReturn(0); 3382 } 3383 } else { /* call == MAT_REUSE_MATRIX */ 3384 IS iscol_sub; 3385 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3386 if (iscol_sub) { 3387 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat)); 3388 PetscFunctionReturn(0); 3389 } 3390 } 3391 } 3392 } 3393 3394 /* General case: iscol -> iscol_local which has global size of iscol */ 3395 if (call == MAT_REUSE_MATRIX) { 3396 PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local)); 3397 PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3398 } else { 3399 if (!iscol_local) { 3400 PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 3401 } 3402 } 3403 3404 PetscCall(ISGetLocalSize(iscol,&csize)); 3405 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat)); 3406 3407 if (call == MAT_INITIAL_MATRIX) { 3408 PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local)); 3409 PetscCall(ISDestroy(&iscol_local)); 3410 } 3411 PetscFunctionReturn(0); 3412 } 3413 3414 /*@C 3415 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3416 and "off-diagonal" part of the matrix in CSR format. 3417 3418 Collective 3419 3420 Input Parameters: 3421 + comm - MPI communicator 3422 . A - "diagonal" portion of matrix 3423 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3424 - garray - global index of B columns 3425 3426 Output Parameter: 3427 . 
mat - the matrix, with input A as its local diagonal matrix 3428 Level: advanced 3429 3430 Notes: 3431 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3432 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3433 3434 .seealso: `MatCreateMPIAIJWithSplitArrays()` 3435 @*/ 3436 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3437 { 3438 Mat_MPIAIJ *maij; 3439 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3440 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3441 const PetscScalar *oa; 3442 Mat Bnew; 3443 PetscInt m,n,N; 3444 MatType mpi_mat_type; 3445 3446 PetscFunctionBegin; 3447 PetscCall(MatCreate(comm,mat)); 3448 PetscCall(MatGetSize(A,&m,&n)); 3449 PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N); 3450 PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs); 3451 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3452 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3453 3454 /* Get global columns of mat */ 3455 PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm)); 3456 3457 PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N)); 3458 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */ 3459 PetscCall(MatGetMPIMatType_Private(A,&mpi_mat_type)); 3460 PetscCall(MatSetType(*mat,mpi_mat_type)); 3461 3462 PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs)); 3463 maij = (Mat_MPIAIJ*)(*mat)->data; 3464 3465 (*mat)->preallocated = PETSC_TRUE; 3466 3467 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3468 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3469 3470 /* Set A as diagonal portion of *mat */ 3471 maij->A = A; 3472 3473 nz = oi[m]; 3474 for (i=0; i<nz; i++) { 3475 col = oj[i]; 3476 oj[i] = garray[col]; 3477 } 3478 3479 /* Set Bnew as off-diagonal portion of *mat */ 3480 PetscCall(MatSeqAIJGetArrayRead(B,&oa)); 3481 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew)); 3482 PetscCall(MatSeqAIJRestoreArrayRead(B,&oa)); 3483 bnew = (Mat_SeqAIJ*)Bnew->data; 3484 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3485 maij->B = Bnew; 3486 3487 PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N); 3488 3489 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3490 b->free_a = PETSC_FALSE; 3491 b->free_ij = PETSC_FALSE; 3492 PetscCall(MatDestroy(&B)); 3493 3494 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3495 bnew->free_a = PETSC_TRUE; 3496 bnew->free_ij = PETSC_TRUE; 3497 3498 /* condense columns of maij->B */ 3499 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 3500 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 3501 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 3502 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 3503 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3504 PetscFunctionReturn(0); 3505 } 3506 3507 extern PetscErrorCode 
MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3508 3509 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3510 { 3511 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3512 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3513 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3514 Mat M,Msub,B=a->B; 3515 MatScalar *aa; 3516 Mat_SeqAIJ *aij; 3517 PetscInt *garray = a->garray,*colsub,Ncols; 3518 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3519 IS iscol_sub,iscmap; 3520 const PetscInt *is_idx,*cmap; 3521 PetscBool allcolumns=PETSC_FALSE; 3522 MPI_Comm comm; 3523 3524 PetscFunctionBegin; 3525 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3526 if (call == MAT_REUSE_MATRIX) { 3527 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3528 PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3529 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3530 3531 PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap)); 3532 PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3533 3534 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub)); 3535 PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3536 3537 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub)); 3538 3539 } else { /* call == MAT_INITIAL_MATRIX) */ 3540 PetscBool flg; 3541 3542 PetscCall(ISGetLocalSize(iscol,&n)); 3543 PetscCall(ISGetSize(iscol,&Ncols)); 3544 3545 /* (1) iscol -> nonscalable iscol_local */ 3546 /* Check for special case: each processor gets entire matrix columns */ 3547 PetscCall(ISIdentity(iscol_local,&flg)); 3548 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3549 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3550 if (allcolumns) { 3551 iscol_sub = iscol_local; 3552 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3553 PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap)); 3554 3555 } else { 3556 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3557 PetscInt *idx,*cmap1,k; 3558 PetscCall(PetscMalloc1(Ncols,&idx)); 3559 PetscCall(PetscMalloc1(Ncols,&cmap1)); 3560 PetscCall(ISGetIndices(iscol_local,&is_idx)); 3561 count = 0; 3562 k = 0; 3563 for (i=0; i<Ncols; i++) { 3564 j = is_idx[i]; 3565 if (j >= cstart && j < cend) { 3566 /* diagonal part of mat */ 3567 idx[count] = j; 3568 cmap1[count++] = i; /* column index in submat */ 3569 } else if (Bn) { 3570 /* off-diagonal part of mat */ 3571 if (j == garray[k]) { 3572 idx[count] = j; 3573 cmap1[count++] = i; /* column index in submat */ 3574 } else if (j > garray[k]) { 3575 while (j > garray[k] && k < Bn-1) k++; 3576 if (j == garray[k]) { 3577 idx[count] = j; 3578 cmap1[count++] = i; /* column index in submat */ 3579 } 3580 } 3581 } 3582 } 3583 PetscCall(ISRestoreIndices(iscol_local,&is_idx)); 3584 3585 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub)); 3586 PetscCall(ISGetBlockSize(iscol,&cbs)); 3587 PetscCall(ISSetBlockSize(iscol_sub,cbs)); 3588 3589 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap)); 3590 } 3591 3592 /* (3) Create sequential Msub */ 3593 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub)); 3594 } 3595 3596 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3597 aij = (Mat_SeqAIJ*)(Msub)->data; 3598 ii = aij->i; 3599 PetscCall(ISGetIndices(iscmap,&cmap)); 3600 3601 /* 3602 m - number of local rows 3603 Ncols - number of columns (same on all processors) 3604 rstart - first row in new global matrix generated 3605 */ 3606 PetscCall(MatGetSize(Msub,&m,NULL)); 3607 3608 if (call == MAT_INITIAL_MATRIX) { 3609 /* (4) Create parallel newmat */ 3610 PetscMPIInt rank,size; 3611 PetscInt csize; 3612 3613 PetscCallMPI(MPI_Comm_size(comm,&size)); 3614 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3615 3616 /* 3617 Determine the number of non-zeros in the diagonal and off-diagonal 3618 portions of the matrix in order to do correct preallocation 3619 */ 3620 3621 /* first get start and end of "diagonal" columns */ 3622 PetscCall(ISGetLocalSize(iscol,&csize)); 3623 if (csize == PETSC_DECIDE) { 3624 PetscCall(ISGetSize(isrow,&mglobal)); 3625 if (mglobal == Ncols) { /* square matrix */ 3626 nlocal = m; 3627 } else { 3628 nlocal = Ncols/size + ((Ncols % size) > rank); 3629 } 3630 } else { 3631 nlocal = csize; 3632 } 3633 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3634 rstart = rend - nlocal; 3635 PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols); 3636 3637 /* next, compute all the lengths */ 3638 jj = aij->j; 3639 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3640 olens = dlens + m; 3641 for (i=0; i<m; i++) { 3642 jend = ii[i+1] - ii[i]; 3643 olen = 0; 3644 dlen = 0; 3645 for (j=0; j<jend; j++) { 3646 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3647 else dlen++; 3648 jj++; 3649 } 3650 olens[i] = olen; 3651 dlens[i] = dlen; 3652 } 3653 3654 PetscCall(ISGetBlockSize(isrow,&bs)); 3655 PetscCall(ISGetBlockSize(iscol,&cbs)); 3656 3657 PetscCall(MatCreate(comm,&M)); 3658 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols)); 3659 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3660 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3661 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3662 
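    /* dlens/olens were only needed to preallocate M; the submatrix values themselves are inserted below from Msub */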
PetscCall(PetscFree(dlens)); 3663 3664 } else { /* call == MAT_REUSE_MATRIX */ 3665 M = *newmat; 3666 PetscCall(MatGetLocalSize(M,&i,NULL)); 3667 PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3668 PetscCall(MatZeroEntries(M)); 3669 /* 3670 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3671 rather than the slower MatSetValues(). 3672 */ 3673 M->was_assembled = PETSC_TRUE; 3674 M->assembled = PETSC_FALSE; 3675 } 3676 3677 /* (5) Set values of Msub to *newmat */ 3678 PetscCall(PetscMalloc1(count,&colsub)); 3679 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 3680 3681 jj = aij->j; 3682 PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa)); 3683 for (i=0; i<m; i++) { 3684 row = rstart + i; 3685 nz = ii[i+1] - ii[i]; 3686 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3687 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES)); 3688 jj += nz; aa += nz; 3689 } 3690 PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa)); 3691 PetscCall(ISRestoreIndices(iscmap,&cmap)); 3692 3693 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3694 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3695 3696 PetscCall(PetscFree(colsub)); 3697 3698 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3699 if (call == MAT_INITIAL_MATRIX) { 3700 *newmat = M; 3701 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub)); 3702 PetscCall(MatDestroy(&Msub)); 3703 3704 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub)); 3705 PetscCall(ISDestroy(&iscol_sub)); 3706 3707 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap)); 3708 PetscCall(ISDestroy(&iscmap)); 3709 3710 if (iscol_local) { 3711 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local)); 3712 PetscCall(ISDestroy(&iscol_local)); 3713 } 3714 } 3715 PetscFunctionReturn(0); 3716 } 3717 3718 /* 3719 Not great since it makes two copies of the submatrix, first an SeqAIJ 3720 in local and then by concatenating the local matrices the end result. 3721 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3722 3723 Note: This requires a sequential iscol with all indices. 
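          (When called from MatCreateSubMatrix_MPIAIJ(), this sequential iscol is the gathered iscol_local produced by ISGetSeqIS_Private().)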
3724 */ 3725 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3726 { 3727 PetscMPIInt rank,size; 3728 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3729 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3730 Mat M,Mreuse; 3731 MatScalar *aa,*vwork; 3732 MPI_Comm comm; 3733 Mat_SeqAIJ *aij; 3734 PetscBool colflag,allcolumns=PETSC_FALSE; 3735 3736 PetscFunctionBegin; 3737 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3738 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3739 PetscCallMPI(MPI_Comm_size(comm,&size)); 3740 3741 /* Check for special case: each processor gets entire matrix columns */ 3742 PetscCall(ISIdentity(iscol,&colflag)); 3743 PetscCall(ISGetLocalSize(iscol,&n)); 3744 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3745 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3746 3747 if (call == MAT_REUSE_MATRIX) { 3748 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 3749 PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3750 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse)); 3751 } else { 3752 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse)); 3753 } 3754 3755 /* 3756 m - number of local rows 3757 n - number of columns (same on all processors) 3758 rstart - first row in new global matrix generated 3759 */ 3760 PetscCall(MatGetSize(Mreuse,&m,&n)); 3761 PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs)); 3762 if (call == MAT_INITIAL_MATRIX) { 3763 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3764 ii = aij->i; 3765 jj = aij->j; 3766 3767 /* 3768 Determine the number of non-zeros in the diagonal and off-diagonal 3769 portions of the matrix in order to do correct preallocation 3770 */ 3771 3772 /* first get start and end of "diagonal" columns */ 3773 if (csize == PETSC_DECIDE) { 3774 PetscCall(ISGetSize(isrow,&mglobal)); 3775 if (mglobal == n) { /* square matrix */ 3776 nlocal = m; 3777 } else { 3778 nlocal = n/size + ((n % size) > rank); 3779 } 3780 } else { 3781 nlocal = csize; 3782 } 3783 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3784 rstart = rend - nlocal; 3785 PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 3786 3787 /* next, compute all the lengths */ 3788 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3789 olens = dlens + m; 3790 for (i=0; i<m; i++) { 3791 jend = ii[i+1] - ii[i]; 3792 olen = 0; 3793 dlen = 0; 3794 for (j=0; j<jend; j++) { 3795 if (*jj < rstart || *jj >= rend) olen++; 3796 else dlen++; 3797 jj++; 3798 } 3799 olens[i] = olen; 3800 dlens[i] = dlen; 3801 } 3802 PetscCall(MatCreate(comm,&M)); 3803 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n)); 3804 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3805 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3806 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3807 PetscCall(PetscFree(dlens)); 3808 } else { 3809 PetscInt ml,nl; 3810 3811 M = *newmat; 3812 PetscCall(MatGetLocalSize(M,&ml,&nl)); 3813 PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3814 PetscCall(MatZeroEntries(M)); 3815 /* 3816 The next two lines are needed so we may call 
MatSetValues_MPIAIJ() below directly, 3817 rather than the slower MatSetValues(). 3818 */ 3819 M->was_assembled = PETSC_TRUE; 3820 M->assembled = PETSC_FALSE; 3821 } 3822 PetscCall(MatGetOwnershipRange(M,&rstart,&rend)); 3823 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3824 ii = aij->i; 3825 jj = aij->j; 3826 3827 /* trigger copy to CPU if needed */ 3828 PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa)); 3829 for (i=0; i<m; i++) { 3830 row = rstart + i; 3831 nz = ii[i+1] - ii[i]; 3832 cwork = jj; jj += nz; 3833 vwork = aa; aa += nz; 3834 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES)); 3835 } 3836 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa)); 3837 3838 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3839 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3840 *newmat = M; 3841 3842 /* save submatrix used in processor for next request */ 3843 if (call == MAT_INITIAL_MATRIX) { 3844 PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse)); 3845 PetscCall(MatDestroy(&Mreuse)); 3846 } 3847 PetscFunctionReturn(0); 3848 } 3849 3850 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3851 { 3852 PetscInt m,cstart, cend,j,nnz,i,d,*ld; 3853 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3854 const PetscInt *JJ; 3855 PetscBool nooffprocentries; 3856 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)B->data; 3857 3858 PetscFunctionBegin; 3859 PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]); 3860 3861 PetscCall(PetscLayoutSetUp(B->rmap)); 3862 PetscCall(PetscLayoutSetUp(B->cmap)); 3863 m = B->rmap->n; 3864 cstart = B->cmap->rstart; 3865 cend = B->cmap->rend; 3866 rstart = B->rmap->rstart; 3867 3868 PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz)); 3869 3870 if (PetscDefined(USE_DEBUG)) { 3871 for (i=0; i<m; i++) { 3872 nnz = Ii[i+1]- Ii[i]; 3873 JJ = J + Ii[i]; 3874 PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz); 3875 PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]); 3876 PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N); 3877 } 3878 } 3879 3880 for (i=0; i<m; i++) { 3881 nnz = Ii[i+1]- Ii[i]; 3882 JJ = J + Ii[i]; 3883 nnz_max = PetscMax(nnz_max,nnz); 3884 d = 0; 3885 for (j=0; j<nnz; j++) { 3886 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3887 } 3888 d_nnz[i] = d; 3889 o_nnz[i] = nnz - d; 3890 } 3891 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 3892 PetscCall(PetscFree2(d_nnz,o_nnz)); 3893 3894 for (i=0; i<m; i++) { 3895 ii = i + rstart; 3896 PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES)); 3897 } 3898 nooffprocentries = B->nooffprocentries; 3899 B->nooffprocentries = PETSC_TRUE; 3900 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 3901 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3902 B->nooffprocentries = nooffprocentries; 3903 3904 /* count number of entries below block diagonal */ 3905 PetscCall(PetscFree(Aij->ld)); 3906 PetscCall(PetscCalloc1(m,&ld)); 3907 Aij->ld = ld; 3908 for (i=0; i<m; i++) { 3909 nnz = Ii[i+1] - Ii[i]; 3910 j = 0; 3911 while (j < nnz && J[j] < cstart) {j++;} 3912 ld[i] = j; 3913 J += nnz; 3914 } 3915 3916 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3917 PetscFunctionReturn(0); 3918 } 3919 3920 /*@ 3921 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3922 (the default parallel PETSc format). 3923 3924 Collective 3925 3926 Input Parameters: 3927 + B - the matrix 3928 . i - the indices into j for the start of each local row (starts with zero) 3929 . j - the column indices for each local row (starts with zero) 3930 - v - optional values in the matrix 3931 3932 Level: developer 3933 3934 Notes: 3935 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3936 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3937 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3938 3939 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3940 3941 The format which is used for the sparse matrix input, is equivalent to a 3942 row-major ordering.. i.e for the following matrix, the input data expected is 3943 as shown 3944 3945 $ 1 0 0 3946 $ 2 0 3 P0 3947 $ ------- 3948 $ 4 5 6 P1 3949 $ 3950 $ Process0 [P0]: rows_owned=[0,1] 3951 $ i = {0,1,3} [size = nrow+1 = 2+1] 3952 $ j = {0,0,2} [size = 3] 3953 $ v = {1,2,3} [size = 3] 3954 $ 3955 $ Process1 [P1]: rows_owned=[2] 3956 $ i = {0,3} [size = nrow+1 = 1+1] 3957 $ j = {0,1,2} [size = 3] 3958 $ v = {4,5,6} [size = 3] 3959 3960 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 3961 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 3962 @*/ 3963 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3964 { 3965 PetscFunctionBegin; 3966 PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v)); 3967 PetscFunctionReturn(0); 3968 } 3969 3970 /*@C 3971 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3972 (the default parallel PETSc format). For good matrix assembly performance 3973 the user should preallocate the matrix storage by setting the parameters 3974 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3975 performance can be increased by more than a factor of 50. 3976 3977 Collective 3978 3979 Input Parameters: 3980 + B - the matrix 3981 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3982 (same value is used for all local rows) 3983 . d_nnz - array containing the number of nonzeros in the various rows of the 3984 DIAGONAL portion of the local submatrix (possibly different for each row) 3985 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3986 The size of this array is equal to the number of local rows, i.e 'm'. 
3987 For matrices that will be factored, you must leave room for (and set)
3988 the diagonal entry even if it is zero.
3989 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3990 submatrix (same value is used for all local rows).
3991 - o_nnz - array containing the number of nonzeros in the various rows of the
3992 OFF-DIAGONAL portion of the local submatrix (possibly different for
3993 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3994 structure. The size of this array is equal to the number
3995 of local rows, i.e 'm'.
3996
3997 If the *_nnz parameter is given then the *_nz parameter is ignored.
3998
3999 The AIJ format (also called the Yale sparse matrix format or
4000 compressed row storage (CSR)), is fully compatible with standard Fortran 77
4001 storage. The stored row and column indices begin with zero.
4002 See Users-Manual: ch_mat for details.
4003
4004 The parallel matrix is partitioned such that the first m0 rows belong to
4005 process 0, the next m1 rows belong to process 1, the next m2 rows belong
4006 to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
4007
4008 The DIAGONAL portion of the local submatrix of a processor can be defined
4009 as the submatrix which is obtained by extracting the part corresponding to
4010 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4011 first row that belongs to the processor, r2 is the last row belonging to
4012 this processor, and c1-c2 is the range of indices of the local part of a
4013 vector suitable for applying the matrix to. This is an mxn matrix. In the
4014 common case of a square matrix, the row and column ranges are the same and
4015 the DIAGONAL part is also square. The remaining portion of the local
4016 submatrix (mxN) constitutes the OFF-DIAGONAL portion.
4017
4018 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4019
4020 You can call MatGetInfo() to get information on how effective the preallocation was;
4021 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4022 You can also run with the option -info and look for messages with the string
4023 malloc in them to see if additional memory allocation was needed.
4024
4025 Example usage:
4026
4027 Consider the following 8x8 matrix with 34 non-zero values, that is
4028 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4029 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4030 as follows:
4031
4032 .vb
4033 1 2 0 | 0 3 0 | 0 4
4034 Proc0 0 5 6 | 7 0 0 | 8 0
4035 9 0 10 | 11 0 0 | 12 0
4036 -------------------------------------
4037 13 0 14 | 15 16 17 | 0 0
4038 Proc1 0 18 0 | 19 20 21 | 0 0
4039 0 0 0 | 22 23 0 | 24 0
4040 -------------------------------------
4041 Proc2 25 26 27 | 0 0 28 | 29 0
4042 30 0 0 | 31 32 33 | 0 34
4043 .ve
4044
4045 This can be represented as a collection of submatrices as:
4046
4047 .vb
4048 A B C
4049 D E F
4050 G H I
4051 .ve
4052
4053 Where the submatrices A,B,C are owned by proc0, D,E,F are
4054 owned by proc1, G,H,I are owned by proc2.
4055
4056 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4057 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4058 The 'M','N' parameters are 8,8, and have the same values on all procs.
4059
4060 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4061 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4062 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4063 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4064 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4065 matrix, and [DF] as another SeqAIJ matrix.
4066
4067 When d_nz, o_nz parameters are specified, d_nz storage elements are
4068 allocated for every row of the local diagonal submatrix, and o_nz
4069 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4070 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4071 local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4072 In this case, the values of d_nz,o_nz are:
4073 .vb
4074 proc0 : d_nz = 2, o_nz = 2
4075 proc1 : d_nz = 3, o_nz = 2
4076 proc2 : d_nz = 1, o_nz = 4
4077 .ve
4078 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4079 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4080 for proc2, i.e., we are using 12+15+10=37 storage locations to store
4081 34 values.
4082
4083 When d_nnz, o_nnz parameters are specified, the storage is specified
4084 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4085 In the above case the values for d_nnz,o_nnz are:
4086 .vb
4087 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4088 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4089 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4090 .ve
4091 Here the space allocated is the sum of all the above values, i.e., 34, and
4092 hence the preallocation is perfect.
4093
4094 Level: intermediate
4095
4096 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4097 `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4098 @*/
4099 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4100 {
4101 PetscFunctionBegin;
4102 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4103 PetscValidType(B,1);
4104 PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
4105 PetscFunctionReturn(0);
4106 }
4107
4108 /*@
4109 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
4110 rows in standard CSR format.
4111
4112 Collective
4113
4114 Input Parameters:
4115 + comm - MPI communicator
4116 . m - number of local rows (Cannot be PETSC_DECIDE)
4117 . n - This value should be the same as the local size used in creating the
4118 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4119 calculated if N is given) For square matrices n is almost always m.
4120 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4121 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4122 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4123 . j - column indices
4124 - a - optional matrix values
4125
4126 Output Parameter:
4127 . mat - the matrix
4128
4129 Level: intermediate
4130
4131 Notes:
4132 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4133 thus you CANNOT change the matrix entries by changing the values of a[] after you have
4134 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4135
4136 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4137
4138 The format which is used for the sparse matrix input is equivalent to a
4139 row-major ordering,
i.e., for the following matrix the expected input data is
4140 as shown below.
4141
4142 Once you have created the matrix, you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4143
4144 $ 1 0 0
4145 $ 2 0 3 P0
4146 $ -------
4147 $ 4 5 6 P1
4148 $
4149 $ Process0 [P0]: rows_owned=[0,1]
4150 $ i = {0,1,3} [size = nrow+1 = 2+1]
4151 $ j = {0,0,2} [size = 3]
4152 $ v = {1,2,3} [size = 3]
4153 $
4154 $ Process1 [P1]: rows_owned=[2]
4155 $ i = {0,3} [size = nrow+1 = 1+1]
4156 $ j = {0,1,2} [size = 3]
4157 $ v = {4,5,6} [size = 3]
4158
4159 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4160 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4161 @*/
4162 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4163 {
4164 PetscFunctionBegin;
4165 PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4166 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4167 PetscCall(MatCreate(comm,mat));
4168 PetscCall(MatSetSizes(*mat,m,n,M,N));
4169 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4170 PetscCall(MatSetType(*mat,MATMPIAIJ));
4171 PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
4172 PetscFunctionReturn(0);
4173 }
4174
4175 /*@
4176 MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local
4177 rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those passed to MatCreateMPIAIJWithArrays()
4178
4179 Deprecated: Use `MatUpdateMPIAIJWithArray()`
4180
4181 Collective
4182
4183 Input Parameters:
4184 + mat - the matrix
4185 . m - number of local rows (Cannot be PETSC_DECIDE)
4186 . n - This value should be the same as the local size used in creating the
4187 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4188 calculated if N is given) For square matrices n is almost always m.
4189 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4190 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4191 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4192 .
J - column indices 4193 - v - matrix values 4194 4195 Level: intermediate 4196 4197 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4198 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()` 4199 @*/ 4200 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4201 { 4202 PetscInt nnz,i; 4203 PetscBool nooffprocentries; 4204 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4205 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4206 PetscScalar *ad,*ao; 4207 PetscInt ldi,Iii,md; 4208 const PetscInt *Adi = Ad->i; 4209 PetscInt *ld = Aij->ld; 4210 4211 PetscFunctionBegin; 4212 PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4213 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4214 PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4215 PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4216 4217 PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4218 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4219 4220 for (i=0; i<m; i++) { 4221 nnz = Ii[i+1]- Ii[i]; 4222 Iii = Ii[i]; 4223 ldi = ld[i]; 4224 md = Adi[i+1]-Adi[i]; 4225 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4226 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4227 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4228 ad += md; 4229 ao += nnz - md; 4230 } 4231 nooffprocentries = mat->nooffprocentries; 4232 mat->nooffprocentries = PETSC_TRUE; 4233 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4234 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4235 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4236 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4237 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4238 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4239 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4240 mat->nooffprocentries = nooffprocentries; 4241 PetscFunctionReturn(0); 4242 } 4243 4244 /*@ 4245 MatUpdateMPIAIJWithArray - updates an MPI AIJ matrix using an array that contains the nonzero values 4246 4247 Collective 4248 4249 Input Parameters: 4250 + mat - the matrix 4251 - v - matrix values, stored by row 4252 4253 Level: intermediate 4254 4255 Notes: 4256 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4257 4258 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4259 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArrays()` 4260 @*/ 4261 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat,const PetscScalar v[]) 4262 { 4263 PetscInt nnz,i,m; 4264 PetscBool nooffprocentries; 4265 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4266 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4267 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)Aij->B->data; 4268 PetscScalar *ad,*ao; 4269 const PetscInt *Adi = Ad->i,*Adj = Ao->i; 4270 PetscInt ldi,Iii,md; 4271 PetscInt *ld = Aij->ld; 4272 4273 PetscFunctionBegin; 4274 m = mat->rmap->n; 4275 4276 
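  /* Each local row of v is laid out in global column order: first the ld[i] off-diagonal entries whose columns precede this rank's diagonal block, then the diagonal-block entries, then the remaining off-diagonal entries; the loop below splits every row accordingly between the A (diagonal) and B (off-diagonal) value arrays. */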
PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4277 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4278 Iii = 0; 4279 for (i=0; i<m; i++) { 4280 nnz = Adi[i+1]-Adi[i] + Adj[i+1]-Adj[i]; 4281 ldi = ld[i]; 4282 md = Adi[i+1]-Adi[i]; 4283 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4284 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4285 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4286 ad += md; 4287 ao += nnz - md; 4288 Iii += nnz; 4289 } 4290 nooffprocentries = mat->nooffprocentries; 4291 mat->nooffprocentries = PETSC_TRUE; 4292 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4293 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4294 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4295 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4296 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4297 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4298 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4299 mat->nooffprocentries = nooffprocentries; 4300 PetscFunctionReturn(0); 4301 } 4302 4303 /*@C 4304 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4305 (the default parallel PETSc format). For good matrix assembly performance 4306 the user should preallocate the matrix storage by setting the parameters 4307 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4308 performance can be increased by more than a factor of 50. 4309 4310 Collective 4311 4312 Input Parameters: 4313 + comm - MPI communicator 4314 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4315 This value should be the same as the local size used in creating the 4316 y vector for the matrix-vector product y = Ax. 4317 . n - This value should be the same as the local size used in creating the 4318 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4319 calculated if N is given) For square matrices n is almost always m. 4320 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4321 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4322 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4323 (same value is used for all local rows) 4324 . d_nnz - array containing the number of nonzeros in the various rows of the 4325 DIAGONAL portion of the local submatrix (possibly different for each row) 4326 or NULL, if d_nz is used to specify the nonzero structure. 4327 The size of this array is equal to the number of local rows, i.e 'm'. 4328 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4329 submatrix (same value is used for all local rows). 4330 - o_nnz - array containing the number of nonzeros in the various rows of the 4331 OFF-DIAGONAL portion of the local submatrix (possibly different for 4332 each row) or NULL, if o_nz is used to specify the nonzero 4333 structure. The size of this array is equal to the number 4334 of local rows, i.e 'm'. 4335 4336 Output Parameter: 4337 . A - the matrix 4338 4339 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4340 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
4341 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4342
4343 Notes:
4344 If the *_nnz parameter is given then the *_nz parameter is ignored.
4345
4346 m,n,M,N parameters specify the size of the matrix, and its partitioning across
4347 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4348 storage requirements for this matrix.
4349
4350 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4351 processor then it must be used on all processors that share the object for
4352 that argument.
4353
4354 The user MUST specify either the local or global matrix dimensions
4355 (possibly both).
4356
4357 The parallel matrix is partitioned across processors such that the
4358 first m0 rows belong to process 0, the next m1 rows belong to
4359 process 1, the next m2 rows belong to process 2 etc., where
4360 m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
4361 values corresponding to an [m x N] submatrix.
4362
4363 The columns are logically partitioned with the n0 columns belonging
4364 to the 0th partition, the next n1 columns belonging to the next
4365 partition etc., where n0,n1,n2... are the input parameter 'n'.
4366
4367 The DIAGONAL portion of the local submatrix on any given processor
4368 is the submatrix corresponding to the rows and columns m,n
4369 corresponding to the given processor, i.e., the diagonal matrix on
4370 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
4371 etc. The remaining portion of the local submatrix [m x (N-n)]
4372 constitutes the OFF-DIAGONAL portion. The example below better
4373 illustrates this concept.
4374
4375 For a square global matrix we define each processor's diagonal portion
4376 to be its local rows and the corresponding columns (a square submatrix);
4377 each processor's off-diagonal portion encompasses the remainder of the
4378 local matrix (a rectangular submatrix).
4379
4380 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4381
4382 When calling this routine with a single process communicator, a matrix of
4383 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4384 type of communicator, use the construction mechanism
4385 .vb
4386 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4387 .ve
4388
4394 By default, this format uses inodes (identical nodes) when possible.
4395 We search for consecutive rows with the same nonzero structure, thereby
4396 reusing matrix information to achieve increased efficiency.
4397
4398 Options Database Keys:
4399 + -mat_no_inode - Do not use inodes
4400 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4401 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4402 See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
4403 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4404
4405 Example usage:
4406
4407 Consider the following 8x8 matrix with 34 non-zero values, that is
4408 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4409 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown 4410 as follows 4411 4412 .vb 4413 1 2 0 | 0 3 0 | 0 4 4414 Proc0 0 5 6 | 7 0 0 | 8 0 4415 9 0 10 | 11 0 0 | 12 0 4416 ------------------------------------- 4417 13 0 14 | 15 16 17 | 0 0 4418 Proc1 0 18 0 | 19 20 21 | 0 0 4419 0 0 0 | 22 23 0 | 24 0 4420 ------------------------------------- 4421 Proc2 25 26 27 | 0 0 28 | 29 0 4422 30 0 0 | 31 32 33 | 0 34 4423 .ve 4424 4425 This can be represented as a collection of submatrices as 4426 4427 .vb 4428 A B C 4429 D E F 4430 G H I 4431 .ve 4432 4433 where the submatrices A,B,C are owned by proc0, D,E,F are 4434 owned by proc1, and G,H,I are owned by proc2. 4435 4436 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4437 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4438 The 'M','N' parameters are 8,8, and have the same values on all procs. 4439 4440 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4441 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4442 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4443 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4444 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 4445 matrix and [DF] as another SeqAIJ matrix. 4446 4447 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4448 allocated for every row of the local diagonal submatrix, and o_nz 4449 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4450 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4451 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 4452 In this case, the values of d_nz,o_nz are 4453 .vb 4454 proc0 : d_nz = 2, o_nz = 2 4455 proc1 : d_nz = 3, o_nz = 2 4456 proc2 : d_nz = 1, o_nz = 4 4457 .ve 4458 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4459 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4460 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4461 34 values. 4462 4463 When the d_nnz, o_nnz parameters are specified, the storage is specified 4464 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4465 In the above case the values for d_nnz,o_nnz are 4466 .vb 4467 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4468 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4469 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4470 .ve 4471 Here the space allocated is the sum of all the above values, i.e., 34, and 4472 hence the preallocation is perfect. 4473 4474 Level: intermediate 4475 4476 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4477 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4478 @*/ 4479 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4480 { 4481 PetscMPIInt size; 4482 4483 PetscFunctionBegin; 4484 PetscCall(MatCreate(comm,A)); 4485 PetscCall(MatSetSizes(*A,m,n,M,N)); 4486 PetscCallMPI(MPI_Comm_size(comm,&size)); 4487 if (size > 1) { 4488 PetscCall(MatSetType(*A,MATMPIAIJ)); 4489 PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz)); 4490 } else { 4491 PetscCall(MatSetType(*A,MATSEQAIJ)); 4492 PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz)); 4493 } 4494 PetscFunctionReturn(0); 4495 } 4496 4497 /*@C 4498 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4499 4500 Not Collective 4501 4502 Input Parameter: 4503 .
A - The MPIAIJ matrix 4504 4505 Output Parameters: 4506 + Ad - The local diagonal block as a SeqAIJ matrix 4507 . Ao - The local off-diagonal block as a SeqAIJ matrix 4508 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4509 4510 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4511 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4512 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4513 local column numbers to global column numbers in the original matrix. 4514 4515 Level: intermediate 4516 4517 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4518 @*/ 4519 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4520 { 4521 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4522 PetscBool flg; 4523 4524 PetscFunctionBegin; 4525 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg)); 4526 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4527 if (Ad) *Ad = a->A; 4528 if (Ao) *Ao = a->B; 4529 if (colmap) *colmap = a->garray; 4530 PetscFunctionReturn(0); 4531 } 4532 4533 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4534 { 4535 PetscInt m,N,i,rstart,nnz,Ii; 4536 PetscInt *indx; 4537 PetscScalar *values; 4538 MatType rootType; 4539 4540 PetscFunctionBegin; 4541 PetscCall(MatGetSize(inmat,&m,&N)); 4542 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4543 PetscInt *dnz,*onz,sum,bs,cbs; 4544 4545 if (n == PETSC_DECIDE) { 4546 PetscCall(PetscSplitOwnership(comm,&n,&N)); 4547 } 4548 /* Check sum(n) = N */ 4549 PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm)); 4550 PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 4551 4552 PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm)); 4553 rstart -= m; 4554 4555 MatPreallocateBegin(comm,m,n,dnz,onz); 4556 for (i=0; i<m; i++) { 4557 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4558 PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz)); 4559 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4560 } 4561 4562 PetscCall(MatCreate(comm,outmat)); 4563 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4564 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 4565 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 4566 PetscCall(MatGetRootType_Private(inmat,&rootType)); 4567 PetscCall(MatSetType(*outmat,rootType)); 4568 PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz)); 4569 PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz)); 4570 MatPreallocateEnd(dnz,onz); 4571 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 4572 } 4573 4574 /* numeric phase */ 4575 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 4576 for (i=0; i<m; i++) { 4577 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4578 Ii = i + rstart; 4579 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 4580 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4581 } 4582 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 4583 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 4584 PetscFunctionReturn(0); 4585 } 4586 4587 PetscErrorCode 
MatFileSplit(Mat A,char *outfile) 4588 { 4589 PetscMPIInt rank; 4590 PetscInt m,N,i,rstart,nnz; 4591 size_t len; 4592 const PetscInt *indx; 4593 PetscViewer out; 4594 char *name; 4595 Mat B; 4596 const PetscScalar *values; 4597 4598 PetscFunctionBegin; 4599 PetscCall(MatGetLocalSize(A,&m,NULL)); 4600 PetscCall(MatGetSize(A,NULL,&N)); 4601 /* Should this be the type of the diagonal block of A? */ 4602 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 4603 PetscCall(MatSetSizes(B,m,N,m,N)); 4604 PetscCall(MatSetBlockSizesFromMats(B,A,A)); 4605 PetscCall(MatSetType(B,MATSEQAIJ)); 4606 PetscCall(MatSeqAIJSetPreallocation(B,0,NULL)); 4607 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 4608 for (i=0; i<m; i++) { 4609 PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values)); 4610 PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES)); 4611 PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values)); 4612 } 4613 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 4614 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 4615 4616 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 4617 PetscCall(PetscStrlen(outfile,&len)); 4618 PetscCall(PetscMalloc1(len+6,&name)); 4619 PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank)); 4620 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out)); 4621 PetscCall(PetscFree(name)); 4622 PetscCall(MatView(B,out)); 4623 PetscCall(PetscViewerDestroy(&out)); 4624 PetscCall(MatDestroy(&B)); 4625 PetscFunctionReturn(0); 4626 } 4627 4628 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4629 { 4630 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4631 4632 PetscFunctionBegin; 4633 if (!merge) PetscFunctionReturn(0); 4634 PetscCall(PetscFree(merge->id_r)); 4635 PetscCall(PetscFree(merge->len_s)); 4636 PetscCall(PetscFree(merge->len_r)); 4637 PetscCall(PetscFree(merge->bi)); 4638 PetscCall(PetscFree(merge->bj)); 4639 PetscCall(PetscFree(merge->buf_ri[0])); 4640 PetscCall(PetscFree(merge->buf_ri)); 4641 PetscCall(PetscFree(merge->buf_rj[0])); 4642 PetscCall(PetscFree(merge->buf_rj)); 4643 PetscCall(PetscFree(merge->coi)); 4644 PetscCall(PetscFree(merge->coj)); 4645 PetscCall(PetscFree(merge->owners_co)); 4646 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4647 PetscCall(PetscFree(merge)); 4648 PetscFunctionReturn(0); 4649 } 4650 4651 #include <../src/mat/utils/freespace.h> 4652 #include <petscbt.h> 4653 4654 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4655 { 4656 MPI_Comm comm; 4657 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4658 PetscMPIInt size,rank,taga,*len_s; 4659 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4660 PetscInt proc,m; 4661 PetscInt **buf_ri,**buf_rj; 4662 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4663 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4664 MPI_Request *s_waits,*r_waits; 4665 MPI_Status *status; 4666 const MatScalar *aa,*a_a; 4667 MatScalar **abuf_r,*ba_i; 4668 Mat_Merge_SeqsToMPI *merge; 4669 PetscContainer container; 4670 4671 PetscFunctionBegin; 4672 PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm)); 4673 PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0)); 4674 4675 PetscCallMPI(MPI_Comm_size(comm,&size)); 4676 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4677 4678 PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container)); 4679 PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4680 
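  /* The composed container holds the Mat_Merge_SeqsToMPI structure built by MatCreateMPIAIJSumSeqAIJSymbolic():
     it caches the merged row structure (bi,bj), the received index buffers (buf_ri,buf_rj) and the send/receive
     lengths, so repeated numeric assemblies can reuse the same communication plan. */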
PetscCall(PetscContainerGetPointer(container,(void**)&merge)); 4681 PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a)); 4682 aa = a_a; 4683 4684 bi = merge->bi; 4685 bj = merge->bj; 4686 buf_ri = merge->buf_ri; 4687 buf_rj = merge->buf_rj; 4688 4689 PetscCall(PetscMalloc1(size,&status)); 4690 owners = merge->rowmap->range; 4691 len_s = merge->len_s; 4692 4693 /* send and recv matrix values */ 4694 /*-----------------------------*/ 4695 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga)); 4696 PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits)); 4697 4698 PetscCall(PetscMalloc1(merge->nsend+1,&s_waits)); 4699 for (proc=0,k=0; proc<size; proc++) { 4700 if (!len_s[proc]) continue; 4701 i = owners[proc]; 4702 PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k)); 4703 k++; 4704 } 4705 4706 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status)); 4707 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status)); 4708 PetscCall(PetscFree(status)); 4709 4710 PetscCall(PetscFree(s_waits)); 4711 PetscCall(PetscFree(r_waits)); 4712 4713 /* insert mat values of mpimat */ 4714 /*----------------------------*/ 4715 PetscCall(PetscMalloc1(N,&ba_i)); 4716 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4717 4718 for (k=0; k<merge->nrecv; k++) { 4719 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4720 nrows = *(buf_ri_k[k]); 4721 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4722 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4723 } 4724 4725 /* set values of ba */ 4726 m = merge->rowmap->n; 4727 for (i=0; i<m; i++) { 4728 arow = owners[rank] + i; 4729 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4730 bnzi = bi[i+1] - bi[i]; 4731 PetscCall(PetscArrayzero(ba_i,bnzi)); 4732 4733 /* add local non-zero vals of this proc's seqmat into ba */ 4734 anzi = ai[arow+1] - ai[arow]; 4735 aj = a->j + ai[arow]; 4736 aa = a_a + ai[arow]; 4737 nextaj = 0; 4738 for (j=0; nextaj<anzi; j++) { 4739 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4740 ba_i[j] += aa[nextaj++]; 4741 } 4742 } 4743 4744 /* add received vals into ba */ 4745 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4746 /* i-th row */ 4747 if (i == *nextrow[k]) { 4748 anzi = *(nextai[k]+1) - *nextai[k]; 4749 aj = buf_rj[k] + *(nextai[k]); 4750 aa = abuf_r[k] + *(nextai[k]); 4751 nextaj = 0; 4752 for (j=0; nextaj<anzi; j++) { 4753 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4754 ba_i[j] += aa[nextaj++]; 4755 } 4756 } 4757 nextrow[k]++; nextai[k]++; 4758 } 4759 } 4760 PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES)); 4761 } 4762 PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a)); 4763 PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY)); 4764 PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY)); 4765 4766 PetscCall(PetscFree(abuf_r[0])); 4767 PetscCall(PetscFree(abuf_r)); 4768 PetscCall(PetscFree(ba_i)); 4769 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4770 PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0)); 4771 PetscFunctionReturn(0); 4772 } 4773 4774 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4775 { 4776 Mat B_mpi; 4777 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4778 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4779 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 
4780 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4781 PetscInt len,proc,*dnz,*onz,bs,cbs; 4782 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4783 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4784 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4785 MPI_Status *status; 4786 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4787 PetscBT lnkbt; 4788 Mat_Merge_SeqsToMPI *merge; 4789 PetscContainer container; 4790 4791 PetscFunctionBegin; 4792 PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0)); 4793 4794 /* make sure it is a PETSc comm */ 4795 PetscCall(PetscCommDuplicate(comm,&comm,NULL)); 4796 PetscCallMPI(MPI_Comm_size(comm,&size)); 4797 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4798 4799 PetscCall(PetscNew(&merge)); 4800 PetscCall(PetscMalloc1(size,&status)); 4801 4802 /* determine row ownership */ 4803 /*---------------------------------------------------------*/ 4804 PetscCall(PetscLayoutCreate(comm,&merge->rowmap)); 4805 PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m)); 4806 PetscCall(PetscLayoutSetSize(merge->rowmap,M)); 4807 PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1)); 4808 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4809 PetscCall(PetscMalloc1(size,&len_si)); 4810 PetscCall(PetscMalloc1(size,&merge->len_s)); 4811 4812 m = merge->rowmap->n; 4813 owners = merge->rowmap->range; 4814 4815 /* determine the number of messages to send, their lengths */ 4816 /*---------------------------------------------------------*/ 4817 len_s = merge->len_s; 4818 4819 len = 0; /* length of buf_si[] */ 4820 merge->nsend = 0; 4821 for (proc=0; proc<size; proc++) { 4822 len_si[proc] = 0; 4823 if (proc == rank) { 4824 len_s[proc] = 0; 4825 } else { 4826 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4827 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4828 } 4829 if (len_s[proc]) { 4830 merge->nsend++; 4831 nrows = 0; 4832 for (i=owners[proc]; i<owners[proc+1]; i++) { 4833 if (ai[i+1] > ai[i]) nrows++; 4834 } 4835 len_si[proc] = 2*(nrows+1); 4836 len += len_si[proc]; 4837 } 4838 } 4839 4840 /* determine the number and length of messages to receive for ij-structure */ 4841 /*-------------------------------------------------------------------------*/ 4842 PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv)); 4843 PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri)); 4844 4845 /* post the Irecv of j-structure */ 4846 /*-------------------------------*/ 4847 PetscCall(PetscCommGetNewTag(comm,&tagj)); 4848 PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits)); 4849 4850 /* post the Isend of j-structure */ 4851 /*--------------------------------*/ 4852 PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits)); 4853 4854 for (proc=0, k=0; proc<size; proc++) { 4855 if (!len_s[proc]) continue; 4856 i = owners[proc]; 4857 PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k)); 4858 k++; 4859 } 4860 4861 /* receives and sends of j-structure are complete */ 4862 /*------------------------------------------------*/ 4863 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status)); 4864 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status)); 4865 4866 /* send and recv i-structure */ 4867 /*---------------------------*/ 4868 PetscCall(PetscCommGetNewTag(comm,&tagi)); 4869 
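  /* Each i-structure message tells the receiving rank how many nonempty rows are coming, which rows they are
     (relative to the receiver's ownership range), and the running offsets into the j/a data; a separate tag
     keeps this traffic distinct from the j-structure messages posted above. */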
PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits)); 4870 4871 PetscCall(PetscMalloc1(len+1,&buf_s)); 4872 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4873 for (proc=0,k=0; proc<size; proc++) { 4874 if (!len_s[proc]) continue; 4875 /* form outgoing message for i-structure: 4876 buf_si[0]: nrows to be sent 4877 [1:nrows]: row index (global) 4878 [nrows+1:2*nrows+1]: i-structure index 4879 */ 4880 /*-------------------------------------------*/ 4881 nrows = len_si[proc]/2 - 1; 4882 buf_si_i = buf_si + nrows+1; 4883 buf_si[0] = nrows; 4884 buf_si_i[0] = 0; 4885 nrows = 0; 4886 for (i=owners[proc]; i<owners[proc+1]; i++) { 4887 anzi = ai[i+1] - ai[i]; 4888 if (anzi) { 4889 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4890 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4891 nrows++; 4892 } 4893 } 4894 PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k)); 4895 k++; 4896 buf_si += len_si[proc]; 4897 } 4898 4899 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status)); 4900 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status)); 4901 4902 PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv)); 4903 for (i=0; i<merge->nrecv; i++) { 4904 PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i])); 4905 } 4906 4907 PetscCall(PetscFree(len_si)); 4908 PetscCall(PetscFree(len_ri)); 4909 PetscCall(PetscFree(rj_waits)); 4910 PetscCall(PetscFree2(si_waits,sj_waits)); 4911 PetscCall(PetscFree(ri_waits)); 4912 PetscCall(PetscFree(buf_s)); 4913 PetscCall(PetscFree(status)); 4914 4915 /* compute a local seq matrix in each processor */ 4916 /*----------------------------------------------*/ 4917 /* allocate bi array and free space for accumulating nonzero column info */ 4918 PetscCall(PetscMalloc1(m+1,&bi)); 4919 bi[0] = 0; 4920 4921 /* create and initialize a linked list */ 4922 nlnk = N+1; 4923 PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt)); 4924 4925 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4926 len = ai[owners[rank+1]] - ai[owners[rank]]; 4927 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space)); 4928 4929 current_space = free_space; 4930 4931 /* determine symbolic info for each local row */ 4932 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4933 4934 for (k=0; k<merge->nrecv; k++) { 4935 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4936 nrows = *buf_ri_k[k]; 4937 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4938 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4939 } 4940 4941 MatPreallocateBegin(comm,m,n,dnz,onz); 4942 len = 0; 4943 for (i=0; i<m; i++) { 4944 bnzi = 0; 4945 /* add local non-zero cols of this proc's seqmat into lnk */ 4946 arow = owners[rank] + i; 4947 anzi = ai[arow+1] - ai[arow]; 4948 aj = a->j + ai[arow]; 4949 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4950 bnzi += nlnk; 4951 /* add received col data into lnk */ 4952 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4953 if (i == *nextrow[k]) { /* i-th row */ 4954 anzi = *(nextai[k]+1) - *nextai[k]; 4955 aj = buf_rj[k] + *nextai[k]; 4956 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4957 bnzi += nlnk; 4958 nextrow[k]++; nextai[k]++; 4959 } 4960 } 4961 if (len < bnzi) len = bnzi; /* 
=max(bnzi) */ 4962 4963 /* if free space is not available, make more free space */ 4964 if (current_space->local_remaining<bnzi) { 4965 PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space)); 4966 nspacedouble++; 4967 } 4968 /* copy data into free space, then initialize lnk */ 4969 PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt)); 4970 PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz)); 4971 4972 current_space->array += bnzi; 4973 current_space->local_used += bnzi; 4974 current_space->local_remaining -= bnzi; 4975 4976 bi[i+1] = bi[i] + bnzi; 4977 } 4978 4979 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4980 4981 PetscCall(PetscMalloc1(bi[m]+1,&bj)); 4982 PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); 4983 PetscCall(PetscLLDestroy(lnk,lnkbt)); 4984 4985 /* create symbolic parallel matrix B_mpi */ 4986 /*---------------------------------------*/ 4987 PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs)); 4988 PetscCall(MatCreate(comm,&B_mpi)); 4989 if (n==PETSC_DECIDE) { 4990 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N)); 4991 } else { 4992 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4993 } 4994 PetscCall(MatSetBlockSizes(B_mpi,bs,cbs)); 4995 PetscCall(MatSetType(B_mpi,MATMPIAIJ)); 4996 PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz)); 4997 MatPreallocateEnd(dnz,onz); 4998 PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 4999 5000 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5001 B_mpi->assembled = PETSC_FALSE; 5002 merge->bi = bi; 5003 merge->bj = bj; 5004 merge->buf_ri = buf_ri; 5005 merge->buf_rj = buf_rj; 5006 merge->coi = NULL; 5007 merge->coj = NULL; 5008 merge->owners_co = NULL; 5009 5010 PetscCall(PetscCommDestroy(&comm)); 5011 5012 /* attach the supporting struct to B_mpi for reuse */ 5013 PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container)); 5014 PetscCall(PetscContainerSetPointer(container,merge)); 5015 PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI)); 5016 PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container)); 5017 PetscCall(PetscContainerDestroy(&container)); 5018 *mpimat = B_mpi; 5019 5020 PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0)); 5021 PetscFunctionReturn(0); 5022 } 5023 5024 /*@C 5025 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 5026 matrices from each processor 5027 5028 Collective 5029 5030 Input Parameters: 5031 + comm - the communicator the parallel matrix will live on 5032 . seqmat - the input sequential matrix on each process 5033 . m - number of local rows (or PETSC_DECIDE) 5034 . n - number of local columns (or PETSC_DECIDE) 5035 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5036 5037 Output Parameter: 5038 . mpimat - the parallel matrix generated 5039 5040 Level: advanced 5041 5042 Notes: 5043 The dimensions of the sequential matrix in each processor MUST be the same. 5044 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5045 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
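   A minimal usage sketch (error checking omitted; the global sizes M,N and the preallocation array nnz are
   illustrative placeholders):
.vb
     Mat seqA,A;
     /* every rank assembles its own M x N sequential contribution */
     MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,0,nnz,&seqA);
     /* ... MatSetValues() and MatAssemblyBegin/End() on seqA ... */
     MatCreateMPIAIJSumSeqAIJ(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);
     /* after changing values (same nonzero pattern) only the numeric phase is redone */
     MatCreateMPIAIJSumSeqAIJ(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&A);
.ve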
5046 @*/ 5047 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5048 { 5049 PetscMPIInt size; 5050 5051 PetscFunctionBegin; 5052 PetscCallMPI(MPI_Comm_size(comm,&size)); 5053 if (size == 1) { 5054 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 5055 if (scall == MAT_INITIAL_MATRIX) { 5056 PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat)); 5057 } else { 5058 PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN)); 5059 } 5060 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 5061 PetscFunctionReturn(0); 5062 } 5063 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 5064 if (scall == MAT_INITIAL_MATRIX) { 5065 PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat)); 5066 } 5067 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat)); 5068 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 5069 PetscFunctionReturn(0); 5070 } 5071 5072 /*@ 5073 MatAIJGetLocalMat - Creates a SeqAIJ matrix from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5074 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5075 with MatGetSize() 5076 5077 Not Collective 5078 5079 Input Parameter: 5080 . A - the matrix 5082 5083 Output Parameter: 5084 . A_loc - the local sequential matrix generated 5085 5086 Level: developer 5087 5088 Notes: 5089 In other words, this combines the two parts of a parallel MPIAIJ matrix on each process into a single sequential matrix. 5090 5091 Destroy the matrix with MatDestroy() 5092 5093 .seealso: `MatMPIAIJGetLocalMat()` 5094 5095 @*/ 5096 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc) 5097 { 5098 PetscBool mpi; 5099 5100 PetscFunctionBegin; 5101 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi)); 5102 if (mpi) { 5103 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc)); 5104 } else { 5105 *A_loc = A; 5106 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5107 } 5108 PetscFunctionReturn(0); 5109 } 5110 5111 /*@ 5112 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5113 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5114 with MatGetSize() 5115 5116 Not Collective 5117 5118 Input Parameters: 5119 + A - the matrix 5120 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5121 5122 Output Parameter: 5123 . A_loc - the local sequential matrix generated 5124 5125 Level: developer 5126 5127 Notes: 5128 In other words, this combines the two parts of a parallel MPIAIJ matrix on each process into a single sequential matrix. 5129 5130 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5131 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5132 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5133 modify the values of the returned A_loc.
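   A typical call sequence (sketch, error checking omitted):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc as an ordinary SeqAIJ matrix with the full global column space ... */
     MatDestroy(&A_loc);
.ve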
5134 5135 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5136 @*/ 5137 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5138 { 5139 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5140 Mat_SeqAIJ *mat,*a,*b; 5141 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5142 const PetscScalar *aa,*ba,*aav,*bav; 5143 PetscScalar *ca,*cam; 5144 PetscMPIInt size; 5145 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5146 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5147 PetscBool match; 5148 5149 PetscFunctionBegin; 5150 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match)); 5151 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5152 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5153 if (size == 1) { 5154 if (scall == MAT_INITIAL_MATRIX) { 5155 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5156 *A_loc = mpimat->A; 5157 } else if (scall == MAT_REUSE_MATRIX) { 5158 PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN)); 5159 } 5160 PetscFunctionReturn(0); 5161 } 5162 5163 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5164 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5165 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5166 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5167 PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav)); 5168 PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav)); 5169 aa = aav; 5170 ba = bav; 5171 if (scall == MAT_INITIAL_MATRIX) { 5172 PetscCall(PetscMalloc1(1+am,&ci)); 5173 ci[0] = 0; 5174 for (i=0; i<am; i++) { 5175 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5176 } 5177 PetscCall(PetscMalloc1(1+ci[am],&cj)); 5178 PetscCall(PetscMalloc1(1+ci[am],&ca)); 5179 k = 0; 5180 for (i=0; i<am; i++) { 5181 ncols_o = bi[i+1] - bi[i]; 5182 ncols_d = ai[i+1] - ai[i]; 5183 /* off-diagonal portion of A */ 5184 for (jo=0; jo<ncols_o; jo++) { 5185 col = cmap[*bj]; 5186 if (col >= cstart) break; 5187 cj[k] = col; bj++; 5188 ca[k++] = *ba++; 5189 } 5190 /* diagonal portion of A */ 5191 for (j=0; j<ncols_d; j++) { 5192 cj[k] = cstart + *aj++; 5193 ca[k++] = *aa++; 5194 } 5195 /* off-diagonal portion of A */ 5196 for (j=jo; j<ncols_o; j++) { 5197 cj[k] = cmap[*bj++]; 5198 ca[k++] = *ba++; 5199 } 5200 } 5201 /* put together the new matrix */ 5202 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc)); 5203 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5204 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5205 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5206 mat->free_a = PETSC_TRUE; 5207 mat->free_ij = PETSC_TRUE; 5208 mat->nonew = 0; 5209 } else if (scall == MAT_REUSE_MATRIX) { 5210 mat =(Mat_SeqAIJ*)(*A_loc)->data; 5211 ci = mat->i; 5212 cj = mat->j; 5213 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam)); 5214 for (i=0; i<am; i++) { 5215 /* off-diagonal portion of A */ 5216 ncols_o = bi[i+1] - bi[i]; 5217 for (jo=0; jo<ncols_o; jo++) { 5218 col = cmap[*bj]; 5219 if (col >= cstart) break; 5220 *cam++ = *ba++; bj++; 5221 } 5222 /* diagonal portion of A */ 5223 ncols_d = ai[i+1] - ai[i]; 5224 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5225 /* off-diagonal portion of A */ 5226 for (j=jo; j<ncols_o; j++) { 5227 *cam++ = *ba++; bj++; 5228 } 5229 } 5230 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam)); 5231 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5232 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav)); 5233 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav)); 5234 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5235 PetscFunctionReturn(0); 5236 } 5237 5238 /*@ 5239 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5240 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5241 5242 Not Collective 5243 5244 Input Parameters: 5245 + A - the matrix 5246 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5247 5248 Output Parameters: 5249 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5250 - A_loc - the local sequential matrix generated 5251 5252 Level: developer 5253 5254 Notes: 5255 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5256 5257 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5258 5259 @*/ 5260 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5261 { 5262 Mat Ao,Ad; 5263 const PetscInt *cmap; 5264 PetscMPIInt size; 5265 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5266 5267 PetscFunctionBegin; 5268 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap)); 5269 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5270 if (size == 1) { 5271 if (scall == MAT_INITIAL_MATRIX) { 5272 PetscCall(PetscObjectReference((PetscObject)Ad)); 5273 *A_loc = Ad; 5274 } else if (scall == MAT_REUSE_MATRIX) { 5275 PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN)); 5276 } 5277 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob)); 5278 PetscFunctionReturn(0); 5279 } 5280 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f)); 5281 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5282 if (f) { 5283 PetscCall((*f)(A,scall,glob,A_loc)); 5284 } else { 5285 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5286 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5287 Mat_SeqAIJ *c; 5288 PetscInt *ai = a->i, *aj = a->j; 5289 PetscInt *bi = b->i, *bj = b->j; 5290 PetscInt *ci,*cj; 5291 const PetscScalar *aa,*ba; 5292 PetscScalar *ca; 5293 PetscInt i,j,am,dn,on; 5294 5295 PetscCall(MatGetLocalSize(Ad,&am,&dn)); 5296 PetscCall(MatGetLocalSize(Ao,NULL,&on)); 5297 PetscCall(MatSeqAIJGetArrayRead(Ad,&aa)); 5298 
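    /* Both local blocks are read below and concatenated row by row: columns [0,dn) of the result come from
       the diagonal block Ad (in its local numbering) and columns [dn,dn+on) from the off-diagonal block Ao. */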
PetscCall(MatSeqAIJGetArrayRead(Ao,&ba)); 5299 if (scall == MAT_INITIAL_MATRIX) { 5300 PetscInt k; 5301 PetscCall(PetscMalloc1(1+am,&ci)); 5302 PetscCall(PetscMalloc1(ai[am]+bi[am],&cj)); 5303 PetscCall(PetscMalloc1(ai[am]+bi[am],&ca)); 5304 ci[0] = 0; 5305 for (i=0,k=0; i<am; i++) { 5306 const PetscInt ncols_o = bi[i+1] - bi[i]; 5307 const PetscInt ncols_d = ai[i+1] - ai[i]; 5308 ci[i+1] = ci[i] + ncols_o + ncols_d; 5309 /* diagonal portion of A */ 5310 for (j=0; j<ncols_d; j++,k++) { 5311 cj[k] = *aj++; 5312 ca[k] = *aa++; 5313 } 5314 /* off-diagonal portion of A */ 5315 for (j=0; j<ncols_o; j++,k++) { 5316 cj[k] = dn + *bj++; 5317 ca[k] = *ba++; 5318 } 5319 } 5320 /* put together the new matrix */ 5321 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc)); 5322 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5323 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5324 c = (Mat_SeqAIJ*)(*A_loc)->data; 5325 c->free_a = PETSC_TRUE; 5326 c->free_ij = PETSC_TRUE; 5327 c->nonew = 0; 5328 PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name)); 5329 } else if (scall == MAT_REUSE_MATRIX) { 5330 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca)); 5331 for (i=0; i<am; i++) { 5332 const PetscInt ncols_d = ai[i+1] - ai[i]; 5333 const PetscInt ncols_o = bi[i+1] - bi[i]; 5334 /* diagonal portion of A */ 5335 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5336 /* off-diagonal portion of A */ 5337 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5338 } 5339 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca)); 5340 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5341 PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa)); 5342 PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa)); 5343 if (glob) { 5344 PetscInt cst, *gidx; 5345 5346 PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL)); 5347 PetscCall(PetscMalloc1(dn+on,&gidx)); 5348 for (i=0; i<dn; i++) gidx[i] = cst + i; 5349 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5350 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob)); 5351 } 5352 } 5353 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5354 PetscFunctionReturn(0); 5355 } 5356 5357 /*@C 5358 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5359 5360 Not Collective 5361 5362 Input Parameters: 5363 + A - the matrix 5364 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5365 - row, col - index sets of rows and columns to extract (or NULL) 5366 5367 Output Parameter: 5368 . 
A_loc - the local sequential matrix generated 5369 5370 Level: developer 5371 5372 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5373 5374 @*/ 5375 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5376 { 5377 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5378 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5379 IS isrowa,iscola; 5380 Mat *aloc; 5381 PetscBool match; 5382 5383 PetscFunctionBegin; 5384 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match)); 5385 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5386 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0)); 5387 if (!row) { 5388 start = A->rmap->rstart; end = A->rmap->rend; 5389 PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa)); 5390 } else { 5391 isrowa = *row; 5392 } 5393 if (!col) { 5394 start = A->cmap->rstart; 5395 cmap = a->garray; 5396 nzA = a->A->cmap->n; 5397 nzB = a->B->cmap->n; 5398 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5399 ncols = 0; 5400 for (i=0; i<nzB; i++) { 5401 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5402 else break; 5403 } 5404 imark = i; 5405 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5406 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5407 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola)); 5408 } else { 5409 iscola = *col; 5410 } 5411 if (scall != MAT_INITIAL_MATRIX) { 5412 PetscCall(PetscMalloc1(1,&aloc)); 5413 aloc[0] = *A_loc; 5414 } 5415 PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc)); 5416 if (!col) { /* attach global id of condensed columns */ 5417 PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola)); 5418 } 5419 *A_loc = aloc[0]; 5420 PetscCall(PetscFree(aloc)); 5421 if (!row) { 5422 PetscCall(ISDestroy(&isrowa)); 5423 } 5424 if (!col) { 5425 PetscCall(ISDestroy(&iscola)); 5426 } 5427 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0)); 5428 PetscFunctionReturn(0); 5429 } 5430 5431 /* 5432 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5433 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5434 * on a global size. 
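 * Two communication phases are used: one PetscSF first broadcasts, for every requested row, its
 * diagonal/off-diagonal nonzero counts and offsets; a pair of SFs (one for the diagonal part of P and one
 * for its off-diagonal part) then moves the column indices and numerical values into P_oth.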
5435 * */ 5436 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5437 { 5438 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5439 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5440 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5441 PetscMPIInt owner; 5442 PetscSFNode *iremote,*oiremote; 5443 const PetscInt *lrowindices; 5444 PetscSF sf,osf; 5445 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5446 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5447 MPI_Comm comm; 5448 ISLocalToGlobalMapping mapping; 5449 const PetscScalar *pd_a,*po_a; 5450 5451 PetscFunctionBegin; 5452 PetscCall(PetscObjectGetComm((PetscObject)P,&comm)); 5453 /* plocalsize is the number of roots 5454 * nrows is the number of leaves 5455 * */ 5456 PetscCall(MatGetLocalSize(P,&plocalsize,NULL)); 5457 PetscCall(ISGetLocalSize(rows,&nrows)); 5458 PetscCall(PetscCalloc1(nrows,&iremote)); 5459 PetscCall(ISGetIndices(rows,&lrowindices)); 5460 for (i=0;i<nrows;i++) { 5461 /* Find a remote index and an owner for a row 5462 * The row could be local or remote 5463 * */ 5464 owner = 0; 5465 lidx = 0; 5466 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx)); 5467 iremote[i].index = lidx; 5468 iremote[i].rank = owner; 5469 } 5470 /* Create SF to communicate how many nonzero columns for each row */ 5471 PetscCall(PetscSFCreate(comm,&sf)); 5472 /* SF will figure out the number of nonzero colunms for each row, and their 5473 * offsets 5474 * */ 5475 PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5476 PetscCall(PetscSFSetFromOptions(sf)); 5477 PetscCall(PetscSFSetUp(sf)); 5478 5479 PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets)); 5480 PetscCall(PetscCalloc1(2*plocalsize,&nrcols)); 5481 PetscCall(PetscCalloc1(nrows,&pnnz)); 5482 roffsets[0] = 0; 5483 roffsets[1] = 0; 5484 for (i=0;i<plocalsize;i++) { 5485 /* diag */ 5486 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5487 /* off diag */ 5488 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5489 /* compute offsets so that we relative location for each row */ 5490 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5491 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5492 } 5493 PetscCall(PetscCalloc1(2*nrows,&nlcols)); 5494 PetscCall(PetscCalloc1(2*nrows,&loffsets)); 5495 /* 'r' means root, and 'l' means leaf */ 5496 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5497 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5498 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5499 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5500 PetscCall(PetscSFDestroy(&sf)); 5501 PetscCall(PetscFree(roffsets)); 5502 PetscCall(PetscFree(nrcols)); 5503 dntotalcols = 0; 5504 ontotalcols = 0; 5505 ncol = 0; 5506 for (i=0;i<nrows;i++) { 5507 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5508 ncol = PetscMax(pnnz[i],ncol); 5509 /* diag */ 5510 dntotalcols += nlcols[i*2+0]; 5511 /* off diag */ 5512 ontotalcols += nlcols[i*2+1]; 5513 } 5514 /* We do not need to figure the right number of columns 5515 * since all the calculations will be done by going through the raw data 5516 * */ 5517 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth)); 5518 PetscCall(MatSetUp(*P_oth)); 5519 PetscCall(PetscFree(pnnz)); 5520 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5521 /* diag */ 5522 PetscCall(PetscCalloc1(dntotalcols,&iremote)); 5523 /* off diag */ 5524 
PetscCall(PetscCalloc1(ontotalcols,&oiremote)); 5525 /* diag */ 5526 PetscCall(PetscCalloc1(dntotalcols,&ilocal)); 5527 /* off diag */ 5528 PetscCall(PetscCalloc1(ontotalcols,&oilocal)); 5529 dntotalcols = 0; 5530 ontotalcols = 0; 5531 ntotalcols = 0; 5532 for (i=0;i<nrows;i++) { 5533 owner = 0; 5534 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL)); 5535 /* Set iremote for diag matrix */ 5536 for (j=0;j<nlcols[i*2+0];j++) { 5537 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5538 iremote[dntotalcols].rank = owner; 5539 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5540 ilocal[dntotalcols++] = ntotalcols++; 5541 } 5542 /* off diag */ 5543 for (j=0;j<nlcols[i*2+1];j++) { 5544 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5545 oiremote[ontotalcols].rank = owner; 5546 oilocal[ontotalcols++] = ntotalcols++; 5547 } 5548 } 5549 PetscCall(ISRestoreIndices(rows,&lrowindices)); 5550 PetscCall(PetscFree(loffsets)); 5551 PetscCall(PetscFree(nlcols)); 5552 PetscCall(PetscSFCreate(comm,&sf)); 5553 /* P serves as roots and P_oth is leaves 5554 * Diag matrix 5555 * */ 5556 PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5557 PetscCall(PetscSFSetFromOptions(sf)); 5558 PetscCall(PetscSFSetUp(sf)); 5559 5560 PetscCall(PetscSFCreate(comm,&osf)); 5561 /* Off diag */ 5562 PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER)); 5563 PetscCall(PetscSFSetFromOptions(osf)); 5564 PetscCall(PetscSFSetUp(osf)); 5565 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5566 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5567 /* We operate on the matrix internal data for saving memory */ 5568 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5569 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5570 PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL)); 5571 /* Convert to global indices for diag matrix */ 5572 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5573 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5574 /* We want P_oth store global indices */ 5575 PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping)); 5576 /* Use memory scalable approach */ 5577 PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH)); 5578 PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j)); 5579 PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5580 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5581 /* Convert back to local indices */ 5582 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5583 PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5584 nout = 0; 5585 PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j)); 5586 PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout); 5587 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5588 /* Exchange values */ 5589 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5590 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5591 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5592 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5593 /* Stop PETSc from shrinking memory */ 5594 for (i=0;i<nrows;i++) 
p_oth->ilen[i] = p_oth->imax[i]; 5595 PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY)); 5596 PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY)); 5597 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5598 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf)); 5599 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf)); 5600 PetscCall(PetscSFDestroy(&sf)); 5601 PetscCall(PetscSFDestroy(&osf)); 5602 PetscFunctionReturn(0); 5603 } 5604 5605 /* 5606 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5607 * This supports MPIAIJ and MAIJ 5608 * */ 5609 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5610 { 5611 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5612 Mat_SeqAIJ *p_oth; 5613 IS rows,map; 5614 PetscHMapI hamp; 5615 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5616 MPI_Comm comm; 5617 PetscSF sf,osf; 5618 PetscBool has; 5619 5620 PetscFunctionBegin; 5621 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5622 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0)); 5623 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5624 * and then create a submatrix (that often is an overlapping matrix) 5625 * */ 5626 if (reuse == MAT_INITIAL_MATRIX) { 5627 /* Use a hash table to figure out unique keys */ 5628 PetscCall(PetscHMapICreate(&hamp)); 5629 PetscCall(PetscHMapIResize(hamp,a->B->cmap->n)); 5630 PetscCall(PetscCalloc1(a->B->cmap->n,&mapping)); 5631 count = 0; 5632 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5633 for (i=0;i<a->B->cmap->n;i++) { 5634 key = a->garray[i]/dof; 5635 PetscCall(PetscHMapIHas(hamp,key,&has)); 5636 if (!has) { 5637 mapping[i] = count; 5638 PetscCall(PetscHMapISet(hamp,key,count++)); 5639 } else { 5640 /* Current 'i' has the same value the previous step */ 5641 mapping[i] = count-1; 5642 } 5643 } 5644 PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map)); 5645 PetscCall(PetscHMapIGetSize(hamp,&htsize)); 5646 PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count); 5647 PetscCall(PetscCalloc1(htsize,&rowindices)); 5648 off = 0; 5649 PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices)); 5650 PetscCall(PetscHMapIDestroy(&hamp)); 5651 PetscCall(PetscSortInt(htsize,rowindices)); 5652 PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows)); 5653 /* In case, the matrix was already created but users want to recreate the matrix */ 5654 PetscCall(MatDestroy(P_oth)); 5655 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth)); 5656 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map)); 5657 PetscCall(ISDestroy(&map)); 5658 PetscCall(ISDestroy(&rows)); 5659 } else if (reuse == MAT_REUSE_MATRIX) { 5660 /* If matrix was already created, we simply update values using SF objects 5661 * that as attached to the matrix ealier. 
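 * The previously composed "diagsf" and "offdiagsf" broadcast the current values of p->A and p->B
 * directly into P_oth's value array, so no symbolic work or extra storage is needed on reuse.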
5662 */ 5663 const PetscScalar *pd_a,*po_a; 5664 5665 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf)); 5666 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf)); 5667 PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5668 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5669 /* Update values in place */ 5670 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5671 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5672 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5673 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5674 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5675 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5676 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5677 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5678 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5679 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0)); 5680 PetscFunctionReturn(0); 5681 } 5682 5683 /*@C 5684 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5685 5686 Collective on Mat 5687 5688 Input Parameters: 5689 + A - the first matrix in mpiaij format 5690 . B - the second matrix in mpiaij format 5691 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5692 5693 Output Parameters: 5694 + rowb - On input index sets of rows of B to extract (or NULL), modified on output 5695 . colb - On input index sets of columns of B to extract (or NULL), modified on output 5696 - B_seq - the sequential matrix generated 5697 5698 Level: developer 5699 5700 @*/ 5701 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5702 { 5703 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5704 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5705 IS isrowb,iscolb; 5706 Mat *bseq=NULL; 5707 5708 PetscFunctionBegin; 5709 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5710 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5711 } 5712 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0)); 5713 5714 if (scall == MAT_INITIAL_MATRIX) { 5715 start = A->cmap->rstart; 5716 cmap = a->garray; 5717 nzA = a->A->cmap->n; 5718 nzB = a->B->cmap->n; 5719 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5720 ncols = 0; 5721 for (i=0; i<nzB; i++) { /* row < local row index */ 5722 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5723 else break; 5724 } 5725 imark = i; 5726 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5727 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5728 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb)); 5729 PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb)); 5730 } else { 5731 PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5732 isrowb = *rowb; iscolb = *colb; 5733 PetscCall(PetscMalloc1(1,&bseq)); 5734 bseq[0] = *B_seq; 5735 } 5736 PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq)); 5737 *B_seq = bseq[0]; 5738 PetscCall(PetscFree(bseq)); 5739 if (!rowb) { 5740 PetscCall(ISDestroy(&isrowb)); 5741 } else { 5742 *rowb = isrowb; 5743 } 5744 if (!colb) { 5745 PetscCall(ISDestroy(&iscolb)); 5746 } else 
{ 5747 *colb = iscolb; 5748 } 5749 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0)); 5750 PetscFunctionReturn(0); 5751 } 5752 5753 /* 5754 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5755 of the OFF-DIAGONAL portion of local A 5756 5757 Collective on Mat 5758 5759 Input Parameters: 5760 + A,B - the matrices in mpiaij format 5761 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5762 5763 Output Parameter: 5764 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5765 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5766 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5767 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5768 5769 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5770 for this matrix. This is not desirable.. 5771 5772 Level: developer 5773 5774 */ 5775 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5776 { 5777 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5778 Mat_SeqAIJ *b_oth; 5779 VecScatter ctx; 5780 MPI_Comm comm; 5781 const PetscMPIInt *rprocs,*sprocs; 5782 const PetscInt *srow,*rstarts,*sstarts; 5783 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5784 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5785 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5786 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5787 PetscMPIInt size,tag,rank,nreqs; 5788 5789 PetscFunctionBegin; 5790 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5791 PetscCallMPI(MPI_Comm_size(comm,&size)); 5792 5793 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5794 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5795 } 5796 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0)); 5797 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 5798 5799 if (size == 1) { 5800 startsj_s = NULL; 5801 bufa_ptr = NULL; 5802 *B_oth = NULL; 5803 PetscFunctionReturn(0); 5804 } 5805 5806 ctx = a->Mvctx; 5807 tag = ((PetscObject)ctx)->tag; 5808 5809 PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5810 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5811 PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs)); 5812 PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs)); 5813 PetscCall(PetscMalloc1(nreqs,&reqs)); 5814 rwaits = reqs; 5815 swaits = reqs + nrecvs; 5816 5817 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5818 if (scall == MAT_INITIAL_MATRIX) { 5819 /* i-array */ 5820 /*---------*/ 5821 /* post receives */ 5822 if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5823 for (i=0; i<nrecvs; i++) { 5824 rowlen = rvalues + rstarts[i]*rbs; 5825 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5826 
PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5827 } 5828 5829 /* pack the outgoing message */ 5830 PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj)); 5831 5832 sstartsj[0] = 0; 5833 rstartsj[0] = 0; 5834 len = 0; /* total length of j or a array to be sent */ 5835 if (nsends) { 5836 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5837 PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues)); 5838 } 5839 for (i=0; i<nsends; i++) { 5840 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5841 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5842 for (j=0; j<nrows; j++) { 5843 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5844 for (l=0; l<sbs; l++) { 5845 PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */ 5846 5847 rowlen[j*sbs+l] = ncols; 5848 5849 len += ncols; 5850 PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); 5851 } 5852 k++; 5853 } 5854 PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5855 5856 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5857 } 5858 /* recvs and sends of i-array are completed */ 5859 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5860 PetscCall(PetscFree(svalues)); 5861 5862 /* allocate buffers for sending j and a arrays */ 5863 PetscCall(PetscMalloc1(len+1,&bufj)); 5864 PetscCall(PetscMalloc1(len+1,&bufa)); 5865 5866 /* create i-array of B_oth */ 5867 PetscCall(PetscMalloc1(aBn+2,&b_othi)); 5868 5869 b_othi[0] = 0; 5870 len = 0; /* total length of j or a array to be received */ 5871 k = 0; 5872 for (i=0; i<nrecvs; i++) { 5873 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5874 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5875 for (j=0; j<nrows; j++) { 5876 b_othi[k+1] = b_othi[k] + rowlen[j]; 5877 PetscCall(PetscIntSumError(rowlen[j],len,&len)); 5878 k++; 5879 } 5880 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5881 } 5882 PetscCall(PetscFree(rvalues)); 5883 5884 /* allocate space for j and a arrays of B_oth */ 5885 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj)); 5886 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha)); 5887 5888 /* j-array */ 5889 /*---------*/ 5890 /* post receives of j-array */ 5891 for (i=0; i<nrecvs; i++) { 5892 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5893 PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5894 } 5895 5896 /* pack the outgoing message j-array */ 5897 if (nsends) k = sstarts[0]; 5898 for (i=0; i<nsends; i++) { 5899 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5900 bufJ = bufj+sstartsj[i]; 5901 for (j=0; j<nrows; j++) { 5902 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5903 for (ll=0; ll<sbs; ll++) { 5904 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5905 for (l=0; l<ncols; l++) { 5906 *bufJ++ = cols[l]; 5907 } 5908 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5909 } 5910 } 5911 PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5912 } 5913 5914 /* recvs and sends of j-array are completed */ 5915 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5916 } else if (scall == MAT_REUSE_MATRIX) { 5917 sstartsj = *startsj_s; 5918 rstartsj = *startsj_r; 5919 bufa = *bufa_ptr; 5920 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5921 
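    /* Reuse path: the i/j structure of B_oth and the message offsets (sstartsj,rstartsj) from the initial
       call are kept; only the numerical values (the a-array) are exchanged below. */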
PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha)); 5922 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Unsupported MatReuse value; scall must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX"); 5923 5924 /* a-array */ 5925 /*---------*/ 5926 /* post receives of a-array */ 5927 for (i=0; i<nrecvs; i++) { 5928 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5929 PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i)); 5930 } 5931 5932 /* pack the outgoing message a-array */ 5933 if (nsends) k = sstarts[0]; 5934 for (i=0; i<nsends; i++) { 5935 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5936 bufA = bufa+sstartsj[i]; 5937 for (j=0; j<nrows; j++) { 5938 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5939 for (ll=0; ll<sbs; ll++) { 5940 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5941 for (l=0; l<ncols; l++) { 5942 *bufA++ = vals[l]; 5943 } 5944 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5945 } 5946 } 5947 PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i)); 5948 } 5949 /* recvs and sends of a-array are completed */ 5950 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5951 PetscCall(PetscFree(reqs)); 5952 5953 if (scall == MAT_INITIAL_MATRIX) { 5954 /* put together the new matrix */ 5955 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth)); 5956 5957 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5958 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5959 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5960 b_oth->free_a = PETSC_TRUE; 5961 b_oth->free_ij = PETSC_TRUE; 5962 b_oth->nonew = 0; 5963 5964 PetscCall(PetscFree(bufj)); 5965 if (!startsj_s || !bufa_ptr) { 5966 PetscCall(PetscFree2(sstartsj,rstartsj)); 5967 PetscCall(PetscFree(bufa)); 5968 } else { 5969 *startsj_s = sstartsj; 5970 *startsj_r = rstartsj; 5971 *bufa_ptr = bufa; 5972 } 5973 } else if (scall == MAT_REUSE_MATRIX) { 5974 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha)); 5975 } 5976 5977 PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5978 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs)); 5979 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0)); 5980 PetscFunctionReturn(0); 5981 } 5982 5983 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5984 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5985 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5986 #if defined(PETSC_HAVE_MKL_SPARSE) 5987 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5988 #endif 5989 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5990 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5991 #if defined(PETSC_HAVE_ELEMENTAL) 5992 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5993 #endif 5994 #if defined(PETSC_HAVE_SCALAPACK) 5995 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5996 #endif 5997 #if defined(PETSC_HAVE_HYPRE) 5998 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5999 #endif 6000 #if defined(PETSC_HAVE_CUDA) 6001 PETSC_INTERN PetscErrorCode
MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 6002 #endif 6003 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6004 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 6005 #endif 6006 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 6007 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 6008 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6009 6010 /* 6011 Computes (B'*A')' since computing B*A directly is untenable 6012 6013 n p p 6014 [ ] [ ] [ ] 6015 m [ A ] * n [ B ] = m [ C ] 6016 [ ] [ ] [ ] 6017 6018 */ 6019 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 6020 { 6021 Mat At,Bt,Ct; 6022 6023 PetscFunctionBegin; 6024 PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At)); 6025 PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt)); 6026 PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct)); 6027 PetscCall(MatDestroy(&At)); 6028 PetscCall(MatDestroy(&Bt)); 6029 PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C)); 6030 PetscCall(MatDestroy(&Ct)); 6031 PetscFunctionReturn(0); 6032 } 6033 6034 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 6035 { 6036 PetscBool cisdense; 6037 6038 PetscFunctionBegin; 6039 PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n); 6040 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N)); 6041 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 6042 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 6043 if (!cisdense) { 6044 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 6045 } 6046 PetscCall(MatSetUp(C)); 6047 6048 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6049 PetscFunctionReturn(0); 6050 } 6051 6052 /* ----------------------------------------------------------------*/ 6053 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6054 { 6055 Mat_Product *product = C->product; 6056 Mat A = product->A,B=product->B; 6057 6058 PetscFunctionBegin; 6059 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6060 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6061 6062 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6063 C->ops->productsymbolic = MatProductSymbolic_AB; 6064 PetscFunctionReturn(0); 6065 } 6066 6067 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6068 { 6069 Mat_Product *product = C->product; 6070 6071 PetscFunctionBegin; 6072 if (product->type == MATPRODUCT_AB) { 6073 PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6074 } 6075 PetscFunctionReturn(0); 6076 } 6077 6078 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6079 6080 Input Parameters: 6081 6082 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6083 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6084 6085 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6086 6087 For Set1, j1[] contains column indices of the nonzeros. 
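(A hypothetical illustration: if one row of Set1 holds the sorted, possibly repeated column indices 2,2,5 and the same row of Set2 holds 2,7, the merged row becomes 2,5,7; jmap1/jmap2 describe how many repeats each unique entry had within its set, and imap1/imap2 record where each unique entry lands in the merged row.)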
6088 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6089 respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted, 6090 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6091 6092 Similar for Set2. 6093 6094 This routine merges the two sets of nonzeros row by row and removes repeats. 6095 6096 Output Parameters: (memory is allocated by the caller) 6097 6098 i[],j[]: the CSR of the merged matrix, which has m rows. 6099 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to the imap1[k]-th unique nonzero in the merged matrix. 6100 imap2[]: similar to imap1[], but for Set2. 6101 Note we order nonzeros row-by-row and from left to right. 6102 */ 6103 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[], 6104 const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[], 6105 PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[]) 6106 { 6107 PetscInt r,m; /* Row index of mat */ 6108 PetscCount t,t1,t2,b1,e1,b2,e2; 6109 6110 PetscFunctionBegin; 6111 PetscCall(MatGetLocalSize(mat,&m,NULL)); 6112 t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */ 6113 i[0] = 0; 6114 for (r=0; r<m; r++) { /* Do row by row merging */ 6115 b1 = rowBegin1[r]; 6116 e1 = rowEnd1[r]; 6117 b2 = rowBegin2[r]; 6118 e2 = rowEnd2[r]; 6119 while (b1 < e1 && b2 < e2) { 6120 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6121 j[t] = j1[b1]; 6122 imap1[t1] = t; 6123 imap2[t2] = t; 6124 b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to the next unique nonzero in Set1 */ 6125 b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to the next unique nonzero in Set2 */ 6126 t1++; t2++; t++; 6127 } else if (j1[b1] < j2[b2]) { 6128 j[t] = j1[b1]; 6129 imap1[t1] = t; 6130 b1 += jmap1[t1+1] - jmap1[t1]; 6131 t1++; t++; 6132 } else { 6133 j[t] = j2[b2]; 6134 imap2[t2] = t; 6135 b2 += jmap2[t2+1] - jmap2[t2]; 6136 t2++; t++; 6137 } 6138 } 6139 /* Merge the remaining in either j1[] or j2[] */ 6140 while (b1 < e1) { 6141 j[t] = j1[b1]; 6142 imap1[t1] = t; 6143 b1 += jmap1[t1+1] - jmap1[t1]; 6144 t1++; t++; 6145 } 6146 while (b2 < e2) { 6147 j[t] = j2[b2]; 6148 imap2[t2] = t; 6149 b2 += jmap2[t2+1] - jmap2[t2]; 6150 t2++; t++; 6151 } 6152 i[r+1] = t; 6153 } 6154 PetscFunctionReturn(0); 6155 } 6156 6157 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6158 6159 Input Parameters: 6160 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6161 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6162 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6163 6164 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6165 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6166 6167 Output Parameters: 6168 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6169 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6170 They contain indices pointing to j[].
For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6171 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6172 6173 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6174 Atot: number of entries belonging to the diagonal block. 6175 Annz: number of unique nonzeros belonging to the diagonal block. 6176 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6177 repeats (i.e., same 'i,j' pair). 6178 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6179 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6180 6181 Atot: number of entries belonging to the diagonal block 6182 Annz: number of unique nonzeros belonging to the diagonal block. 6183 6184 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6185 6186 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6187 */ 6188 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[], 6189 PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[], 6190 PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_, 6191 PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_) 6192 { 6193 PetscInt cstart,cend,rstart,rend,row,col; 6194 PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6195 PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6196 PetscCount k,m,p,q,r,s,mid; 6197 PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap; 6198 6199 PetscFunctionBegin; 6200 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6201 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6202 m = rend - rstart; 6203 6204 for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */ 6205 6206 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6207 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6208 */ 6209 while (k<n) { 6210 row = i[k]; 6211 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6212 for (s=k; s<n; s++) if (i[s] != row) break; 6213 for (p=k; p<s; p++) { 6214 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6215 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); 6216 } 6217 PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k)); 6218 PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6219 rowBegin[row-rstart] = k; 6220 rowMid[row-rstart] = mid; 6221 rowEnd[row-rstart] = s; 6222 6223 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6224 Atot += mid - k; 6225 Btot += s - mid; 6226 6227 /* Count unique nonzeros of this diag/offdiag row */ 6228 for (p=k; p<mid;) { 6229 col = j[p]; 6230 do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */ 6231 Annz++; 6232 } 6233 6234 for (p=mid; p<s;) { 6235 col = j[p]; 6236 do {p++;} while (p<s && j[p] == col); 6237 Bnnz++; 6238 } 6239 k = s; 6240 } 6241 6242 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6243 PetscCall(PetscMalloc1(Atot,&Aperm)); 6244 PetscCall(PetscMalloc1(Btot,&Bperm)); 6245 PetscCall(PetscMalloc1(Annz+1,&Ajmap)); 6246 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap)); 6247 6248 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6249 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6250 for (r=0; r<m; r++) { 6251 k = rowBegin[r]; 6252 mid = rowMid[r]; 6253 s = rowEnd[r]; 6254 PetscCall(PetscArraycpy(Aperm+Atot,perm+k, mid-k)); 6255 PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid)); 6256 Atot += mid - k; 6257 Btot += s - mid; 6258 6259 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6260 for (p=k; p<mid;) { 6261 col = j[p]; 6262 q = p; 6263 do {p++;} while (p<mid && j[p] == col); 6264 Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6265 Annz++; 6266 } 6267 6268 for (p=mid; p<s;) { 6269 col = j[p]; 6270 q = p; 6271 do {p++;} while (p<s && j[p] == col); 6272 Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6273 Bnnz++; 6274 } 6275 } 6276 /* Output */ 6277 *Aperm_ = Aperm; 6278 *Annz_ = Annz; 6279 *Atot_ = Atot; 6280 *Ajmap_ = Ajmap; 6281 *Bperm_ = Bperm; 6282 *Bnnz_ = Bnnz; 6283 *Btot_ = Btot; 6284 *Bjmap_ = Bjmap; 6285 PetscFunctionReturn(0); 6286 } 6287 6288 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6289 6290 Input Parameters: 6291 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6292 nnz: number of unique nonzeros in the merged matrix 6293 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6294 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6295 6296 Output Parameter: (memory is allocated by the caller) 6297 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6298 6299 Example: 6300 nnz1 = 4 6301 nnz = 6 6302 imap = [1,3,4,5] 6303 jmap = [0,3,5,6,7] 6304 then, 6305 jmap_new = [0,0,3,3,5,6,7] 6306 */ 6307 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[]) 6308 { 6309 PetscCount k,p; 6310 6311 PetscFunctionBegin; 6312 jmap_new[0] = 0; 6313 p = nnz; /* p loops 
over jmap_new[] backwards */ 6314 for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */ 6315 for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1]; 6316 } 6317 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6318 PetscFunctionReturn(0); 6319 } 6320 6321 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6322 { 6323 MPI_Comm comm; 6324 PetscMPIInt rank,size; 6325 PetscInt m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6326 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6327 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6328 6329 PetscFunctionBegin; 6330 PetscCall(PetscFree(mpiaij->garray)); 6331 PetscCall(VecDestroy(&mpiaij->lvec)); 6332 #if defined(PETSC_USE_CTABLE) 6333 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6334 #else 6335 PetscCall(PetscFree(mpiaij->colmap)); 6336 #endif 6337 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6338 mat->assembled = PETSC_FALSE; 6339 mat->was_assembled = PETSC_FALSE; 6340 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6341 6342 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 6343 PetscCallMPI(MPI_Comm_size(comm,&size)); 6344 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 6345 PetscCall(PetscLayoutSetUp(mat->rmap)); 6346 PetscCall(PetscLayoutSetUp(mat->cmap)); 6347 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6348 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6349 PetscCall(MatGetLocalSize(mat,&m,&n)); 6350 PetscCall(MatGetSize(mat,&M,&N)); 6351 6352 /* ---------------------------------------------------------------------------*/ 6353 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6354 /* entries come first, then local rows, then remote rows. */ 6355 /* ---------------------------------------------------------------------------*/ 6356 PetscCount n1 = coo_n,*perm1; 6357 PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 6358 PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1)); 6359 PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */ 6360 PetscCall(PetscArraycpy(j1,coo_j,n1)); 6361 for (k=0; k<n1; k++) perm1[k] = k; 6362 6363 /* Manipulate indices so that entries with negative row or col indices will have smallest 6364 row indices, local entries will have greater but negative row indices, and remote entries 6365 will have positive row indices. 
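     For example (hypothetical values, assuming this rank owns rows [rstart,rend) = [10,20)):
       an entry with i = -3 or j = -5  -> i1[k] becomes PETSC_MIN_INT       (to be ignored; sorts first)
       an entry with i = 12 (local)    -> i1[k] becomes 12 - PETSC_MAX_INT  (negative; sorts next)
       an entry with i = 35 (remote)   -> i1[k] stays 35                    (positive; sorts last)
     so after sorting by the modified row index the three groups are contiguous in i1[].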
6366 */ 6367 for (k=0; k<n1; k++) { 6368 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6369 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6370 else { 6371 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but an entry is being inserted into a remote row"); 6372 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6373 } 6374 } 6375 6376 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6377 PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1)); 6378 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */ 6379 PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */ 6380 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows */ 6381 6382 /* ---------------------------------------------------------------------------*/ 6383 /* Split local rows into diag/offdiag portions */ 6384 /* ---------------------------------------------------------------------------*/ 6385 PetscCount *rowBegin1,*rowMid1,*rowEnd1; 6386 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1; 6387 PetscCount Annz1,Bnnz1,Atot1,Btot1; 6388 6389 PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1)); 6390 PetscCall(PetscMalloc1(n1-rem,&Cperm1)); 6391 PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1)); 6392 6393 /* ---------------------------------------------------------------------------*/ 6394 /* Send remote rows to their owner */ 6395 /* ---------------------------------------------------------------------------*/ 6396 /* Find which rows should be sent to which remote ranks */ 6397 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6398 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6399 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6400 const PetscInt *ranges; 6401 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6402 6403 PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges)); 6404 PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries)); 6405 for (k=rem; k<n1;) { 6406 PetscMPIInt owner; 6407 PetscInt firstRow,lastRow; 6408 6409 /* Locate a row range */ 6410 firstRow = i1[k]; /* first row of this owner */ 6411 PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner)); 6412 lastRow = ranges[owner+1]-1; /* last row of this owner */ 6413 6414 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6415 PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p)); 6416 6417 /* All entries in [k,p) belong to this remote owner */ 6418 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6419 PetscMPIInt *sendto2; 6420 PetscInt *nentries2; 6421 PetscInt maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size; 6422 6423 PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2)); 6424 PetscCall(PetscArraycpy(sendto2,sendto,maxNsend)); 6425 PetscCall(PetscArraycpy(nentries2,nentries,maxNsend)); 6426 PetscCall(PetscFree2(sendto,nentries)); 6427 sendto = sendto2; 6428 nentries = nentries2; 6429 maxNsend = maxNsend2; 6430 } 6431 sendto[nsend] = owner; 6432 nentries[nsend] = p - k; 6433 PetscCall(PetscCountCast(p-k,&nentries[nsend])); 6434 nsend++; 6435 k = p; 6436 } 6437 6438 /* Build 1st SF to know offsets on remote to send data */ 6439 PetscSF sf1; 6440 PetscInt nroots = 1,nroots2 = 0; 6441 PetscInt nleaves = nsend,nleaves2 = 0; 6442 PetscInt *offsets; 6443 PetscSFNode *iremote; 6444 6445 PetscCall(PetscSFCreate(comm,&sf1)); 6446 PetscCall(PetscMalloc1(nsend,&iremote)); 6447 PetscCall(PetscMalloc1(nsend,&offsets)); 6448 for (k=0; k<nsend; k++) { 6449 iremote[k].rank = sendto[k]; 6450 iremote[k].index = 0; 6451 nleaves2 += nentries[k]; 6452 PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt"); 6453 } 6454 PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6455 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM)); 6456 PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* If nroots2 overflowed, the offsets[] check below will catch it */ 6457 PetscCall(PetscSFDestroy(&sf1)); 6458 PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem); 6459 6460 /* Build 2nd SF to send remote COOs to their owner */ 6461 PetscSF sf2; 6462 nroots = nroots2; 6463 nleaves = nleaves2; 6464 PetscCall(PetscSFCreate(comm,&sf2)); 6465 PetscCall(PetscSFSetFromOptions(sf2)); 6466 PetscCall(PetscMalloc1(nleaves,&iremote)); 6467 p = 0; 6468 for (k=0; k<nsend; k++) { 6469 PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt"); 6470 for (q=0; q<nentries[k]; q++,p++) { 6471 iremote[p].rank = sendto[k]; 6472 iremote[p].index = offsets[k] + q; 6473 } 6474 } 6475 PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6476 6477 /* sf2 only sends contiguous leafdata to contiguous rootdata.
We record the permutation which will be used to fill leafdata */ 6478 PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem)); 6479 6480 /* Send the remote COOs to their owner */ 6481 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6482 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6483 PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2)); 6484 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE)); 6485 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE)); 6486 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE)); 6487 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE)); 6488 6489 PetscCall(PetscFree(offsets)); 6490 PetscCall(PetscFree2(sendto,nentries)); 6491 6492 /* ---------------------------------------------------------------*/ 6493 /* Sort received COOs by row along with the permutation array */ 6494 /* ---------------------------------------------------------------*/ 6495 for (k=0; k<n2; k++) perm2[k] = k; 6496 PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2)); 6497 6498 /* ---------------------------------------------------------------*/ 6499 /* Split received COOs into diag/offdiag portions */ 6500 /* ---------------------------------------------------------------*/ 6501 PetscCount *rowBegin2,*rowMid2,*rowEnd2; 6502 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6503 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6504 6505 PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2)); 6506 PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2)); 6507 6508 /* --------------------------------------------------------------------------*/ 6509 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6510 /* --------------------------------------------------------------------------*/ 6511 PetscInt *Ai,*Bi; 6512 PetscInt *Aj,*Bj; 6513 6514 PetscCall(PetscMalloc1(m+1,&Ai)); 6515 PetscCall(PetscMalloc1(m+1,&Bi)); 6516 PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6517 PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj)); 6518 6519 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6520 PetscCall(PetscMalloc1(Annz1,&Aimap1)); 6521 PetscCall(PetscMalloc1(Bnnz1,&Bimap1)); 6522 PetscCall(PetscMalloc1(Annz2,&Aimap2)); 6523 PetscCall(PetscMalloc1(Bnnz2,&Bimap2)); 6524 6525 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj)); 6526 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj)); 6527 6528 /* --------------------------------------------------------------------------*/ 6529 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6530 /* expect nonzeros in A/B most likely have local contributing entries */ 6531 /* --------------------------------------------------------------------------*/ 6532 PetscInt Annz = Ai[m]; 6533 PetscInt Bnnz = Bi[m]; 6534 PetscCount *Ajmap1_new,*Bjmap1_new; 6535 6536 PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new)); 6537 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new)); 6538 6539 PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new)); 6540 
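/* expand the off-diagonal block's jmap in the same way */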
PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new)); 6541 6542 PetscCall(PetscFree(Aimap1)); 6543 PetscCall(PetscFree(Ajmap1)); 6544 PetscCall(PetscFree(Bimap1)); 6545 PetscCall(PetscFree(Bjmap1)); 6546 PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1)); 6547 PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2)); 6548 PetscCall(PetscFree3(i1,j1,perm1)); 6549 PetscCall(PetscFree3(i2,j2,perm2)); 6550 6551 Ajmap1 = Ajmap1_new; 6552 Bjmap1 = Bjmap1_new; 6553 6554 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6555 if (Annz < Annz1 + Annz2) { 6556 PetscInt *Aj_new; 6557 PetscCall(PetscMalloc1(Annz,&Aj_new)); 6558 PetscCall(PetscArraycpy(Aj_new,Aj,Annz)); 6559 PetscCall(PetscFree(Aj)); 6560 Aj = Aj_new; 6561 } 6562 6563 if (Bnnz < Bnnz1 + Bnnz2) { 6564 PetscInt *Bj_new; 6565 PetscCall(PetscMalloc1(Bnnz,&Bj_new)); 6566 PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz)); 6567 PetscCall(PetscFree(Bj)); 6568 Bj = Bj_new; 6569 } 6570 6571 /* --------------------------------------------------------------------------------*/ 6572 /* Create new submatrices for on-process and off-process coupling */ 6573 /* --------------------------------------------------------------------------------*/ 6574 PetscScalar *Aa,*Ba; 6575 MatType rtype; 6576 Mat_SeqAIJ *a,*b; 6577 PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */ 6578 PetscCall(PetscCalloc1(Bnnz,&Ba)); 6579 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6580 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6581 PetscCall(MatDestroy(&mpiaij->A)); 6582 PetscCall(MatDestroy(&mpiaij->B)); 6583 PetscCall(MatGetRootType_Private(mat,&rtype)); 6584 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A)); 6585 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B)); 6586 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6587 6588 a = (Mat_SeqAIJ*)mpiaij->A->data; 6589 b = (Mat_SeqAIJ*)mpiaij->B->data; 6590 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6591 a->free_a = b->free_a = PETSC_TRUE; 6592 a->free_ij = b->free_ij = PETSC_TRUE; 6593 6594 /* conversion must happen AFTER multiply setup */ 6595 PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A)); 6596 PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B)); 6597 PetscCall(VecDestroy(&mpiaij->lvec)); 6598 PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL)); 6599 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec)); 6600 6601 mpiaij->coo_n = coo_n; 6602 mpiaij->coo_sf = sf2; 6603 mpiaij->sendlen = nleaves; 6604 mpiaij->recvlen = nroots; 6605 6606 mpiaij->Annz = Annz; 6607 mpiaij->Bnnz = Bnnz; 6608 6609 mpiaij->Annz2 = Annz2; 6610 mpiaij->Bnnz2 = Bnnz2; 6611 6612 mpiaij->Atot1 = Atot1; 6613 mpiaij->Atot2 = Atot2; 6614 mpiaij->Btot1 = Btot1; 6615 mpiaij->Btot2 = Btot2; 6616 6617 mpiaij->Ajmap1 = Ajmap1; 6618 mpiaij->Aperm1 = Aperm1; 6619 6620 mpiaij->Bjmap1 = Bjmap1; 6621 mpiaij->Bperm1 = Bperm1; 6622 6623 mpiaij->Aimap2 = Aimap2; 6624 mpiaij->Ajmap2 = Ajmap2; 6625 mpiaij->Aperm2 = Aperm2; 6626 6627 mpiaij->Bimap2 = Bimap2; 6628 mpiaij->Bjmap2 = Bjmap2; 6629 mpiaij->Bperm2 = Bperm2; 6630 6631 mpiaij->Cperm1 = Cperm1; 6632 6633 /* Allocate in preallocation. 
If not used, it has zero cost on host */ 6634 PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf)); 6635 PetscFunctionReturn(0); 6636 } 6637 6638 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode) 6639 { 6640 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6641 Mat A = mpiaij->A,B = mpiaij->B; 6642 PetscCount Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2; 6643 PetscScalar *Aa,*Ba; 6644 PetscScalar *sendbuf = mpiaij->sendbuf; 6645 PetscScalar *recvbuf = mpiaij->recvbuf; 6646 const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2; 6647 const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2; 6648 const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2; 6649 const PetscCount *Cperm1 = mpiaij->Cperm1; 6650 6651 PetscFunctionBegin; 6652 PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */ 6653 PetscCall(MatSeqAIJGetArray(B,&Ba)); 6654 6655 /* Pack entries to be sent to remote */ 6656 for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6657 6658 /* Send remote entries to their owner and overlap the communication with local computation */ 6659 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE)); 6660 /* Add local entries to A and B */ 6661 for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6662 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stablility */ 6663 for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]]; 6664 Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum; 6665 } 6666 for (PetscCount i=0; i<Bnnz; i++) { 6667 PetscScalar sum = 0.0; 6668 for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]]; 6669 Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum; 6670 } 6671 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE)); 6672 6673 /* Add received remote entries to A and B */ 6674 for (PetscCount i=0; i<Annz2; i++) { 6675 for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6676 } 6677 for (PetscCount i=0; i<Bnnz2; i++) { 6678 for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6679 } 6680 PetscCall(MatSeqAIJRestoreArray(A,&Aa)); 6681 PetscCall(MatSeqAIJRestoreArray(B,&Ba)); 6682 PetscFunctionReturn(0); 6683 } 6684 6685 /* ----------------------------------------------------------------*/ 6686 6687 /*MC 6688 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6689 6690 Options Database Keys: 6691 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6692 6693 Level: beginner 6694 6695 Notes: 6696 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6697 in this case the values associated with the rows and columns one passes in are set to zero 6698 in the matrix 6699 6700 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6701 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6702 6703 .seealso: `MatCreateAIJ()` 6704 M*/ 6705 6706 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6707 { 6708 Mat_MPIAIJ *b; 6709 PetscMPIInt size; 6710 6711 PetscFunctionBegin; 6712 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 6713 6714 PetscCall(PetscNewLog(B,&b)); 6715 B->data = (void*)b; 6716 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 6717 B->assembled = PETSC_FALSE; 6718 B->insertmode = NOT_SET_VALUES; 6719 b->size = size; 6720 6721 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 6722 6723 /* build cache for off array entries formed */ 6724 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 6725 6726 b->donotstash = PETSC_FALSE; 6727 b->colmap = NULL; 6728 b->garray = NULL; 6729 b->roworiented = PETSC_TRUE; 6730 6731 /* stuff used for matrix vector multiply */ 6732 b->lvec = NULL; 6733 b->Mvctx = NULL; 6734 6735 /* stuff for MatGetRow() */ 6736 b->rowindices = NULL; 6737 b->rowvalues = NULL; 6738 b->getrowactive = PETSC_FALSE; 6739 6740 /* flexible pointer used in CUSPARSE classes */ 6741 b->spptr = NULL; 6742 6743 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6744 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ)); 6745 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ)); 6746 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ)); 6747 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ)); 6748 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ)); 6749 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6750 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ)); 6751 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM)); 6752 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL)); 6753 #if defined(PETSC_HAVE_CUDA) 6754 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6755 #endif 6756 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6757 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos)); 6758 #endif 6759 #if defined(PETSC_HAVE_MKL_SPARSE) 6760 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL)); 6761 #endif 6762 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL)); 6763 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ)); 6764 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ)); 6765 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense)); 6766 #if defined(PETSC_HAVE_ELEMENTAL) 6767 
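/* the Elemental conversion is registered only when PETSc was configured with Elemental */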
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental)); 6768 #endif 6769 #if defined(PETSC_HAVE_SCALAPACK) 6770 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK)); 6771 #endif 6772 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS)); 6773 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL)); 6774 #if defined(PETSC_HAVE_HYPRE) 6775 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE)); 6776 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6777 #endif 6778 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ)); 6779 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ)); 6780 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ)); 6781 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ)); 6782 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ)); 6783 PetscFunctionReturn(0); 6784 } 6785 6786 /*@C 6787 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6788 and "off-diagonal" part of the matrix in CSR format. 6789 6790 Collective 6791 6792 Input Parameters: 6793 + comm - MPI communicator 6794 . m - number of local rows (Cannot be PETSC_DECIDE) 6795 . n - This value should be the same as the local size used in creating the 6796 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6797 calculated if N is given) For square matrices n is almost always m. 6798 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6799 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6800 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6801 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6802 . a - matrix values 6803 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6804 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6805 - oa - matrix values 6806 6807 Output Parameter: 6808 . mat - the matrix 6809 6810 Level: advanced 6811 6812 Notes: 6813 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6814 must free the arrays once the matrix has been destroyed and not before. 6815 6816 The i and j indices are 0 based 6817 6818 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6819 6820 This sets local rows and cannot be used to set off-processor values. 6821 6822 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6823 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6824 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 6825 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6826 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6827 communication if it is known that only local entries will be set. 6828 6829 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6830 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6831 @*/ 6832 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6833 { 6834 Mat_MPIAIJ *maij; 6835 6836 PetscFunctionBegin; 6837 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6838 PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6839 PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6840 PetscCall(MatCreate(comm,mat)); 6841 PetscCall(MatSetSizes(*mat,m,n,M,N)); 6842 PetscCall(MatSetType(*mat,MATMPIAIJ)); 6843 maij = (Mat_MPIAIJ*) (*mat)->data; 6844 6845 (*mat)->preallocated = PETSC_TRUE; 6846 6847 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6848 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6849 6850 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A)); 6851 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B)); 6852 6853 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 6854 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 6855 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 6856 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 6857 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 6858 PetscFunctionReturn(0); 6859 } 6860 6861 typedef struct { 6862 Mat *mp; /* intermediate products */ 6863 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6864 PetscInt cp; /* number of intermediate products */ 6865 6866 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6867 PetscInt *startsj_s,*startsj_r; 6868 PetscScalar *bufa; 6869 Mat P_oth; 6870 6871 /* may take advantage of merging product->B */ 6872 Mat Bloc; /* B-local by merging diag and off-diag */ 6873 6874 /* cusparse does not have support to split between symbolic and numeric phases. 6875 When api_user is true, we don't need to update the numerical values 6876 of the temporary storage */ 6877 PetscBool reusesym; 6878 6879 /* support for COO values insertion */ 6880 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6881 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6882 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6883 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 6884 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6885 PetscMemType mtype; 6886 6887 /* customization */ 6888 PetscBool abmerge; 6889 PetscBool P_oth_bind; 6890 } MatMatMPIAIJBACKEND; 6891 6892 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6893 { 6894 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6895 PetscInt i; 6896 6897 PetscFunctionBegin; 6898 PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r)); 6899 PetscCall(PetscFree(mmdata->bufa)); 6900 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v)); 6901 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w)); 6902 PetscCall(MatDestroy(&mmdata->P_oth)); 6903 PetscCall(MatDestroy(&mmdata->Bloc)); 6904 PetscCall(PetscSFDestroy(&mmdata->sf)); 6905 for (i = 0; i < mmdata->cp; i++) { 6906 PetscCall(MatDestroy(&mmdata->mp[i])); 6907 } 6908 PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp)); 6909 PetscCall(PetscFree(mmdata->own[0])); 6910 PetscCall(PetscFree(mmdata->own)); 6911 PetscCall(PetscFree(mmdata->off[0])); 6912 PetscCall(PetscFree(mmdata->off)); 6913 PetscCall(PetscFree(mmdata)); 6914 PetscFunctionReturn(0); 6915 } 6916 6917 /* Copy selected n entries with indices in idx[] of A to v[]. 6918 If idx is NULL, copy the whole data array of A to v[] 6919 */ 6920 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6921 { 6922 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6923 6924 PetscFunctionBegin; 6925 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f)); 6926 if (f) { 6927 PetscCall((*f)(A,n,idx,v)); 6928 } else { 6929 const PetscScalar *vv; 6930 6931 PetscCall(MatSeqAIJGetArrayRead(A,&vv)); 6932 if (n && idx) { 6933 PetscScalar *w = v; 6934 const PetscInt *oi = idx; 6935 PetscInt j; 6936 6937 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6938 } else { 6939 PetscCall(PetscArraycpy(v,vv,n)); 6940 } 6941 PetscCall(MatSeqAIJRestoreArrayRead(A,&vv)); 6942 } 6943 PetscFunctionReturn(0); 6944 } 6945 6946 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6947 { 6948 MatMatMPIAIJBACKEND *mmdata; 6949 PetscInt i,n_d,n_o; 6950 6951 PetscFunctionBegin; 6952 MatCheckProduct(C,1); 6953 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6954 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6955 if (!mmdata->reusesym) { /* update temporary matrices */ 6956 if (mmdata->P_oth) { 6957 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 6958 } 6959 if (mmdata->Bloc) { 6960 PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc)); 6961 } 6962 } 6963 mmdata->reusesym = PETSC_FALSE; 6964 6965 for (i = 0; i < mmdata->cp; i++) { 6966 PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6967 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 6968 } 6969 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6970 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6971 6972 if (mmdata->mptmp[i]) continue; 6973 if (noff) { 6974 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6975 6976 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o)); 6977 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d)); 
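/* advance the write offsets into the off-process (coo_w) and on-process (coo_v) value buffers */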
6978 n_o += noff; 6979 n_d += nown; 6980 } else { 6981 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6982 6983 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d)); 6984 n_d += mm->nz; 6985 } 6986 } 6987 if (mmdata->hasoffproc) { /* offprocess insertion */ 6988 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6989 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6990 } 6991 PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES)); 6992 PetscFunctionReturn(0); 6993 } 6994 6995 /* Support for Pt * A, A * P, or Pt * A * P */ 6996 #define MAX_NUMBER_INTERMEDIATE 4 6997 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6998 { 6999 Mat_Product *product = C->product; 7000 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7001 Mat_MPIAIJ *a,*p; 7002 MatMatMPIAIJBACKEND *mmdata; 7003 ISLocalToGlobalMapping P_oth_l2g = NULL; 7004 IS glob = NULL; 7005 const char *prefix; 7006 char pprefix[256]; 7007 const PetscInt *globidx,*P_oth_idx; 7008 PetscInt i,j,cp,m,n,M,N,*coo_i,*coo_j; 7009 PetscCount ncoo,ncoo_d,ncoo_o,ncoo_oown; 7010 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7011 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7012 /* a base offset; type-2: sparse with a local to global map table */ 7013 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7014 7015 MatProductType ptype; 7016 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 7017 PetscMPIInt size; 7018 7019 PetscFunctionBegin; 7020 MatCheckProduct(C,1); 7021 PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 7022 ptype = product->type; 7023 if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 7024 ptype = MATPRODUCT_AB; 7025 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7026 } 7027 switch (ptype) { 7028 case MATPRODUCT_AB: 7029 A = product->A; 7030 P = product->B; 7031 m = A->rmap->n; 7032 n = P->cmap->n; 7033 M = A->rmap->N; 7034 N = P->cmap->N; 7035 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7036 break; 7037 case MATPRODUCT_AtB: 7038 P = product->A; 7039 A = product->B; 7040 m = P->cmap->n; 7041 n = A->cmap->n; 7042 M = P->cmap->N; 7043 N = A->cmap->N; 7044 hasoffproc = PETSC_TRUE; 7045 break; 7046 case MATPRODUCT_PtAP: 7047 A = product->A; 7048 P = product->B; 7049 m = P->cmap->n; 7050 n = P->cmap->n; 7051 M = P->cmap->N; 7052 N = P->cmap->N; 7053 hasoffproc = PETSC_TRUE; 7054 break; 7055 default: 7056 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7057 } 7058 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size)); 7059 if (size == 1) hasoffproc = PETSC_FALSE; 7060 7061 /* defaults */ 7062 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 7063 mp[i] = NULL; 7064 mptmp[i] = PETSC_FALSE; 7065 rmapt[i] = -1; 7066 cmapt[i] = -1; 7067 rmapa[i] = NULL; 7068 cmapa[i] = NULL; 7069 } 7070 7071 /* customization */ 7072 PetscCall(PetscNew(&mmdata)); 7073 mmdata->reusesym = product->api_user; 7074 if (ptype == MATPRODUCT_AB) { 7075 if (product->api_user) { 7076 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat"); 7077 
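/* option names follow the MatMatMult() spelling because the user entered through the old MatMatMult() API */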
PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 7078 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7079 PetscOptionsEnd(); 7080 } else { 7081 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat"); 7082 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 7083 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7084 PetscOptionsEnd(); 7085 } 7086 } else if (ptype == MATPRODUCT_PtAP) { 7087 if (product->api_user) { 7088 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat"); 7089 PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7090 PetscOptionsEnd(); 7091 } else { 7092 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 7093 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7094 PetscOptionsEnd(); 7095 } 7096 } 7097 a = (Mat_MPIAIJ*)A->data; 7098 p = (Mat_MPIAIJ*)P->data; 7099 PetscCall(MatSetSizes(C,m,n,M,N)); 7100 PetscCall(PetscLayoutSetUp(C->rmap)); 7101 PetscCall(PetscLayoutSetUp(C->cmap)); 7102 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 7103 PetscCall(MatGetOptionsPrefix(C,&prefix)); 7104 7105 cp = 0; 7106 switch (ptype) { 7107 case MATPRODUCT_AB: /* A * P */ 7108 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7109 7110 /* A_diag * P_local (merged or not) */ 7111 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7112 /* P is product->B */ 7113 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7114 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7115 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7116 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7117 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7118 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7119 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7120 mp[cp]->product->api_user = product->api_user; 7121 PetscCall(MatProductSetFromOptions(mp[cp])); 7122 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7123 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7124 PetscCall(ISGetIndices(glob,&globidx)); 7125 rmapt[cp] = 1; 7126 cmapt[cp] = 2; 7127 cmapa[cp] = globidx; 7128 mptmp[cp] = PETSC_FALSE; 7129 cp++; 7130 } else { /* A_diag * P_diag and A_diag * P_off */ 7131 PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp])); 7132 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7133 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7134 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7135 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7136 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7137 mp[cp]->product->api_user = product->api_user; 7138 
PetscCall(MatProductSetFromOptions(mp[cp])); 7139 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7140 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7141 rmapt[cp] = 1; 7142 cmapt[cp] = 1; 7143 mptmp[cp] = PETSC_FALSE; 7144 cp++; 7145 PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp])); 7146 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7147 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7148 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7149 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7150 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7151 mp[cp]->product->api_user = product->api_user; 7152 PetscCall(MatProductSetFromOptions(mp[cp])); 7153 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7154 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7155 rmapt[cp] = 1; 7156 cmapt[cp] = 2; 7157 cmapa[cp] = p->garray; 7158 mptmp[cp] = PETSC_FALSE; 7159 cp++; 7160 } 7161 7162 /* A_off * P_other */ 7163 if (mmdata->P_oth) { 7164 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */ 7165 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7166 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7167 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7168 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7169 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7170 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7171 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7172 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7173 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7174 mp[cp]->product->api_user = product->api_user; 7175 PetscCall(MatProductSetFromOptions(mp[cp])); 7176 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7177 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7178 rmapt[cp] = 1; 7179 cmapt[cp] = 2; 7180 cmapa[cp] = P_oth_idx; 7181 mptmp[cp] = PETSC_FALSE; 7182 cp++; 7183 } 7184 break; 7185 7186 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7187 /* A is product->B */ 7188 PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7189 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7190 PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp])); 7191 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7192 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7193 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7194 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7195 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7196 mp[cp]->product->api_user = product->api_user; 7197 PetscCall(MatProductSetFromOptions(mp[cp])); 7198 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7199 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7200 PetscCall(ISGetIndices(glob,&globidx)); 7201 rmapt[cp] = 2; 7202 rmapa[cp] = globidx; 7203 cmapt[cp] = 2; 7204 cmapa[cp] = globidx; 7205 mptmp[cp] = PETSC_FALSE; 7206 cp++; 
7207 } else { 7208 PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp])); 7209 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7210 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7211 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7212 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7213 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7214 mp[cp]->product->api_user = product->api_user; 7215 PetscCall(MatProductSetFromOptions(mp[cp])); 7216 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7217 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7218 PetscCall(ISGetIndices(glob,&globidx)); 7219 rmapt[cp] = 1; 7220 cmapt[cp] = 2; 7221 cmapa[cp] = globidx; 7222 mptmp[cp] = PETSC_FALSE; 7223 cp++; 7224 PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp])); 7225 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7226 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7227 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7228 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7229 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7230 mp[cp]->product->api_user = product->api_user; 7231 PetscCall(MatProductSetFromOptions(mp[cp])); 7232 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7233 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7234 rmapt[cp] = 2; 7235 rmapa[cp] = p->garray; 7236 cmapt[cp] = 2; 7237 cmapa[cp] = globidx; 7238 mptmp[cp] = PETSC_FALSE; 7239 cp++; 7240 } 7241 break; 7242 case MATPRODUCT_PtAP: 7243 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7244 /* P is product->B */ 7245 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7246 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7247 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP)); 7248 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7249 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7250 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7251 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7252 mp[cp]->product->api_user = product->api_user; 7253 PetscCall(MatProductSetFromOptions(mp[cp])); 7254 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7255 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7256 PetscCall(ISGetIndices(glob,&globidx)); 7257 rmapt[cp] = 2; 7258 rmapa[cp] = globidx; 7259 cmapt[cp] = 2; 7260 cmapa[cp] = globidx; 7261 mptmp[cp] = PETSC_FALSE; 7262 cp++; 7263 if (mmdata->P_oth) { 7264 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); 7265 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7266 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7267 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7268 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7269 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7270 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7271 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7272 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7273 
PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7274 mp[cp]->product->api_user = product->api_user; 7275 PetscCall(MatProductSetFromOptions(mp[cp])); 7276 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7277 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7278 mptmp[cp] = PETSC_TRUE; 7279 cp++; 7280 PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp])); 7281 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7282 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7283 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7284 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7285 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7286 mp[cp]->product->api_user = product->api_user; 7287 PetscCall(MatProductSetFromOptions(mp[cp])); 7288 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7289 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7290 rmapt[cp] = 2; 7291 rmapa[cp] = globidx; 7292 cmapt[cp] = 2; 7293 cmapa[cp] = P_oth_idx; 7294 mptmp[cp] = PETSC_FALSE; 7295 cp++; 7296 } 7297 break; 7298 default: 7299 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7300 } 7301 /* sanity check */ 7302 if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i); 7303 7304 PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp)); 7305 for (i = 0; i < cp; i++) { 7306 mmdata->mp[i] = mp[i]; 7307 mmdata->mptmp[i] = mptmp[i]; 7308 } 7309 mmdata->cp = cp; 7310 C->product->data = mmdata; 7311 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7312 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7313 7314 /* memory type */ 7315 mmdata->mtype = PETSC_MEMTYPE_HOST; 7316 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"")); 7317 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"")); 7318 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7319 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7320 7321 /* prepare coo coordinates for values insertion */ 7322 7323 /* count total nonzeros of those intermediate seqaij Mats 7324 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7325 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7326 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7327 */ 7328 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7329 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7330 if (mptmp[cp]) continue; 7331 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7332 const PetscInt *rmap = rmapa[cp]; 7333 const PetscInt mr = mp[cp]->rmap->n; 7334 const PetscInt rs = C->rmap->rstart; 7335 const PetscInt re = C->rmap->rend; 7336 const PetscInt *ii = mm->i; 7337 for (i = 0; i < mr; i++) { 7338 const PetscInt gr = rmap[i]; 7339 const PetscInt nz = ii[i+1] - ii[i]; 7340 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7341 else ncoo_oown += nz; /* this row is local */ 7342 } 7343 } else ncoo_d += mm->nz; 7344 } 7345 7346 /* 7347 ncoo: total number of 
nonzeros (including those inserted by remote procs) belonging to this proc 7348 7349 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7350 7351 off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 7352 7353 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7354 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7355 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7356 7357 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7358 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 7359 */ 7360 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */ 7361 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own)); 7362 7363 /* gather (i,j) of nonzeros inserted by remote procs */ 7364 if (hasoffproc) { 7365 PetscSF msf; 7366 PetscInt ncoo2,*coo_i2,*coo_j2; 7367 7368 PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0])); 7369 PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0])); 7370 PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */ 7371 7372 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7373 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7374 PetscInt *idxoff = mmdata->off[cp]; 7375 PetscInt *idxown = mmdata->own[cp]; 7376 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7377 const PetscInt *rmap = rmapa[cp]; 7378 const PetscInt *cmap = cmapa[cp]; 7379 const PetscInt *ii = mm->i; 7380 PetscInt *coi = coo_i + ncoo_o; 7381 PetscInt *coj = coo_j + ncoo_o; 7382 const PetscInt mr = mp[cp]->rmap->n; 7383 const PetscInt rs = C->rmap->rstart; 7384 const PetscInt re = C->rmap->rend; 7385 const PetscInt cs = C->cmap->rstart; 7386 for (i = 0; i < mr; i++) { 7387 const PetscInt *jj = mm->j + ii[i]; 7388 const PetscInt gr = rmap[i]; 7389 const PetscInt nz = ii[i+1] - ii[i]; 7390 if (gr < rs || gr >= re) { /* this is an offproc row */ 7391 for (j = ii[i]; j < ii[i+1]; j++) { 7392 *coi++ = gr; 7393 *idxoff++ = j; 7394 } 7395 if (!cmapt[cp]) { /* already global */ 7396 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7397 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7398 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7399 } else { /* offdiag */ 7400 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7401 } 7402 ncoo_o += nz; 7403 } else { /* this is a local row */ 7404 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 7405 } 7406 } 7407 } 7408 mmdata->off[cp + 1] = idxoff; 7409 mmdata->own[cp + 1] = idxown; 7410 } 7411 7412 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7413 PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i)); 7414 PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf)); 7415 PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL)); 7416 ncoo = ncoo_d + ncoo_oown + ncoo2; 7417 PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2)); 7418 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7419 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); 7420 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7421 
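/* Worked illustration of the bookkeeping described above (hypothetical numbers, added for clarity only):
   suppose this rank owns rows [rs,re) = [4,8) of C and the intermediate product mp[0] has rmapt[0] == 2
   with rmapa[0] = {2,5}, each row holding 3 nonzeros. Row 2 falls outside [4,8), so its 3 value locations
   are recorded in the mmdata->off[0] segment and ncoo_o grows by 3; row 5 is owned, so its 3 locations go
   into the mmdata->own[0] segment and ncoo_oown grows by 3. Once the PetscSFGather calls around this point
   complete, coo_i/coo_j hold the ncoo_d + ncoo_oown locally produced (i,j) pairs first, followed by the
   pairs received from other ranks, which is the layout handed to MatSetPreallocationCOO() at the end of
   this routine. */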
PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7422 PetscCall(PetscFree2(coo_i,coo_j)); 7423 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7424 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w)); 7425 coo_i = coo_i2; 7426 coo_j = coo_j2; 7427 } else { /* no offproc values insertion */ 7428 ncoo = ncoo_d; 7429 PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j)); 7430 7431 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7432 PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER)); 7433 PetscCall(PetscSFSetUp(mmdata->sf)); 7434 } 7435 mmdata->hasoffproc = hasoffproc; 7436 7437 /* gather (i,j) of nonzeros inserted locally */ 7438 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7439 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7440 PetscInt *coi = coo_i + ncoo_d; 7441 PetscInt *coj = coo_j + ncoo_d; 7442 const PetscInt *jj = mm->j; 7443 const PetscInt *ii = mm->i; 7444 const PetscInt *cmap = cmapa[cp]; 7445 const PetscInt *rmap = rmapa[cp]; 7446 const PetscInt mr = mp[cp]->rmap->n; 7447 const PetscInt rs = C->rmap->rstart; 7448 const PetscInt re = C->rmap->rend; 7449 const PetscInt cs = C->cmap->rstart; 7450 7451 if (mptmp[cp]) continue; 7452 if (rmapt[cp] == 1) { /* consecutive rows */ 7453 /* fill coo_i */ 7454 for (i = 0; i < mr; i++) { 7455 const PetscInt gr = i + rs; 7456 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 7457 } 7458 /* fill coo_j */ 7459 if (!cmapt[cp]) { /* type-0, already global */ 7460 PetscCall(PetscArraycpy(coj,jj,mm->nz)); 7461 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7462 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7463 } else { /* type-2, local to global for sparse columns */ 7464 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7465 } 7466 ncoo_d += mm->nz; 7467 } else if (rmapt[cp] == 2) { /* sparse rows */ 7468 for (i = 0; i < mr; i++) { 7469 const PetscInt *jj = mm->j + ii[i]; 7470 const PetscInt gr = rmap[i]; 7471 const PetscInt nz = ii[i+1] - ii[i]; 7472 if (gr >= rs && gr < re) { /* local rows */ 7473 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 7474 if (!cmapt[cp]) { /* type-0, already global */ 7475 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7476 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7477 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7478 } else { /* type-2, local to global for sparse columns */ 7479 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7480 } 7481 ncoo_d += nz; 7482 } 7483 } 7484 } 7485 } 7486 if (glob) { 7487 PetscCall(ISRestoreIndices(glob,&globidx)); 7488 } 7489 PetscCall(ISDestroy(&glob)); 7490 if (P_oth_l2g) { 7491 PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx)); 7492 } 7493 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7494 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7495 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v)); 7496 7497 /* preallocate with COO data */ 7498 PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j)); 7499 PetscCall(PetscFree2(coo_i,coo_j)); 7500 PetscFunctionReturn(0); 7501 } 7502 7503 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7504 { 7505 Mat_Product *product = mat->product; 7506 #if defined(PETSC_HAVE_DEVICE) 7507 PetscBool match = PETSC_FALSE; 7508 PetscBool usecpu = PETSC_FALSE; 
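  /* Summary note for the option handling below (no new behaviour is introduced here): with device support
     enabled, the backend symbolic routine above is used for the AB, AtB and PtAP product types only when
     product->A and product->B are not bound to the CPU and share the same matrix type. Users can still force
     the CPU fallback through the options registered by the PetscOptionsBool() calls below, for example
     -matmatmult_backend_cpu, -mattransposematmult_backend_cpu and -matptap_backend_cpu for the
     MatMatMult()/MatTransposeMatMult()/MatPtAP() user API, or -mat_product_algorithm_backend_cpu for the
     MatProduct interface. */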
7509 #else 7510 PetscBool match = PETSC_TRUE; 7511 #endif 7512 7513 PetscFunctionBegin; 7514 MatCheckProduct(mat,1); 7515 #if defined(PETSC_HAVE_DEVICE) 7516 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7517 PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match)); 7518 } 7519 if (match) { /* we can always fallback to the CPU if requested */ 7520 switch (product->type) { 7521 case MATPRODUCT_AB: 7522 if (product->api_user) { 7523 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat"); 7524 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7525 PetscOptionsEnd(); 7526 } else { 7527 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat"); 7528 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7529 PetscOptionsEnd(); 7530 } 7531 break; 7532 case MATPRODUCT_AtB: 7533 if (product->api_user) { 7534 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat"); 7535 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7536 PetscOptionsEnd(); 7537 } else { 7538 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat"); 7539 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7540 PetscOptionsEnd(); 7541 } 7542 break; 7543 case MATPRODUCT_PtAP: 7544 if (product->api_user) { 7545 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat"); 7546 PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7547 PetscOptionsEnd(); 7548 } else { 7549 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat"); 7550 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7551 PetscOptionsEnd(); 7552 } 7553 break; 7554 default: 7555 break; 7556 } 7557 match = (PetscBool)!usecpu; 7558 } 7559 #endif 7560 if (match) { 7561 switch (product->type) { 7562 case MATPRODUCT_AB: 7563 case MATPRODUCT_AtB: 7564 case MATPRODUCT_PtAP: 7565 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7566 break; 7567 default: 7568 break; 7569 } 7570 } 7571 /* fallback to MPIAIJ ops */ 7572 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7573 PetscFunctionReturn(0); 7574 } 7575 7576 /* 7577 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7578 7579 n - the number of block indices in cc[] 7580 cc - the block indices (must be large enough to contain the indices) 7581 */ 7582 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc) 7583 { 7584 PetscInt cnt = -1,nidx,j; 7585 const PetscInt *idx; 7586 7587 PetscFunctionBegin; 7588 PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL)); 7589 if (nidx) { 7590 cnt = 0; 7591 cc[cnt] = idx[0]/bs; 7592 for (j=1; j<nidx; j++) { 7593 if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs; 7594 } 7595 } 7596 PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL)); 7597 *n = cnt+1; 7598 PetscFunctionReturn(0); 7599 } 7600 7601 /* 7602 Produces a set of block column indices of the matrix block row, one for 
each block represented in the original set of rows 7603 7604 ncollapsed - the number of block indices 7605 collapsed - the block indices (must be large enough to contain the indices) 7606 */ 7607 static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed) 7608 { 7609 PetscInt i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp; 7610 7611 PetscFunctionBegin; 7612 PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev)); 7613 for (i=start+1; i<start+bs; i++) { 7614 PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur)); 7615 PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged)); 7616 cprevtmp = cprev; cprev = merged; merged = cprevtmp; 7617 } 7618 *ncollapsed = nprev; 7619 if (collapsed) *collapsed = cprev; 7620 PetscFunctionReturn(0); 7621 } 7622 7623 /* -------------------------------------------------------------------------- */ 7624 /* 7625 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7626 7627 Input Parameter: 7628 . Amat - matrix 7629 - symmetrize - make the result symmetric 7630 + scale - scale with diagonal 7631 7632 Output Parameter: 7633 . a_Gmat - output scalar graph >= 0 7634 7635 */ 7636 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat) 7637 { 7638 PetscInt Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs; 7639 MPI_Comm comm; 7640 Mat Gmat; 7641 PetscBool ismpiaij,isseqaij; 7642 Mat a, b, c; 7643 MatType jtype; 7644 7645 PetscFunctionBegin; 7646 PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm)); 7647 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7648 PetscCall(MatGetSize(Amat, &MM, &NN)); 7649 PetscCall(MatGetBlockSize(Amat, &bs)); 7650 nloc = (Iend-Istart)/bs; 7651 7652 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij)); 7653 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij)); 7654 PetscCheck(isseqaij || ismpiaij,comm,PETSC_ERR_USER,"Require (MPI)AIJ matrix type"); 7655 7656 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7657 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7658 implementation */ 7659 if (bs > 1) { 7660 PetscCall(MatGetType(Amat,&jtype)); 7661 PetscCall(MatCreate(comm, &Gmat)); 7662 PetscCall(MatSetType(Gmat, jtype)); 7663 PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE)); 7664 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7665 if (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray) { 7666 PetscInt *d_nnz, *o_nnz; 7667 MatScalar *aa,val,AA[4096]; 7668 PetscInt *aj,*ai,AJ[4096],nc; 7669 if (isseqaij) { a = Amat; b = NULL; } 7670 else { 7671 Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data; 7672 a = d->A; b = d->B; 7673 } 7674 PetscCall(PetscInfo(Amat,"New bs>1 Graph. nloc=%" PetscInt_FMT "\n",nloc)); 7675 PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz)); 7676 for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ 7677 PetscInt *nnz = (c==a) ? 
d_nnz : o_nnz, nmax=0; 7678 const PetscInt *cols; 7679 for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows 7680 PetscCall(MatGetRow(c,brow,&jj,&cols,NULL)); 7681 nnz[brow/bs] = jj/bs; 7682 if (jj%bs) ok = 0; 7683 if (cols) j0 = cols[0]; 7684 else j0 = -1; 7685 PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL)); 7686 if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs]; 7687 for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks 7688 PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL)); 7689 if (jj%bs) ok = 0; 7690 if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; 7691 if (nnz[brow/bs] != jj/bs) ok = 0; 7692 PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL)); 7693 } 7694 if (!ok) { 7695 PetscCall(PetscFree2(d_nnz,o_nnz)); 7696 goto old_bs; 7697 } 7698 } 7699 PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %" PetscInt_FMT " too small 4096.",nmax); 7700 } 7701 PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz)); 7702 PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz)); 7703 PetscCall(PetscFree2(d_nnz,o_nnz)); 7704 // diag 7705 for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows 7706 Mat_SeqAIJ *aseq = (Mat_SeqAIJ*)a->data; 7707 ai = aseq->i; 7708 n = ai[brow+1] - ai[brow]; 7709 aj = aseq->j + ai[brow]; 7710 for (int k=0; k<n; k += bs) { // block columns 7711 AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart) 7712 val = 0; 7713 for (int ii=0; ii<bs; ii++) { // rows in block 7714 aa = aseq->a + ai[brow+ii] + k; 7715 for (int jj=0; jj<bs; jj++) { // columns in block 7716 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7717 } 7718 } 7719 AA[k/bs] = val; 7720 } 7721 grow = Istart/bs + brow/bs; 7722 PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES)); 7723 } 7724 // off-diag 7725 if (ismpiaij) { 7726 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)Amat->data; 7727 const PetscScalar *vals; 7728 const PetscInt *cols, *garray = aij->garray; 7729 PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?"); 7730 for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows 7731 PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL)); 7732 for (int k=0,cidx=0 ; k < ncols ; k += bs, cidx++) { 7733 AA[k/bs] = 0; 7734 AJ[cidx] = garray[cols[k]]/bs; 7735 } 7736 nc = ncols/bs; 7737 PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL)); 7738 for (int ii=0; ii<bs; ii++) { // rows in block 7739 PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals)); 7740 for (int k=0; k<ncols; k += bs) { 7741 for (int jj=0; jj<bs; jj++) { // cols in block 7742 AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj])); 7743 } 7744 } 7745 PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals)); 7746 } 7747 grow = Istart/bs + brow/bs; 7748 PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES)); 7749 } 7750 } 7751 PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY)); 7752 PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY)); 7753 } else { 7754 const PetscScalar *vals; 7755 const PetscInt *idx; 7756 PetscInt *d_nnz, *o_nnz,*w0,*w1,*w2; 7757 old_bs: 7758 /* 7759 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7760 */ 7761 PetscCall(PetscInfo(Amat,"OLD bs>1 CreateGraph\n")); 7762 PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 
0 : nloc, &o_nnz)); 7763 if (isseqaij) { 7764 PetscInt max_d_nnz; 7765 /* 7766 Determine exact preallocation count for (sequential) scalar matrix 7767 */ 7768 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz)); 7769 max_d_nnz = PetscMin(nloc,bs*max_d_nnz); 7770 PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2)); 7771 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) { 7772 PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL)); 7773 } 7774 PetscCall(PetscFree3(w0,w1,w2)); 7775 } else if (ismpiaij) { 7776 Mat Daij,Oaij; 7777 const PetscInt *garray; 7778 PetscInt max_d_nnz; 7779 PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray)); 7780 /* 7781 Determine exact preallocation count for diagonal block portion of scalar matrix 7782 */ 7783 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz)); 7784 max_d_nnz = PetscMin(nloc,bs*max_d_nnz); 7785 PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2)); 7786 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7787 PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL)); 7788 } 7789 PetscCall(PetscFree3(w0,w1,w2)); 7790 /* 7791 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7792 */ 7793 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7794 o_nnz[jj] = 0; 7795 for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */ 7796 PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL)); 7797 o_nnz[jj] += ncols; 7798 PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL)); 7799 } 7800 if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc; 7801 } 7802 } else SETERRQ(comm,PETSC_ERR_USER,"Require AIJ matrix type"); 7803 /* get scalar copy (norms) of matrix */ 7804 PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz)); 7805 PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz)); 7806 PetscCall(PetscFree2(d_nnz,o_nnz)); 7807 for (Ii = Istart; Ii < Iend; Ii++) { 7808 PetscInt dest_row = Ii/bs; 7809 PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals)); 7810 for (jj=0; jj<ncols; jj++) { 7811 PetscInt dest_col = idx[jj]/bs; 7812 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7813 PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES)); 7814 } 7815 PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals)); 7816 } 7817 PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY)); 7818 PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY)); 7819 } 7820 } else { 7821 /* TODO GPU: optimization proposal, each class provides fast implementation of this 7822 procedure via MatAbs API */ 7823 /* just copy scalar matrix & abs() */ 7824 PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7825 if (isseqaij) { a = Gmat; b = NULL; } 7826 else { 7827 Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data; 7828 a = d->A; b = d->B; 7829 } 7830 /* abs */ 7831 for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ 7832 MatInfo info; 7833 PetscScalar *avals; 7834 PetscCall(MatGetInfo(c,MAT_LOCAL,&info)); 7835 PetscCall(MatSeqAIJGetArray(c,&avals)); 7836 for (int jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7837 PetscCall(MatSeqAIJRestoreArray(c,&avals)); 7838 } 7839 } 7840 if (symmetrize) { 7841 PetscBool issym; 7842 PetscCall(MatGetOption(Amat,MAT_SYMMETRIC,&issym)); 7843 if (!issym) { 7844 Mat matTrans; 7845 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 7846 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? 
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 7847 PetscCall(MatDestroy(&matTrans)); 7848 } 7849 PetscCall(MatSetOption(Gmat,MAT_SYMMETRIC,PETSC_TRUE)); 7850 } else { 7851 PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 7852 } 7853 if (scale) { 7854 /* scale c for all diagonal values = 1 or -1 */ 7855 Vec diag; 7856 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 7857 PetscCall(MatGetDiagonal(Gmat, diag)); 7858 PetscCall(VecReciprocal(diag)); 7859 PetscCall(VecSqrtAbs(diag)); 7860 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 7861 PetscCall(VecDestroy(&diag)); 7862 } 7863 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 7864 *a_Gmat = Gmat; 7865 PetscFunctionReturn(0); 7866 } 7867 7868 /* -------------------------------------------------------------------------- */ 7869 /*@C 7870 MatFilter_AIJ - filter out values with small absolute value 7871 If vfilter < 0 this does nothing, so it should not be called in that case. 7872 7873 Collective on Mat 7874 7875 Input Parameters: 7876 + Gmat - the graph 7877 - vfilter - threshold parameter in [0,1) 7878 7879 Output Parameter: 7880 . filteredG - output filtered scalar graph 7881 7882 Level: developer 7883 7884 Notes: 7885 This is called before graph coarseners are called. 7886 This could go into Mat; the 'symm' handling could move to GAMG 7887 7888 .seealso: `PCGAMGSetThreshold()` 7889 @*/ 7890 PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat,PetscReal vfilter, Mat *filteredG) 7891 { 7892 PetscInt Istart,Iend,ncols,nnz0,nnz1, NN, MM, nloc; 7893 Mat tGmat; 7894 MPI_Comm comm; 7895 const PetscScalar *vals; 7896 const PetscInt *idx; 7897 PetscInt *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols=0; 7898 MatScalar *AA; // this is checked in graph 7899 PetscBool isseqaij; 7900 Mat a, b, c; 7901 MatType jtype; 7902 7903 PetscFunctionBegin; 7904 PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm)); 7905 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isseqaij)); 7906 PetscCall(MatGetType(Gmat,&jtype)); 7907 PetscCall(MatCreate(comm, &tGmat)); 7908 PetscCall(MatSetType(tGmat, jtype)); 7909 7910 /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold? 7911 Also, if the matrix is symmetric, can we skip this 7912 operation? It can be very expensive on large matrices.
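     As a usage illustration only (a hypothetical caller sketch, assuming a user-assembled matrix A; it
     mirrors how a GAMG-style setup might drive these helpers and is not code taken from elsewhere):

       Mat G,Gf;
       PetscCall(MatCreateGraph_Simple_AIJ(A,PETSC_TRUE,PETSC_TRUE,&G)); // symmetrized, diagonally scaled scalar graph
       PetscCall(MatFilter_AIJ(G,0.01,&Gf));                             // keep entries whose absolute value exceeds 0.01
       PetscCall(MatDestroy(&G));

     so vfilter is compared against the (scaled) absolute values assembled by MatCreateGraph_Simple_AIJ() above.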
*/ 7913 7914 // global sizes 7915 PetscCall(MatGetSize(Gmat, &MM, &NN)); 7916 PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); 7917 nloc = Iend - Istart; 7918 PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz)); 7919 if (isseqaij) { a = Gmat; b = NULL; } 7920 else { 7921 Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data; 7922 a = d->A; b = d->B; 7923 garray = d->garray; 7924 } 7925 /* Determine upper bound on non-zeros needed in new filtered matrix */ 7926 for (PetscInt row=0; row < nloc; row++) { 7927 PetscCall(MatGetRow(a,row,&ncols,NULL,NULL)); 7928 d_nnz[row] = ncols; 7929 if (ncols>maxcols) maxcols=ncols; 7930 PetscCall(MatRestoreRow(a,row,&ncols,NULL,NULL)); 7931 } 7932 if (b) { 7933 for (PetscInt row=0; row < nloc; row++) { 7934 PetscCall(MatGetRow(b,row,&ncols,NULL,NULL)); 7935 o_nnz[row] = ncols; 7936 if (ncols>maxcols) maxcols=ncols; 7937 PetscCall(MatRestoreRow(b,row,&ncols,NULL,NULL)); 7938 } 7939 } 7940 PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM)); 7941 PetscCall(MatSetBlockSizes(tGmat, 1, 1)); 7942 PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz)); 7943 PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz)); 7944 PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 7945 PetscCall(PetscFree2(d_nnz,o_nnz)); 7946 // 7947 PetscCall(PetscMalloc2(maxcols, &AA,maxcols, &AJ)); 7948 nnz0 = nnz1 = 0; 7949 for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ 7950 for (PetscInt row=0, grow=Istart, ncol_row, jj ; row < nloc; row++,grow++) { 7951 PetscCall(MatGetRow(c,row,&ncols,&idx,&vals)); 7952 for (ncol_row=jj=0; jj<ncols; jj++,nnz0++) { 7953 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7954 if (PetscRealPart(sv) > vfilter) { 7955 nnz1++; 7956 PetscInt cid = idx[jj] + Istart; //diag 7957 if (c!=a) cid = garray[idx[jj]]; 7958 AA[ncol_row] = vals[jj]; 7959 AJ[ncol_row] = cid; 7960 ncol_row++; 7961 } 7962 } 7963 PetscCall(MatRestoreRow(c,row,&ncols,&idx,&vals)); 7964 PetscCall(MatSetValues(tGmat,1,&grow,ncol_row,AJ,AA,INSERT_VALUES)); 7965 } 7966 } 7967 PetscCall(PetscFree2(AA,AJ)); 7968 PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY)); 7969 PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY)); 7970 PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); /* Normal Mat options are not relevant ? */ 7971 7972 PetscCall(PetscInfo(tGmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", 7973 (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, (double)vfilter, 7974 (!nloc) ? 1. : (double)nnz0/(double)nloc,MM,(int)maxcols)); 7975 7976 *filteredG = tGmat; 7977 PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view")); 7978 PetscFunctionReturn(0); 7979 } 7980 7981 /* 7982 Special version for direct calls from Fortran 7983 */ 7984 #include <petsc/private/fortranimpl.h> 7985 7986 /* Change these macros so can be used in void function */ 7987 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 7988 #undef PetscCall 7989 #define PetscCall(...) do { \ 7990 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 7991 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 7992 *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \ 7993 return; \ 7994 } \ 7995 } while (0) 7996 7997 #undef SETERRQ 7998 #define SETERRQ(comm,ierr,...) 
do { \ 7999 *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \ 8000 return; \ 8001 } while (0) 8002 8003 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8004 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8005 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8006 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8007 #else 8008 #endif 8009 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 8010 { 8011 Mat mat = *mmat; 8012 PetscInt m = *mm, n = *mn; 8013 InsertMode addv = *maddv; 8014 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 8015 PetscScalar value; 8016 8017 MatCheckPreallocated(mat,1); 8018 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8019 else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 8020 { 8021 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 8022 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 8023 PetscBool roworiented = aij->roworiented; 8024 8025 /* Some Variables required in the macro */ 8026 Mat A = aij->A; 8027 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 8028 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 8029 MatScalar *aa; 8030 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8031 Mat B = aij->B; 8032 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 8033 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 8034 MatScalar *ba; 8035 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8036 * cannot use "#if defined" inside a macro. 
*/ 8037 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8038 8039 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 8040 PetscInt nonew = a->nonew; 8041 MatScalar *ap1,*ap2; 8042 8043 PetscFunctionBegin; 8044 PetscCall(MatSeqAIJGetArray(A,&aa)); 8045 PetscCall(MatSeqAIJGetArray(B,&ba)); 8046 for (i=0; i<m; i++) { 8047 if (im[i] < 0) continue; 8048 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 8049 if (im[i] >= rstart && im[i] < rend) { 8050 row = im[i] - rstart; 8051 lastcol1 = -1; 8052 rp1 = aj + ai[row]; 8053 ap1 = aa + ai[row]; 8054 rmax1 = aimax[row]; 8055 nrow1 = ailen[row]; 8056 low1 = 0; 8057 high1 = nrow1; 8058 lastcol2 = -1; 8059 rp2 = bj + bi[row]; 8060 ap2 = ba + bi[row]; 8061 rmax2 = bimax[row]; 8062 nrow2 = bilen[row]; 8063 low2 = 0; 8064 high2 = nrow2; 8065 8066 for (j=0; j<n; j++) { 8067 if (roworiented) value = v[i*n+j]; 8068 else value = v[i+j*m]; 8069 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8070 if (in[j] >= cstart && in[j] < cend) { 8071 col = in[j] - cstart; 8072 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 8073 } else if (in[j] < 0) continue; 8074 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8075 /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */ 8076 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 8077 } else { 8078 if (mat->was_assembled) { 8079 if (!aij->colmap) { 8080 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8081 } 8082 #if defined(PETSC_USE_CTABLE) 8083 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); 8084 col--; 8085 #else 8086 col = aij->colmap[in[j]] - 1; 8087 #endif 8088 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 8089 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8090 col = in[j]; 8091 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8092 B = aij->B; 8093 b = (Mat_SeqAIJ*)B->data; 8094 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 8095 rp2 = bj + bi[row]; 8096 ap2 = ba + bi[row]; 8097 rmax2 = bimax[row]; 8098 nrow2 = bilen[row]; 8099 low2 = 0; 8100 high2 = nrow2; 8101 bm = aij->B->rmap->n; 8102 ba = b->a; 8103 inserted = PETSC_FALSE; 8104 } 8105 } else col = in[j]; 8106 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 8107 } 8108 } 8109 } else if (!aij->donotstash) { 8110 if (roworiented) { 8111 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8112 } else { 8113 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8114 } 8115 } 8116 } 8117 PetscCall(MatSeqAIJRestoreArray(A,&aa)); 8118 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 8119 } 8120 PetscFunctionReturnVoid(); 8121 } 8122 8123 /* Undefining these here since they were redefined from their original definition above! No 8124 * other PETSc functions should be defined past this point, as it is impossible to recover the 8125 * original definitions */ 8126 #undef PetscCall 8127 #undef SETERRQ 8128
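/*
   Usage sketch (illustration only; not part of PETSc's test suite). It shows how the MPIAIJBACKEND product
   path implemented earlier in this file is reached through the public MatProduct API; A and P are assumed
   to be user-assembled MPIAIJ-based device matrices (for example MATMPIAIJCUSPARSE or MATMPIAIJKOKKOS) of
   matching type:

     Mat C;
     PetscCall(MatProductCreate(A,P,NULL,&C));
     PetscCall(MatProductSetType(C,MATPRODUCT_PtAP));
     PetscCall(MatProductSetFromOptions(C)); // may select MatProductSymbolic_MPIAIJBACKEND
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));
     PetscCall(MatDestroy(&C));

   This is given as a comment because PetscCall() and SETERRQ() have been undefined just above and no other
   PETSc functions should be defined past this point in the file.
*/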