#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B));
  PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B));
  PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B));
  PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
    automatically switches over to using inodes when enough of them exist.

  Level: beginner

.seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A,flg));
  if (a->B) PetscCall(MatBindToCPU(a->B,flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * It may seem a little odd for a MatBindToCPU() call to do this, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
*/ 83 if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg)); 84 if (a->diag) PetscCall(VecBindToCPU(a->diag,flg)); 85 86 PetscFunctionReturn(0); 87 } 88 89 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 90 { 91 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 92 93 PetscFunctionBegin; 94 if (mat->A) { 95 PetscCall(MatSetBlockSizes(mat->A,rbs,cbs)); 96 PetscCall(MatSetBlockSizes(mat->B,rbs,1)); 97 } 98 PetscFunctionReturn(0); 99 } 100 101 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 102 { 103 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 104 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 105 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 106 const PetscInt *ia,*ib; 107 const MatScalar *aa,*bb,*aav,*bav; 108 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 109 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 110 111 PetscFunctionBegin; 112 *keptrows = NULL; 113 114 ia = a->i; 115 ib = b->i; 116 PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav)); 117 PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav)); 118 for (i=0; i<m; i++) { 119 na = ia[i+1] - ia[i]; 120 nb = ib[i+1] - ib[i]; 121 if (!na && !nb) { 122 cnt++; 123 goto ok1; 124 } 125 aa = aav + ia[i]; 126 for (j=0; j<na; j++) { 127 if (aa[j] != 0.0) goto ok1; 128 } 129 bb = bav + ib[i]; 130 for (j=0; j <nb; j++) { 131 if (bb[j] != 0.0) goto ok1; 132 } 133 cnt++; 134 ok1:; 135 } 136 PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M))); 137 if (!n0rows) { 138 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 139 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 140 PetscFunctionReturn(0); 141 } 142 PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows)); 143 cnt = 0; 144 for (i=0; i<m; i++) { 145 na = ia[i+1] - ia[i]; 146 nb = ib[i+1] - ib[i]; 147 if (!na && !nb) continue; 148 aa = aav + ia[i]; 149 for (j=0; j<na;j++) { 150 if (aa[j] != 0.0) { 151 rows[cnt++] = rstart + i; 152 goto ok2; 153 } 154 } 155 bb = bav + ib[i]; 156 for (j=0; j<nb; j++) { 157 if (bb[j] != 0.0) { 158 rows[cnt++] = rstart + i; 159 goto ok2; 160 } 161 } 162 ok2:; 163 } 164 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows)); 165 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 166 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 167 PetscFunctionReturn(0); 168 } 169 170 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 171 { 172 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 173 PetscBool cong; 174 175 PetscFunctionBegin; 176 PetscCall(MatHasCongruentLayouts(Y,&cong)); 177 if (Y->assembled && cong) { 178 PetscCall(MatDiagonalSet(aij->A,D,is)); 179 } else { 180 PetscCall(MatDiagonalSet_Default(Y,D,is)); 181 } 182 PetscFunctionReturn(0); 183 } 184 185 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 188 PetscInt i,rstart,nrows,*rows; 189 190 PetscFunctionBegin; 191 *zrows = NULL; 192 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows)); 193 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 194 for (i=0; i<nrows; i++) rows[i] += rstart; 195 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows)); 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions) 200 { 201 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 202 PetscInt i,m,n,*garray = aij->garray; 203 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 204 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 205 PetscReal *work; 206 const PetscScalar *dummy; 207 208 
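  /* How the reductions are assembled (descriptive note): each rank accumulates its local
     contributions into the dense length-n array work[], indexed by global column. Entries of the
     diagonal block land at A->cmap->rstart + local column; entries of the off-diagonal block are
     mapped back to global columns through garray. The ranks then combine work[] with
     MPIU_Allreduce (MPIU_MAX for NORM_INFINITY, MPIU_SUM otherwise); NORM_2 and the
     REDUCTION_MEAN_* variants are post-processed at the end. The paired Get/RestoreArrayRead
     calls below appear to serve only to sync any device-side values to the host before the raw
     CSR arrays are read directly. A typical caller-side entry point is, e.g.,
     MatGetColumnNorms(A,NORM_2,norms). */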
PetscFunctionBegin; 209 PetscCall(MatGetSize(A,&m,&n)); 210 PetscCall(PetscCalloc1(n,&work)); 211 PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy)); 212 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy)); 213 PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy)); 214 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy)); 215 if (type == NORM_2) { 216 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 217 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 218 } 219 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 220 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 221 } 222 } else if (type == NORM_1) { 223 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 224 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 225 } 226 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 227 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 228 } 229 } else if (type == NORM_INFINITY) { 230 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 231 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 232 } 233 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 234 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 235 } 236 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 237 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 238 work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 239 } 240 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 241 work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 242 } 243 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 244 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 245 work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 246 } 247 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 248 work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 249 } 250 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 251 if (type == NORM_INFINITY) { 252 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 253 } else { 254 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 255 } 256 PetscCall(PetscFree(work)); 257 if (type == NORM_2) { 258 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 259 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 260 for (i=0; i<n; i++) reductions[i] /= m; 261 } 262 PetscFunctionReturn(0); 263 } 264 265 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 266 { 267 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 268 IS sis,gis; 269 const PetscInt *isis,*igis; 270 PetscInt n,*iis,nsis,ngis,rstart,i; 271 272 PetscFunctionBegin; 273 PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis)); 274 PetscCall(MatFindNonzeroRows(a->B,&gis)); 275 PetscCall(ISGetSize(gis,&ngis)); 276 PetscCall(ISGetSize(sis,&nsis)); 277 PetscCall(ISGetIndices(sis,&isis)); 278 PetscCall(ISGetIndices(gis,&igis)); 279 280 PetscCall(PetscMalloc1(ngis+nsis,&iis)); 281 PetscCall(PetscArraycpy(iis,igis,ngis)); 282 PetscCall(PetscArraycpy(iis+ngis,isis,nsis)); 283 n = ngis + nsis; 284 PetscCall(PetscSortRemoveDupsInt(&n,iis)); 285 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 286 for (i=0; i<n; i++) iis[i] += rstart; 287 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is)); 288 289 PetscCall(ISRestoreIndices(sis,&isis)); 290 
PetscCall(ISRestoreIndices(gis,&igis)); 291 PetscCall(ISDestroy(&sis)); 292 PetscCall(ISDestroy(&gis)); 293 PetscFunctionReturn(0); 294 } 295 296 /* 297 Local utility routine that creates a mapping from the global column 298 number to the local number in the off-diagonal part of the local 299 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 300 a slightly higher hash table cost; without it it is not scalable (each processor 301 has an order N integer array but is fast to access. 302 */ 303 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 306 PetscInt n = aij->B->cmap->n,i; 307 308 PetscFunctionBegin; 309 PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 310 #if defined(PETSC_USE_CTABLE) 311 PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap)); 312 for (i=0; i<n; i++) { 313 PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES)); 314 } 315 #else 316 PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap)); 317 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt))); 318 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 319 #endif 320 PetscFunctionReturn(0); 321 } 322 323 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 324 { \ 325 if (col <= lastcol1) low1 = 0; \ 326 else high1 = nrow1; \ 327 lastcol1 = col;\ 328 while (high1-low1 > 5) { \ 329 t = (low1+high1)/2; \ 330 if (rp1[t] > col) high1 = t; \ 331 else low1 = t; \ 332 } \ 333 for (_i=low1; _i<high1; _i++) { \ 334 if (rp1[_i] > col) break; \ 335 if (rp1[_i] == col) { \ 336 if (addv == ADD_VALUES) { \ 337 ap1[_i] += value; \ 338 /* Not sure LogFlops will slow dow the code or not */ \ 339 (void)PetscLogFlops(1.0); \ 340 } \ 341 else ap1[_i] = value; \ 342 goto a_noinsert; \ 343 } \ 344 } \ 345 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 346 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 347 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 348 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 349 N = nrow1++ - 1; a->nz++; high1++; \ 350 /* shift up all the later entries in this row */ \ 351 PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\ 352 PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\ 353 rp1[_i] = col; \ 354 ap1[_i] = value; \ 355 A->nonzerostate++;\ 356 a_noinsert: ; \ 357 ailen[row] = nrow1; \ 358 } 359 360 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 361 { \ 362 if (col <= lastcol2) low2 = 0; \ 363 else high2 = nrow2; \ 364 lastcol2 = col; \ 365 while (high2-low2 > 5) { \ 366 t = (low2+high2)/2; \ 367 if (rp2[t] > col) high2 = t; \ 368 else low2 = t; \ 369 } \ 370 for (_i=low2; _i<high2; _i++) { \ 371 if (rp2[_i] > col) break; \ 372 if (rp2[_i] == col) { \ 373 if (addv == ADD_VALUES) { \ 374 ap2[_i] += value; \ 375 (void)PetscLogFlops(1.0); \ 376 } \ 377 else ap2[_i] = value; \ 378 goto b_noinsert; \ 379 } \ 380 } \ 381 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 382 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 383 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 
384 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 385 N = nrow2++ - 1; b->nz++; high2++; \ 386 /* shift up all the later entries in this row */ \ 387 PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\ 388 PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\ 389 rp2[_i] = col; \ 390 ap2[_i] = value; \ 391 B->nonzerostate++; \ 392 b_noinsert: ; \ 393 bilen[row] = nrow2; \ 394 } 395 396 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 397 { 398 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 399 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 400 PetscInt l,*garray = mat->garray,diag; 401 PetscScalar *aa,*ba; 402 403 PetscFunctionBegin; 404 /* code only works for square matrices A */ 405 406 /* find size of row to the left of the diagonal part */ 407 PetscCall(MatGetOwnershipRange(A,&diag,NULL)); 408 row = row - diag; 409 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 410 if (garray[b->j[b->i[row]+l]] > diag) break; 411 } 412 if (l) { 413 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 414 PetscCall(PetscArraycpy(ba+b->i[row],v,l)); 415 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 416 } 417 418 /* diagonal part */ 419 if (a->i[row+1]-a->i[row]) { 420 PetscCall(MatSeqAIJGetArray(mat->A,&aa)); 421 PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]))); 422 PetscCall(MatSeqAIJRestoreArray(mat->A,&aa)); 423 } 424 425 /* right of diagonal part */ 426 if (b->i[row+1]-b->i[row]-l) { 427 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 428 PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l)); 429 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 430 } 431 PetscFunctionReturn(0); 432 } 433 434 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 435 { 436 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 437 PetscScalar value = 0.0; 438 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 439 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 440 PetscBool roworiented = aij->roworiented; 441 442 /* Some Variables required in the macro */ 443 Mat A = aij->A; 444 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 445 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 446 PetscBool ignorezeroentries = a->ignorezeroentries; 447 Mat B = aij->B; 448 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 449 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 450 MatScalar *aa,*ba; 451 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 452 PetscInt nonew; 453 MatScalar *ap1,*ap2; 454 455 PetscFunctionBegin; 456 PetscCall(MatSeqAIJGetArray(A,&aa)); 457 PetscCall(MatSeqAIJGetArray(B,&ba)); 458 for (i=0; i<m; i++) { 459 if (im[i] < 0) continue; 460 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 461 if (im[i] >= rstart && im[i] < rend) { 462 row = im[i] - rstart; 463 lastcol1 = -1; 464 rp1 = aj + ai[row]; 465 ap1 = aa + ai[row]; 466 rmax1 = aimax[row]; 467 nrow1 = ailen[row]; 468 low1 = 0; 469 high1 = nrow1; 470 lastcol2 = -1; 471 rp2 = bj + bi[row]; 472 ap2 = ba + bi[row]; 473 rmax2 = bimax[row]; 474 nrow2 = bilen[row]; 475 low2 = 0; 476 high2 = nrow2; 477 478 for (j=0; j<n; j++) { 479 if (v) value = roworiented ? 
v[i*n+j] : v[i+j*m]; 480 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 481 if (in[j] >= cstart && in[j] < cend) { 482 col = in[j] - cstart; 483 nonew = a->nonew; 484 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 485 } else if (in[j] < 0) { 486 continue; 487 } else { 488 PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 489 if (mat->was_assembled) { 490 if (!aij->colmap) { 491 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 492 } 493 #if defined(PETSC_USE_CTABLE) 494 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */ 495 col--; 496 #else 497 col = aij->colmap[in[j]] - 1; 498 #endif 499 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 500 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 501 col = in[j]; 502 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 503 B = aij->B; 504 b = (Mat_SeqAIJ*)B->data; 505 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 506 rp2 = bj + bi[row]; 507 ap2 = ba + bi[row]; 508 rmax2 = bimax[row]; 509 nrow2 = bilen[row]; 510 low2 = 0; 511 high2 = nrow2; 512 bm = aij->B->rmap->n; 513 ba = b->a; 514 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 515 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 516 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j])); 517 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 518 } 519 } else col = in[j]; 520 nonew = b->nonew; 521 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 522 } 523 } 524 } else { 525 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 526 if (!aij->donotstash) { 527 mat->assembled = PETSC_FALSE; 528 if (roworiented) { 529 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 530 } else { 531 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 532 } 533 } 534 } 535 } 536 PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 537 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 538 PetscFunctionReturn(0); 539 } 540 541 /* 542 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 543 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 544 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
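    For illustration (hypothetical numbers): with cstart = 10 and cend = 20, a local row whose sorted
    global columns are {3, 12, 15, 42} stores {2, 5} (global index minus cstart) in the diagonal part
    and keeps {3, 42} as global column indices in the off-diagonal part.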
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A    = aij->A; /* diagonal part of the matrix */
  Mat        B    = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be PETSC_FALSE; otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not hold and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ*)mat->data;
  Mat        A     = aij->A; /* diagonal part of the matrix */
  Mat        B     = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
*/ 601 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 602 PetscScalar *aa = a->a,*ba = b->a; 603 604 PetscFunctionBegin; 605 /* Iterate over all rows of the matrix */ 606 for (j=0; j<am; j++) { 607 dnz_row = onz_row = 0; 608 rowstart_offd = full_offd_i[j]; 609 rowstart_diag = full_diag_i[j]; 610 /* Iterate over all non-zero columns of the current row */ 611 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 612 /* If column is in the diagonal */ 613 if (mat_j[col] >= cstart && mat_j[col] < cend) { 614 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 615 aa[rowstart_diag+dnz_row] = mat_a[col]; 616 dnz_row++; 617 } else { /* off-diagonal entries */ 618 bj[rowstart_offd+onz_row] = mat_j[col]; 619 ba[rowstart_offd+onz_row] = mat_a[col]; 620 onz_row++; 621 } 622 } 623 ailen[j] = dnz_row; 624 bilen[j] = onz_row; 625 } 626 PetscFunctionReturn(0); 627 } 628 629 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 630 { 631 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 632 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 633 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 634 635 PetscFunctionBegin; 636 for (i=0; i<m; i++) { 637 if (idxm[i] < 0) continue; /* negative row */ 638 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 639 if (idxm[i] >= rstart && idxm[i] < rend) { 640 row = idxm[i] - rstart; 641 for (j=0; j<n; j++) { 642 if (idxn[j] < 0) continue; /* negative column */ 643 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 644 if (idxn[j] >= cstart && idxn[j] < cend) { 645 col = idxn[j] - cstart; 646 PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j)); 647 } else { 648 if (!aij->colmap) { 649 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 650 } 651 #if defined(PETSC_USE_CTABLE) 652 PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col)); 653 col--; 654 #else 655 col = aij->colmap[idxn[j]] - 1; 656 #endif 657 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 658 else { 659 PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j)); 660 } 661 } 662 } 663 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 664 } 665 PetscFunctionReturn(0); 666 } 667 668 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 669 { 670 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 671 PetscInt nstash,reallocs; 672 673 PetscFunctionBegin; 674 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 675 676 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 677 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 678 PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 679 PetscFunctionReturn(0); 680 } 681 682 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 683 { 684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 685 PetscMPIInt n; 686 PetscInt i,j,rstart,ncols,flg; 687 PetscInt *row,*col; 688 PetscBool other_disassembled; 689 PetscScalar *val; 690 691 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 692 693 PetscFunctionBegin; 694 if (!aij->donotstash && !mat->nooffprocentries) { 695 while (1) { 696 
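      /* Each iteration drains one stash message: a batch of (row, col, value) triples that other
         ranks destined for rows owned by this rank; flg is zero once no messages remain. Runs of
         consecutive entries that share a row are grouped so that each row is applied with a
         single MatSetValues_MPIAIJ() call. */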
PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 697 if (!flg) break; 698 699 for (i=0; i<n;) { 700 /* Now identify the consecutive vals belonging to the same row */ 701 for (j=i,rstart=row[j]; j<n; j++) { 702 if (row[j] != rstart) break; 703 } 704 if (j < n) ncols = j-i; 705 else ncols = n-i; 706 /* Now assemble all these values with a single function call */ 707 PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 708 i = j; 709 } 710 } 711 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 712 } 713 #if defined(PETSC_HAVE_DEVICE) 714 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 715 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 716 if (mat->boundtocpu) { 717 PetscCall(MatBindToCPU(aij->A,PETSC_TRUE)); 718 PetscCall(MatBindToCPU(aij->B,PETSC_TRUE)); 719 } 720 #endif 721 PetscCall(MatAssemblyBegin(aij->A,mode)); 722 PetscCall(MatAssemblyEnd(aij->A,mode)); 723 724 /* determine if any processor has disassembled, if so we must 725 also disassemble ourself, in order that we may reassemble. */ 726 /* 727 if nonzero structure of submatrix B cannot change then we know that 728 no processor disassembled thus we can skip this stuff 729 */ 730 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 731 PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat))); 732 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */ 733 PetscCall(MatDisAssemble_MPIAIJ(mat)); 734 } 735 } 736 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 737 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 738 } 739 PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE)); 740 #if defined(PETSC_HAVE_DEVICE) 741 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 742 #endif 743 PetscCall(MatAssemblyBegin(aij->B,mode)); 744 PetscCall(MatAssemblyEnd(aij->B,mode)); 745 746 PetscCall(PetscFree2(aij->rowvalues,aij->rowindices)); 747 748 aij->rowvalues = NULL; 749 750 PetscCall(VecDestroy(&aij->diag)); 751 752 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 753 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 754 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 755 PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat))); 756 } 757 #if defined(PETSC_HAVE_DEVICE) 758 mat->offloadmask = PETSC_OFFLOAD_BOTH; 759 #endif 760 PetscFunctionReturn(0); 761 } 762 763 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 764 { 765 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 766 767 PetscFunctionBegin; 768 PetscCall(MatZeroEntries(l->A)); 769 PetscCall(MatZeroEntries(l->B)); 770 PetscFunctionReturn(0); 771 } 772 773 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 774 { 775 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 776 PetscObjectState sA, sB; 777 PetscInt *lrows; 778 PetscInt r, len; 779 PetscBool cong, lch, gch; 780 781 PetscFunctionBegin; 782 /* get locally owned rows */ 783 PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows)); 784 PetscCall(MatHasCongruentLayouts(A,&cong)); 785 /* fix right hand side if needed */ 786 
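  /* Caller-side sketch (illustrative, not part of this routine; nbc and bcrows are hypothetical
     names): a typical use is imposing boundary conditions, e.g.
        MatZeroRows(A, nbc, bcrows, 1.0, x, b);
     where x holds the prescribed values. Each zeroed row keeps `diag` on its diagonal and
     b[row] is overwritten with diag*x[row], as done below for the locally owned subset lrows. */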
if (x && b) { 787 const PetscScalar *xx; 788 PetscScalar *bb; 789 790 PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 791 PetscCall(VecGetArrayRead(x, &xx)); 792 PetscCall(VecGetArray(b, &bb)); 793 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 794 PetscCall(VecRestoreArrayRead(x, &xx)); 795 PetscCall(VecRestoreArray(b, &bb)); 796 } 797 798 sA = mat->A->nonzerostate; 799 sB = mat->B->nonzerostate; 800 801 if (diag != 0.0 && cong) { 802 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 803 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 804 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 805 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 806 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 807 PetscInt nnwA, nnwB; 808 PetscBool nnzA, nnzB; 809 810 nnwA = aijA->nonew; 811 nnwB = aijB->nonew; 812 nnzA = aijA->keepnonzeropattern; 813 nnzB = aijB->keepnonzeropattern; 814 if (!nnzA) { 815 PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 816 aijA->nonew = 0; 817 } 818 if (!nnzB) { 819 PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 820 aijB->nonew = 0; 821 } 822 /* Must zero here before the next loop */ 823 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 824 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 825 for (r = 0; r < len; ++r) { 826 const PetscInt row = lrows[r] + A->rmap->rstart; 827 if (row >= A->cmap->N) continue; 828 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 829 } 830 aijA->nonew = nnwA; 831 aijB->nonew = nnwB; 832 } else { 833 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 834 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 835 } 836 PetscCall(PetscFree(lrows)); 837 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 838 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 839 840 /* reduce nonzerostate */ 841 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 842 PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A))); 843 if (gch) A->nonzerostate++; 844 PetscFunctionReturn(0); 845 } 846 847 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 848 { 849 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 850 PetscMPIInt n = A->rmap->n; 851 PetscInt i,j,r,m,len = 0; 852 PetscInt *lrows,*owners = A->rmap->range; 853 PetscMPIInt p = 0; 854 PetscSFNode *rrows; 855 PetscSF sf; 856 const PetscScalar *xx; 857 PetscScalar *bb,*mask,*aij_a; 858 Vec xmask,lmask; 859 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 860 const PetscInt *aj, *ii,*ridx; 861 PetscScalar *aa; 862 863 PetscFunctionBegin; 864 /* Create SF where leaves are input rows and roots are owned rows */ 865 PetscCall(PetscMalloc1(n, &lrows)); 866 for (r = 0; r < n; ++r) lrows[r] = -1; 867 PetscCall(PetscMalloc1(N, &rrows)); 868 for (r = 0; r < N; ++r) { 869 const PetscInt idx = rows[r]; 870 PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 871 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 872 PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p)); 873 } 
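    /* Record, for this requested row, which rank owns it and its local index on that rank;
       these (rank, index) pairs become the leaf-to-root connections of the SF built below. */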
874 rrows[r].rank = p; 875 rrows[r].index = rows[r] - owners[p]; 876 } 877 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf)); 878 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 879 /* Collect flags for rows to be zeroed */ 880 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 881 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 882 PetscCall(PetscSFDestroy(&sf)); 883 /* Compress and put in row numbers */ 884 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 885 /* zero diagonal part of matrix */ 886 PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b)); 887 /* handle off diagonal part of matrix */ 888 PetscCall(MatCreateVecs(A,&xmask,NULL)); 889 PetscCall(VecDuplicate(l->lvec,&lmask)); 890 PetscCall(VecGetArray(xmask,&bb)); 891 for (i=0; i<len; i++) bb[lrows[i]] = 1; 892 PetscCall(VecRestoreArray(xmask,&bb)); 893 PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 894 PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 895 PetscCall(VecDestroy(&xmask)); 896 if (x && b) { /* this code is buggy when the row and column layout don't match */ 897 PetscBool cong; 898 899 PetscCall(MatHasCongruentLayouts(A,&cong)); 900 PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 901 PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 902 PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 903 PetscCall(VecGetArrayRead(l->lvec,&xx)); 904 PetscCall(VecGetArray(b,&bb)); 905 } 906 PetscCall(VecGetArray(lmask,&mask)); 907 /* remove zeroed rows of off diagonal matrix */ 908 PetscCall(MatSeqAIJGetArray(l->B,&aij_a)); 909 ii = aij->i; 910 for (i=0; i<len; i++) { 911 PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]])); 912 } 913 /* loop over all elements of off process part of matrix zeroing removed columns*/ 914 if (aij->compressedrow.use) { 915 m = aij->compressedrow.nrows; 916 ii = aij->compressedrow.i; 917 ridx = aij->compressedrow.rindex; 918 for (i=0; i<m; i++) { 919 n = ii[i+1] - ii[i]; 920 aj = aij->j + ii[i]; 921 aa = aij_a + ii[i]; 922 923 for (j=0; j<n; j++) { 924 if (PetscAbsScalar(mask[*aj])) { 925 if (b) bb[*ridx] -= *aa*xx[*aj]; 926 *aa = 0.0; 927 } 928 aa++; 929 aj++; 930 } 931 ridx++; 932 } 933 } else { /* do not use compressed row format */ 934 m = l->B->rmap->n; 935 for (i=0; i<m; i++) { 936 n = ii[i+1] - ii[i]; 937 aj = aij->j + ii[i]; 938 aa = aij_a + ii[i]; 939 for (j=0; j<n; j++) { 940 if (PetscAbsScalar(mask[*aj])) { 941 if (b) bb[i] -= *aa*xx[*aj]; 942 *aa = 0.0; 943 } 944 aa++; 945 aj++; 946 } 947 } 948 } 949 if (x && b) { 950 PetscCall(VecRestoreArray(b,&bb)); 951 PetscCall(VecRestoreArrayRead(l->lvec,&xx)); 952 } 953 PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a)); 954 PetscCall(VecRestoreArray(lmask,&mask)); 955 PetscCall(VecDestroy(&lmask)); 956 PetscCall(PetscFree(lrows)); 957 958 /* only change matrix nonzero state if pattern was allowed to be changed */ 959 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 960 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 961 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 962 } 963 PetscFunctionReturn(0); 964 } 965 966 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 967 { 968 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 969 PetscInt nt; 970 VecScatter Mvctx = a->Mvctx; 971 972 
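  /* y = A_diag*x_local + A_offdiag*x_ghost: the scatter of ghost values into lvec is started,
     the diagonal-block product (which needs only the local part of x) is overlapped with the
     communication, and the off-diagonal block is applied once the scatter completes. */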
PetscFunctionBegin; 973 PetscCall(VecGetLocalSize(xx,&nt)); 974 PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt); 975 PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 976 PetscCall((*a->A->ops->mult)(a->A,xx,yy)); 977 PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 978 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy)); 979 PetscFunctionReturn(0); 980 } 981 982 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 983 { 984 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 985 986 PetscFunctionBegin; 987 PetscCall(MatMultDiagonalBlock(a->A,bb,xx)); 988 PetscFunctionReturn(0); 989 } 990 991 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 992 { 993 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 994 VecScatter Mvctx = a->Mvctx; 995 996 PetscFunctionBegin; 997 PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 998 PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz)); 999 PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1000 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz)); 1001 PetscFunctionReturn(0); 1002 } 1003 1004 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1005 { 1006 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1007 1008 PetscFunctionBegin; 1009 /* do nondiagonal part */ 1010 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1011 /* do local part */ 1012 PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy)); 1013 /* add partial results together */ 1014 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1015 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1016 PetscFunctionReturn(0); 1017 } 1018 1019 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1020 { 1021 MPI_Comm comm; 1022 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1023 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1024 IS Me,Notme; 1025 PetscInt M,N,first,last,*notme,i; 1026 PetscBool lf; 1027 PetscMPIInt size; 1028 1029 PetscFunctionBegin; 1030 /* Easy test: symmetric diagonal block */ 1031 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1032 PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf)); 1033 PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat))); 1034 if (!*f) PetscFunctionReturn(0); 1035 PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm)); 1036 PetscCallMPI(MPI_Comm_size(comm,&size)); 1037 if (size == 1) PetscFunctionReturn(0); 1038 1039 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
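     Each rank extracts A(Me,Notme), i.e. its owned rows restricted to the columns it does not own,
     and B(Notme,Me); the two sequential submatrices must be transposes of each other (within tol)
     for the parallel matrices to be transposes.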
*/
  PetscCall(MatGetSize(Amat,&M,&N));
  PetscCall(MatGetOwnershipRange(Amat,&first,&last));
  PetscCall(PetscMalloc1(N-last+first,&notme));
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
  PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
  PetscCall(MatDestroyMatrices(1,&Aoffs));
  PetscCall(MatDestroyMatrices(1,&Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A,v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A,aa));
  PetscCall(MatScale(a->B,aa));
  PetscFunctionReturn(0);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
#if
defined(PETSC_USE_LOG) 1137 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 1138 #endif 1139 PetscCall(MatStashDestroy_Private(&mat->stash)); 1140 PetscCall(VecDestroy(&aij->diag)); 1141 PetscCall(MatDestroy(&aij->A)); 1142 PetscCall(MatDestroy(&aij->B)); 1143 #if defined(PETSC_USE_CTABLE) 1144 PetscCall(PetscTableDestroy(&aij->colmap)); 1145 #else 1146 PetscCall(PetscFree(aij->colmap)); 1147 #endif 1148 PetscCall(PetscFree(aij->garray)); 1149 PetscCall(VecDestroy(&aij->lvec)); 1150 PetscCall(VecScatterDestroy(&aij->Mvctx)); 1151 PetscCall(PetscFree2(aij->rowvalues,aij->rowindices)); 1152 PetscCall(PetscFree(aij->ld)); 1153 1154 /* Free COO */ 1155 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 1156 1157 PetscCall(PetscFree(mat->data)); 1158 1159 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1160 PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL)); 1161 1162 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 1163 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL)); 1164 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL)); 1165 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL)); 1166 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL)); 1167 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL)); 1168 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL)); 1169 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL)); 1170 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL)); 1171 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL)); 1172 #if defined(PETSC_HAVE_CUDA) 1173 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL)); 1174 #endif 1175 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1176 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL)); 1177 #endif 1178 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL)); 1179 #if defined(PETSC_HAVE_ELEMENTAL) 1180 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL)); 1181 #endif 1182 #if defined(PETSC_HAVE_SCALAPACK) 1183 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL)); 1184 #endif 1185 #if defined(PETSC_HAVE_HYPRE) 1186 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL)); 1187 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL)); 1188 #endif 1189 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 1190 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL)); 1191 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL)); 1192 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL)); 1193 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL)); 1194 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL)); 1195 #if defined(PETSC_HAVE_MKL_SPARSE) 1196 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL)); 1197 #endif 1198 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL)); 1199 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 1200 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL)); 1201 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL)); 1202 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL)); 1203 PetscFunctionReturn(0); 1204 } 1205 1206 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1207 { 1208 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1209 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1210 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1211 const PetscInt *garray = aij->garray; 1212 const PetscScalar *aa,*ba; 1213 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1214 PetscInt *rowlens; 1215 PetscInt *colidxs; 1216 PetscScalar *matvals; 1217 1218 PetscFunctionBegin; 1219 PetscCall(PetscViewerSetUp(viewer)); 1220 1221 M = mat->rmap->N; 1222 N = mat->cmap->N; 1223 m = mat->rmap->n; 1224 rs = mat->rmap->rstart; 1225 cs = mat->cmap->rstart; 1226 nz = A->nz + B->nz; 1227 1228 /* write matrix header */ 1229 header[0] = MAT_FILE_CLASSID; 1230 header[1] = M; header[2] = N; header[3] = nz; 1231 PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat))); 1232 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1233 1234 /* fill in and store row lengths */ 1235 PetscCall(PetscMalloc1(m,&rowlens)); 1236 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1237 PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT)); 1238 PetscCall(PetscFree(rowlens)); 1239 1240 /* fill in and store column indices */ 1241 PetscCall(PetscMalloc1(nz,&colidxs)); 1242 for (cnt=0, i=0; i<m; i++) { 1243 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1244 if (garray[B->j[jb]] > cs) break; 1245 colidxs[cnt++] = garray[B->j[jb]]; 1246 } 1247 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1248 colidxs[cnt++] = A->j[ja] + cs; 1249 for (; jb<B->i[i+1]; jb++) 1250 colidxs[cnt++] = garray[B->j[jb]]; 1251 } 1252 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1253 PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 1254 PetscCall(PetscFree(colidxs)); 1255 1256 /* fill in and store nonzero values */ 1257 PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa)); 1258 PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba)); 1259 PetscCall(PetscMalloc1(nz,&matvals)); 1260 for (cnt=0, i=0; i<m; i++) { 1261 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1262 if (garray[B->j[jb]] > cs) break; 1263 matvals[cnt++] = ba[jb]; 1264 } 1265 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1266 matvals[cnt++] = aa[ja]; 1267 for (; jb<B->i[i+1]; jb++) 1268 matvals[cnt++] = ba[jb]; 1269 } 1270 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa)); 1271 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba)); 1272 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1273 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 1274 PetscCall(PetscFree(matvals)); 1275 1276 /* write block size option to the viewer's .info file */ 1277 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1278 PetscFunctionReturn(0); 
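  /* Summary of the on-disk layout written above: header[4] = {MAT_FILE_CLASSID, M, N, global nz},
     followed by the per-row nonzero counts of every rank in global row order, then all column
     indices, then all values. Within each row the diagonal-block entries are interleaved with the
     off-diagonal ones so that global column indices appear in increasing order. */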
1279 } 1280 1281 #include <petscdraw.h> 1282 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1283 { 1284 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1285 PetscMPIInt rank = aij->rank,size = aij->size; 1286 PetscBool isdraw,iascii,isbinary; 1287 PetscViewer sviewer; 1288 PetscViewerFormat format; 1289 1290 PetscFunctionBegin; 1291 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1292 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1293 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1294 if (iascii) { 1295 PetscCall(PetscViewerGetFormat(viewer,&format)); 1296 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1297 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1298 PetscCall(PetscMalloc1(size,&nz)); 1299 PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 1300 for (i=0; i<(PetscInt)size; i++) { 1301 nmax = PetscMax(nmax,nz[i]); 1302 nmin = PetscMin(nmin,nz[i]); 1303 navg += nz[i]; 1304 } 1305 PetscCall(PetscFree(nz)); 1306 navg = navg/size; 1307 PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax)); 1308 PetscFunctionReturn(0); 1309 } 1310 PetscCall(PetscViewerGetFormat(viewer,&format)); 1311 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1312 MatInfo info; 1313 PetscInt *inodes=NULL; 1314 1315 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 1316 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 1317 PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL)); 1318 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1319 if (!inodes) { 1320 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", 1321 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1322 } else { 1323 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", 1324 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1325 } 1326 PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info)); 1327 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1328 PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info)); 1329 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1330 PetscCall(PetscViewerFlush(viewer)); 1331 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1332 PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 1333 PetscCall(VecScatterView(aij->Mvctx,viewer)); 1334 PetscFunctionReturn(0); 1335 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1336 PetscInt inodecount,inodelimit,*inodes; 1337 PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit)); 1338 if (inodes) { 1339 PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit)); 1340 } else { 1341 PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n")); 1342 } 
1343 PetscFunctionReturn(0); 1344 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1345 PetscFunctionReturn(0); 1346 } 1347 } else if (isbinary) { 1348 if (size == 1) { 1349 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1350 PetscCall(MatView(aij->A,viewer)); 1351 } else { 1352 PetscCall(MatView_MPIAIJ_Binary(mat,viewer)); 1353 } 1354 PetscFunctionReturn(0); 1355 } else if (iascii && size == 1) { 1356 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1357 PetscCall(MatView(aij->A,viewer)); 1358 PetscFunctionReturn(0); 1359 } else if (isdraw) { 1360 PetscDraw draw; 1361 PetscBool isnull; 1362 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1363 PetscCall(PetscDrawIsNull(draw,&isnull)); 1364 if (isnull) PetscFunctionReturn(0); 1365 } 1366 1367 { /* assemble the entire matrix onto first processor */ 1368 Mat A = NULL, Av; 1369 IS isrow,iscol; 1370 1371 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1372 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1373 PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A)); 1374 PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL)); 1375 /* The commented code uses MatCreateSubMatrices instead */ 1376 /* 1377 Mat *AA, A = NULL, Av; 1378 IS isrow,iscol; 1379 1380 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1381 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1382 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1383 if (rank == 0) { 1384 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1385 A = AA[0]; 1386 Av = AA[0]; 1387 } 1388 PetscCall(MatDestroySubMatrices(1,&AA)); 1389 */ 1390 PetscCall(ISDestroy(&iscol)); 1391 PetscCall(ISDestroy(&isrow)); 1392 /* 1393 Everyone has to call to draw the matrix since the graphics waits are 1394 synchronized across all processors that share the PetscDraw object 1395 */ 1396 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1397 if (rank == 0) { 1398 if (((PetscObject)mat)->name) { 1399 PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name)); 1400 } 1401 PetscCall(MatView_SeqAIJ(Av,sviewer)); 1402 } 1403 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1404 PetscCall(PetscViewerFlush(viewer)); 1405 PetscCall(MatDestroy(&A)); 1406 } 1407 PetscFunctionReturn(0); 1408 } 1409 1410 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1411 { 1412 PetscBool iascii,isdraw,issocket,isbinary; 1413 1414 PetscFunctionBegin; 1415 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1416 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1417 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1418 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 1419 if (iascii || isdraw || isbinary || issocket) { 1420 PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer)); 1421 } 1422 PetscFunctionReturn(0); 1423 } 1424 1425 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1426 { 1427 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1428 Vec bb1 = NULL; 1429 PetscBool hasop; 1430 1431 PetscFunctionBegin; 1432 if (flag == SOR_APPLY_UPPER) { 1433 
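    /* SOR_APPLY_UPPER is delegated entirely to the diagonal block's SOR kernel; the
       off-diagonal block does not participate. */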
PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1438 PetscCall(VecDuplicate(bb,&bb1)); 1439 } 1440 1441 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1442 if (flag & SOR_ZERO_INITIAL_GUESS) { 1443 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1444 its--; 1445 } 1446 1447 while (its--) { 1448 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1449 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1450 1451 /* update rhs: bb1 = bb - B*x */ 1452 PetscCall(VecScale(mat->lvec,-1.0)); 1453 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1454 1455 /* local sweep */ 1456 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 1457 } 1458 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1459 if (flag & SOR_ZERO_INITIAL_GUESS) { 1460 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1461 its--; 1462 } 1463 while (its--) { 1464 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1465 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1466 1467 /* update rhs: bb1 = bb - B*x */ 1468 PetscCall(VecScale(mat->lvec,-1.0)); 1469 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1470 1471 /* local sweep */ 1472 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 1473 } 1474 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1475 if (flag & SOR_ZERO_INITIAL_GUESS) { 1476 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1477 its--; 1478 } 1479 while (its--) { 1480 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1481 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1482 1483 /* update rhs: bb1 = bb - B*x */ 1484 PetscCall(VecScale(mat->lvec,-1.0)); 1485 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1486 1487 /* local sweep */ 1488 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 1489 } 1490 } else if (flag & SOR_EISENSTAT) { 1491 Vec xx1; 1492 1493 PetscCall(VecDuplicate(bb,&xx1)); 1494 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx)); 1495 1496 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1497 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1498 if (!mat->diag) { 1499 PetscCall(MatCreateVecs(matin,&mat->diag,NULL)); 1500 PetscCall(MatGetDiagonal(matin,mat->diag)); 1501 } 1502 PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop)); 1503 if (hasop) { 1504 PetscCall(MatMultDiagonalBlock(matin,xx,bb1)); 1505 } else { 1506 PetscCall(VecPointwiseMult(bb1,mat->diag,xx)); 1507 } 1508 PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb)); 1509 1510 PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1)); 1511 1512 /* local sweep */ 1513 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1)); 1514 PetscCall(VecAXPY(xx,1.0,xx1)); 1515 PetscCall(VecDestroy(&xx1)); 1516 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1517 1518 PetscCall(VecDestroy(&bb1)); 1519 1520 matin->factorerrortype = 
mat->A->factorerrortype; 1521 PetscFunctionReturn(0); 1522 } 1523 1524 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1525 { 1526 Mat aA,aB,Aperm; 1527 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1528 PetscScalar *aa,*ba; 1529 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1530 PetscSF rowsf,sf; 1531 IS parcolp = NULL; 1532 PetscBool done; 1533 1534 PetscFunctionBegin; 1535 PetscCall(MatGetLocalSize(A,&m,&n)); 1536 PetscCall(ISGetIndices(rowp,&rwant)); 1537 PetscCall(ISGetIndices(colp,&cwant)); 1538 PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest)); 1539 1540 /* Invert row permutation to find out where my rows should go */ 1541 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf)); 1542 PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant)); 1543 PetscCall(PetscSFSetFromOptions(rowsf)); 1544 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1545 PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1546 PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1547 1548 /* Invert column permutation to find out where my columns should go */ 1549 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1550 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant)); 1551 PetscCall(PetscSFSetFromOptions(sf)); 1552 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1553 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1554 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1555 PetscCall(PetscSFDestroy(&sf)); 1556 1557 PetscCall(ISRestoreIndices(rowp,&rwant)); 1558 PetscCall(ISRestoreIndices(colp,&cwant)); 1559 PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols)); 1560 1561 /* Find out where my gcols should go */ 1562 PetscCall(MatGetSize(aB,NULL,&ng)); 1563 PetscCall(PetscMalloc1(ng,&gcdest)); 1564 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1565 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols)); 1566 PetscCall(PetscSFSetFromOptions(sf)); 1567 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1568 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1569 PetscCall(PetscSFDestroy(&sf)); 1570 1571 PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz)); 1572 PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1573 PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1574 for (i=0; i<m; i++) { 1575 PetscInt row = rdest[i]; 1576 PetscMPIInt rowner; 1577 PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner)); 1578 for (j=ai[i]; j<ai[i+1]; j++) { 1579 PetscInt col = cdest[aj[j]]; 1580 PetscMPIInt cowner; 1581 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */ 1582 if (rowner == cowner) dnnz[i]++; 1583 else onnz[i]++; 1584 } 1585 for (j=bi[i]; j<bi[i+1]; j++) { 1586 PetscInt col = gcdest[bj[j]]; 1587 PetscMPIInt cowner; 1588 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); 1589 if (rowner == cowner) dnnz[i]++; 1590 else onnz[i]++; 1591 } 1592 } 1593 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1594 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1595 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1596 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1597 PetscCall(PetscSFDestroy(&rowsf)); 1598 1599 
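  /* rdest[i]/cdest[j] now hold the new global row/column index of each locally owned row/column, gcdest[] the new
     global column index of each ghost column, and tdnnz[]/tonnz[] the diagonal/off-diagonal preallocation counts for
     the rows owned by this rank in the permuted matrix.  Below, Aperm is created with those counts and filled with
     MatSetValues(): diagonal-block columns are translated through cdest, off-diagonal ones through gcdest, and long
     rows are inserted in batches of at most m entries because the scratch arrays dnnz/onnz are reused as column buffers. */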
PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm)); 1600 PetscCall(MatSeqAIJGetArray(aA,&aa)); 1601 PetscCall(MatSeqAIJGetArray(aB,&ba)); 1602 for (i=0; i<m; i++) { 1603 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1604 PetscInt j0,rowlen; 1605 rowlen = ai[i+1] - ai[i]; 1606 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1607 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1608 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES)); 1609 } 1610 rowlen = bi[i+1] - bi[i]; 1611 for (j0=j=0; j<rowlen; j0=j) { 1612 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1613 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES)); 1614 } 1615 } 1616 PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY)); 1617 PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY)); 1618 PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1619 PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1620 PetscCall(MatSeqAIJRestoreArray(aA,&aa)); 1621 PetscCall(MatSeqAIJRestoreArray(aB,&ba)); 1622 PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz)); 1623 PetscCall(PetscFree3(work,rdest,cdest)); 1624 PetscCall(PetscFree(gcdest)); 1625 if (parcolp) PetscCall(ISDestroy(&colp)); 1626 *B = Aperm; 1627 PetscFunctionReturn(0); 1628 } 1629 1630 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1631 { 1632 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1633 1634 PetscFunctionBegin; 1635 PetscCall(MatGetSize(aij->B,NULL,nghosts)); 1636 if (ghosts) *ghosts = aij->garray; 1637 PetscFunctionReturn(0); 1638 } 1639 1640 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1641 { 1642 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1643 Mat A = mat->A,B = mat->B; 1644 PetscLogDouble isend[5],irecv[5]; 1645 1646 PetscFunctionBegin; 1647 info->block_size = 1.0; 1648 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1649 1650 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1651 isend[3] = info->memory; isend[4] = info->mallocs; 1652 1653 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1654 1655 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1656 isend[3] += info->memory; isend[4] += info->mallocs; 1657 if (flag == MAT_LOCAL) { 1658 info->nz_used = isend[0]; 1659 info->nz_allocated = isend[1]; 1660 info->nz_unneeded = isend[2]; 1661 info->memory = isend[3]; 1662 info->mallocs = isend[4]; 1663 } else if (flag == MAT_GLOBAL_MAX) { 1664 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 1665 1666 info->nz_used = irecv[0]; 1667 info->nz_allocated = irecv[1]; 1668 info->nz_unneeded = irecv[2]; 1669 info->memory = irecv[3]; 1670 info->mallocs = irecv[4]; 1671 } else if (flag == MAT_GLOBAL_SUM) { 1672 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 1673 1674 info->nz_used = irecv[0]; 1675 info->nz_allocated = irecv[1]; 1676 info->nz_unneeded = irecv[2]; 1677 info->memory = irecv[3]; 1678 info->mallocs = irecv[4]; 1679 } 1680 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1681 info->fill_ratio_needed = 0; 1682 info->factor_mallocs = 0; 1683 PetscFunctionReturn(0); 1684 } 1685 1686 PetscErrorCode MatSetOption_MPIAIJ(Mat 
A,MatOption op,PetscBool flg) 1687 { 1688 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1689 1690 PetscFunctionBegin; 1691 switch (op) { 1692 case MAT_NEW_NONZERO_LOCATIONS: 1693 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1694 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1695 case MAT_KEEP_NONZERO_PATTERN: 1696 case MAT_NEW_NONZERO_LOCATION_ERR: 1697 case MAT_USE_INODES: 1698 case MAT_IGNORE_ZERO_ENTRIES: 1699 case MAT_FORM_EXPLICIT_TRANSPOSE: 1700 MatCheckPreallocated(A,1); 1701 PetscCall(MatSetOption(a->A,op,flg)); 1702 PetscCall(MatSetOption(a->B,op,flg)); 1703 break; 1704 case MAT_ROW_ORIENTED: 1705 MatCheckPreallocated(A,1); 1706 a->roworiented = flg; 1707 1708 PetscCall(MatSetOption(a->A,op,flg)); 1709 PetscCall(MatSetOption(a->B,op,flg)); 1710 break; 1711 case MAT_FORCE_DIAGONAL_ENTRIES: 1712 case MAT_SORTED_FULL: 1713 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1714 break; 1715 case MAT_IGNORE_OFF_PROC_ENTRIES: 1716 a->donotstash = flg; 1717 break; 1718 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1719 case MAT_SPD: 1720 case MAT_SYMMETRIC: 1721 case MAT_STRUCTURALLY_SYMMETRIC: 1722 case MAT_HERMITIAN: 1723 case MAT_SYMMETRY_ETERNAL: 1724 break; 1725 case MAT_SUBMAT_SINGLEIS: 1726 A->submat_singleis = flg; 1727 break; 1728 case MAT_STRUCTURE_ONLY: 1729 /* The option is handled directly by MatSetOption() */ 1730 break; 1731 default: 1732 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1733 } 1734 PetscFunctionReturn(0); 1735 } 1736 1737 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1738 { 1739 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1740 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1741 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1742 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1743 PetscInt *cmap,*idx_p; 1744 1745 PetscFunctionBegin; 1746 PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1747 mat->getrowactive = PETSC_TRUE; 1748 1749 if (!mat->rowvalues && (idx || v)) { 1750 /* 1751 allocate enough space to hold information from the longest row. 
1752 */ 1753 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1754 PetscInt max = 1,tmp; 1755 for (i=0; i<matin->rmap->n; i++) { 1756 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1757 if (max < tmp) max = tmp; 1758 } 1759 PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices)); 1760 } 1761 1762 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1763 lrow = row - rstart; 1764 1765 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1766 if (!v) {pvA = NULL; pvB = NULL;} 1767 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1768 PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 1769 PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1770 nztot = nzA + nzB; 1771 1772 cmap = mat->garray; 1773 if (v || idx) { 1774 if (nztot) { 1775 /* Sort by increasing column numbers, assuming A and B already sorted */ 1776 PetscInt imark = -1; 1777 if (v) { 1778 *v = v_p = mat->rowvalues; 1779 for (i=0; i<nzB; i++) { 1780 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1781 else break; 1782 } 1783 imark = i; 1784 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1785 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1786 } 1787 if (idx) { 1788 *idx = idx_p = mat->rowindices; 1789 if (imark > -1) { 1790 for (i=0; i<imark; i++) { 1791 idx_p[i] = cmap[cworkB[i]]; 1792 } 1793 } else { 1794 for (i=0; i<nzB; i++) { 1795 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1796 else break; 1797 } 1798 imark = i; 1799 } 1800 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1801 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1802 } 1803 } else { 1804 if (idx) *idx = NULL; 1805 if (v) *v = NULL; 1806 } 1807 } 1808 *nz = nztot; 1809 PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 1810 PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 1811 PetscFunctionReturn(0); 1812 } 1813 1814 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1815 { 1816 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1817 1818 PetscFunctionBegin; 1819 PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1820 aij->getrowactive = PETSC_FALSE; 1821 PetscFunctionReturn(0); 1822 } 1823 1824 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1825 { 1826 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1827 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1828 PetscInt i,j,cstart = mat->cmap->rstart; 1829 PetscReal sum = 0.0; 1830 const MatScalar *v,*amata,*bmata; 1831 1832 PetscFunctionBegin; 1833 if (aij->size == 1) { 1834 PetscCall(MatNorm(aij->A,type,norm)); 1835 } else { 1836 PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata)); 1837 PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata)); 1838 if (type == NORM_FROBENIUS) { 1839 v = amata; 1840 for (i=0; i<amat->nz; i++) { 1841 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1842 } 1843 v = bmata; 1844 for (i=0; i<bmat->nz; i++) { 1845 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1846 } 1847 PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1848 *norm = PetscSqrtReal(*norm); 1849 PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz)); 1850 } else if (type == NORM_1) { /* max column norm */ 1851 PetscReal *tmp,*tmp2; 1852 PetscInt *jj,*garray = aij->garray; 1853 PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp)); 1854 PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2)); 1855 
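  /* NORM_1 is the maximum column sum:  ||A||_1 = max_j sum_i |a_ij|.  Entries of the diagonal block are
     accumulated into tmp[] at cstart + (local column), entries of the off-diagonal block at garray[local column];
     the per-column sums are then added across all ranks and the largest one is returned.  Note that tmp/tmp2 have
     global length mat->cmap->N, so this path is simple but not memory-scalable in the number of columns. */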
*norm = 0.0; 1856 v = amata; jj = amat->j; 1857 for (j=0; j<amat->nz; j++) { 1858 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1859 } 1860 v = bmata; jj = bmat->j; 1861 for (j=0; j<bmat->nz; j++) { 1862 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1863 } 1864 PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1865 for (j=0; j<mat->cmap->N; j++) { 1866 if (tmp2[j] > *norm) *norm = tmp2[j]; 1867 } 1868 PetscCall(PetscFree(tmp)); 1869 PetscCall(PetscFree(tmp2)); 1870 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1871 } else if (type == NORM_INFINITY) { /* max row norm */ 1872 PetscReal ntemp = 0.0; 1873 for (j=0; j<aij->A->rmap->n; j++) { 1874 v = amata + amat->i[j]; 1875 sum = 0.0; 1876 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1877 sum += PetscAbsScalar(*v); v++; 1878 } 1879 v = bmata + bmat->i[j]; 1880 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1881 sum += PetscAbsScalar(*v); v++; 1882 } 1883 if (sum > ntemp) ntemp = sum; 1884 } 1885 PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 1886 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1887 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1888 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata)); 1889 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata)); 1890 } 1891 PetscFunctionReturn(0); 1892 } 1893 1894 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1895 { 1896 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1897 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1898 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1899 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1900 Mat B,A_diag,*B_diag; 1901 const MatScalar *pbv,*bv; 1902 1903 PetscFunctionBegin; 1904 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1905 ai = Aloc->i; aj = Aloc->j; 1906 bi = Bloc->i; bj = Bloc->j; 1907 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1908 PetscInt *d_nnz,*g_nnz,*o_nnz; 1909 PetscSFNode *oloc; 1910 PETSC_UNUSED PetscSF sf; 1911 1912 PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc)); 1913 /* compute d_nnz for preallocation */ 1914 PetscCall(PetscArrayzero(d_nnz,na)); 1915 for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++; 1916 /* compute local off-diagonal contributions */ 1917 PetscCall(PetscArrayzero(g_nnz,nb)); 1918 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1919 /* map those to global */ 1920 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1921 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray)); 1922 PetscCall(PetscSFSetFromOptions(sf)); 1923 PetscCall(PetscArrayzero(o_nnz,na)); 1924 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1925 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1926 PetscCall(PetscSFDestroy(&sf)); 1927 1928 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 1929 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 1930 PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs))); 1931 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 1932 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 1933 PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc)); 1934 } else { 1935 B = *matout; 1936 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 1937 } 1938 1939 b = (Mat_MPIAIJ*)B->data; 1940 A_diag = a->A; 1941 B_diag = &b->A; 
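  /* The transpose is assembled in two pieces: the diagonal block of B is obtained by transposing A_diag in place
     (purely local, so MatSetValues() is not needed; its row lengths ilen are primed from the row pointers first),
     while the off-diagonal block of A is inserted one row at a time as a column of B through MatSetValues(),
     with global column indices taken from a->garray. */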
1942 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1943 A_diag_ncol = A_diag->cmap->N; 1944 B_diag_ilen = sub_B_diag->ilen; 1945 B_diag_i = sub_B_diag->i; 1946 1947 /* Set ilen for diagonal of B */ 1948 for (i=0; i<A_diag_ncol; i++) { 1949 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1950 } 1951 1952 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1953 very quickly (=without using MatSetValues), because all writes are local. */ 1954 PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag)); 1955 1956 /* copy over the B part */ 1957 PetscCall(PetscMalloc1(bi[mb],&cols)); 1958 PetscCall(MatSeqAIJGetArrayRead(a->B,&bv)); 1959 pbv = bv; 1960 row = A->rmap->rstart; 1961 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1962 cols_tmp = cols; 1963 for (i=0; i<mb; i++) { 1964 ncol = bi[i+1]-bi[i]; 1965 PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES)); 1966 row++; 1967 pbv += ncol; cols_tmp += ncol; 1968 } 1969 PetscCall(PetscFree(cols)); 1970 PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv)); 1971 1972 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1973 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1974 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1975 *matout = B; 1976 } else { 1977 PetscCall(MatHeaderMerge(A,&B)); 1978 } 1979 PetscFunctionReturn(0); 1980 } 1981 1982 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1983 { 1984 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1985 Mat a = aij->A,b = aij->B; 1986 PetscInt s1,s2,s3; 1987 1988 PetscFunctionBegin; 1989 PetscCall(MatGetLocalSize(mat,&s2,&s3)); 1990 if (rr) { 1991 PetscCall(VecGetLocalSize(rr,&s1)); 1992 PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1993 /* Overlap communication with computation. */ 1994 PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1995 } 1996 if (ll) { 1997 PetscCall(VecGetLocalSize(ll,&s1)); 1998 PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1999 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 2000 } 2001 /* scale the diagonal block */ 2002 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 2003 2004 if (rr) { 2005 /* Do a scatter end and then right scale the off-diagonal block */ 2006 PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2007 PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec)); 2008 } 2009 PetscFunctionReturn(0); 2010 } 2011 2012 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2013 { 2014 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2015 2016 PetscFunctionBegin; 2017 PetscCall(MatSetUnfactored(a->A)); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2022 { 2023 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2024 Mat a,b,c,d; 2025 PetscBool flg; 2026 2027 PetscFunctionBegin; 2028 a = matA->A; b = matA->B; 2029 c = matB->A; d = matB->B; 2030 2031 PetscCall(MatEqual(a,c,&flg)); 2032 if (flg) { 2033 PetscCall(MatEqual(b,d,&flg)); 2034 } 2035 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 2036 PetscFunctionReturn(0); 2037 } 2038 2039 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2040 { 2041 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2042 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2043 2044 PetscFunctionBegin; 2045 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2046 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2047 /* because of the column compression in the off-processor part of the matrix a->B, 2048 the number of columns in a->B and b->B may be different, hence we cannot call 2049 the MatCopy() directly on the two parts. If need be, we can provide a more 2050 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2051 then copying the submatrices */ 2052 PetscCall(MatCopy_Basic(A,B,str)); 2053 } else { 2054 PetscCall(MatCopy(a->A,b->A,str)); 2055 PetscCall(MatCopy(a->B,b->B,str)); 2056 } 2057 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2058 PetscFunctionReturn(0); 2059 } 2060 2061 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2062 { 2063 PetscFunctionBegin; 2064 PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 /* 2069 Computes the number of nonzeros per row needed for preallocation when X and Y 2070 have different nonzero structure. 2071 */ 2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2073 { 2074 PetscInt i,j,k,nzx,nzy; 2075 2076 PetscFunctionBegin; 2077 /* Set the number of nonzeros in the new matrix */ 2078 for (i=0; i<m; i++) { 2079 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2080 nzx = xi[i+1] - xi[i]; 2081 nzy = yi[i+1] - yi[i]; 2082 nnz[i] = 0; 2083 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2084 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2085 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2086 nnz[i]++; 2087 } 2088 for (; k<nzy; k++) nnz[i]++; 2089 } 2090 PetscFunctionReturn(0); 2091 } 2092 2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2095 { 2096 PetscInt m = Y->rmap->N; 2097 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2098 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2099 2100 PetscFunctionBegin; 2101 PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 2102 PetscFunctionReturn(0); 2103 } 2104 2105 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2106 { 2107 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2108 2109 PetscFunctionBegin; 2110 if (str == SAME_NONZERO_PATTERN) { 2111 PetscCall(MatAXPY(yy->A,a,xx->A,str)); 2112 PetscCall(MatAXPY(yy->B,a,xx->B,str)); 2113 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2114 PetscCall(MatAXPY_Basic(Y,a,X,str)); 2115 } else { 2116 Mat B; 2117 PetscInt *nnz_d,*nnz_o; 2118 2119 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 2120 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 2121 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 2122 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2123 PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap)); 2124 PetscCall(MatSetType(B,((PetscObject)Y)->type_name)); 2125 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d)); 2126 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 2127 PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o)); 2128 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2129 PetscCall(MatHeaderMerge(Y,&B)); 2130 
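  /* MatHeaderMerge() moves the freshly assembled sum B into the existing header of Y (so callers keep the same
     Mat object) and disposes of the temporary; only the scratch preallocation arrays remain to be freed. */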
PetscCall(PetscFree(nnz_d)); 2131 PetscCall(PetscFree(nnz_o)); 2132 } 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2137 2138 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2139 { 2140 PetscFunctionBegin; 2141 if (PetscDefined(USE_COMPLEX)) { 2142 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2143 2144 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2145 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2146 } 2147 PetscFunctionReturn(0); 2148 } 2149 2150 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2151 { 2152 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2153 2154 PetscFunctionBegin; 2155 PetscCall(MatRealPart(a->A)); 2156 PetscCall(MatRealPart(a->B)); 2157 PetscFunctionReturn(0); 2158 } 2159 2160 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2161 { 2162 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2163 2164 PetscFunctionBegin; 2165 PetscCall(MatImaginaryPart(a->A)); 2166 PetscCall(MatImaginaryPart(a->B)); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2171 { 2172 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2173 PetscInt i,*idxb = NULL,m = A->rmap->n; 2174 PetscScalar *va,*vv; 2175 Vec vB,vA; 2176 const PetscScalar *vb; 2177 2178 PetscFunctionBegin; 2179 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 2180 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 2181 2182 PetscCall(VecGetArrayWrite(vA,&va)); 2183 if (idx) { 2184 for (i=0; i<m; i++) { 2185 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2186 } 2187 } 2188 2189 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 2190 PetscCall(PetscMalloc1(m,&idxb)); 2191 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 2192 2193 PetscCall(VecGetArrayWrite(v,&vv)); 2194 PetscCall(VecGetArrayRead(vB,&vb)); 2195 for (i=0; i<m; i++) { 2196 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2197 vv[i] = vb[i]; 2198 if (idx) idx[i] = a->garray[idxb[i]]; 2199 } else { 2200 vv[i] = va[i]; 2201 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2202 idx[i] = a->garray[idxb[i]]; 2203 } 2204 } 2205 PetscCall(VecRestoreArrayWrite(v,&vv)); 2206 PetscCall(VecRestoreArrayWrite(vA,&va)); 2207 PetscCall(VecRestoreArrayRead(vB,&vb)); 2208 PetscCall(PetscFree(idxb)); 2209 PetscCall(VecDestroy(&vA)); 2210 PetscCall(VecDestroy(&vB)); 2211 PetscFunctionReturn(0); 2212 } 2213 2214 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2215 { 2216 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2217 PetscInt m = A->rmap->n,n = A->cmap->n; 2218 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2219 PetscInt *cmap = mat->garray; 2220 PetscInt *diagIdx, *offdiagIdx; 2221 Vec diagV, offdiagV; 2222 PetscScalar *a, *diagA, *offdiagA; 2223 const PetscScalar *ba,*bav; 2224 PetscInt r,j,col,ncols,*bi,*bj; 2225 Mat B = mat->B; 2226 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2227 2228 PetscFunctionBegin; 2229 /* When a process holds entire A and other processes have no entry */ 2230 if (A->cmap->N == n) { 2231 PetscCall(VecGetArrayWrite(v,&diagA)); 2232 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2233 PetscCall(MatGetRowMinAbs(mat->A,diagV,idx)); 2234 PetscCall(VecDestroy(&diagV)); 2235 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2236 PetscFunctionReturn(0); 2237 } else if (n == 0) { 2238 if (m) { 2239 PetscCall(VecGetArrayWrite(v,&a)); 2240 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2241 PetscCall(VecRestoreArrayWrite(v,&a)); 2242 } 2243 PetscFunctionReturn(0); 2244 } 2245 2246 
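  /* General case: the per-row minimum absolute value of the diagonal block is computed into diagV/diagIdx, then the
     compressed off-diagonal block B is scanned.  A row of B that does not cover all A->cmap->N - n off-process
     columns has an implicit 0.0 entry, which no stored entry can beat in absolute value, so offdiagA[r] starts at
     0.0 and offdiagIdx[r] is set to the global column of the first such implicit zero.  Finally the diagonal and
     off-diagonal candidates are combined row by row, preferring the smaller global column index on ties. */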
PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2247 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2248 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2249 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2250 2251 /* Get offdiagIdx[] for implicit 0.0 */ 2252 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2253 ba = bav; 2254 bi = b->i; 2255 bj = b->j; 2256 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2257 for (r = 0; r < m; r++) { 2258 ncols = bi[r+1] - bi[r]; 2259 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2260 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2261 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2262 offdiagA[r] = 0.0; 2263 2264 /* Find first hole in the cmap */ 2265 for (j=0; j<ncols; j++) { 2266 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2267 if (col > j && j < cstart) { 2268 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2269 break; 2270 } else if (col > j + n && j >= cstart) { 2271 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2272 break; 2273 } 2274 } 2275 if (j == ncols && ncols < A->cmap->N - n) { 2276 /* a hole is outside compressed Bcols */ 2277 if (ncols == 0) { 2278 if (cstart) { 2279 offdiagIdx[r] = 0; 2280 } else offdiagIdx[r] = cend; 2281 } else { /* ncols > 0 */ 2282 offdiagIdx[r] = cmap[ncols-1] + 1; 2283 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2284 } 2285 } 2286 } 2287 2288 for (j=0; j<ncols; j++) { 2289 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2290 ba++; bj++; 2291 } 2292 } 2293 2294 PetscCall(VecGetArrayWrite(v, &a)); 2295 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2296 for (r = 0; r < m; ++r) { 2297 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2298 a[r] = diagA[r]; 2299 if (idx) idx[r] = cstart + diagIdx[r]; 2300 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2301 a[r] = diagA[r]; 2302 if (idx) { 2303 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2304 idx[r] = cstart + diagIdx[r]; 2305 } else idx[r] = offdiagIdx[r]; 2306 } 2307 } else { 2308 a[r] = offdiagA[r]; 2309 if (idx) idx[r] = offdiagIdx[r]; 2310 } 2311 } 2312 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2313 PetscCall(VecRestoreArrayWrite(v, &a)); 2314 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2315 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2316 PetscCall(VecDestroy(&diagV)); 2317 PetscCall(VecDestroy(&offdiagV)); 2318 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2319 PetscFunctionReturn(0); 2320 } 2321 2322 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2323 { 2324 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2325 PetscInt m = A->rmap->n,n = A->cmap->n; 2326 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2327 PetscInt *cmap = mat->garray; 2328 PetscInt *diagIdx, *offdiagIdx; 2329 Vec diagV, offdiagV; 2330 PetscScalar *a, *diagA, *offdiagA; 2331 const PetscScalar *ba,*bav; 2332 PetscInt r,j,col,ncols,*bi,*bj; 2333 Mat B = mat->B; 2334 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2335 2336 PetscFunctionBegin; 2337 /* When a process holds entire A and other processes have no entry */ 2338 if (A->cmap->N == n) { 2339 PetscCall(VecGetArrayWrite(v,&diagA)); 2340 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2341 PetscCall(MatGetRowMin(mat->A,diagV,idx)); 2342 PetscCall(VecDestroy(&diagV)); 2343 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2344 PetscFunctionReturn(0); 2345 } else if 
(n == 0) { 2346 if (m) { 2347 PetscCall(VecGetArrayWrite(v,&a)); 2348 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2349 PetscCall(VecRestoreArrayWrite(v,&a)); 2350 } 2351 PetscFunctionReturn(0); 2352 } 2353 2354 PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx)); 2355 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2356 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2357 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2358 2359 /* Get offdiagIdx[] for implicit 0.0 */ 2360 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2361 ba = bav; 2362 bi = b->i; 2363 bj = b->j; 2364 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2365 for (r = 0; r < m; r++) { 2366 ncols = bi[r+1] - bi[r]; 2367 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2368 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2369 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2370 offdiagA[r] = 0.0; 2371 2372 /* Find first hole in the cmap */ 2373 for (j=0; j<ncols; j++) { 2374 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2375 if (col > j && j < cstart) { 2376 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2377 break; 2378 } else if (col > j + n && j >= cstart) { 2379 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2380 break; 2381 } 2382 } 2383 if (j == ncols && ncols < A->cmap->N - n) { 2384 /* a hole is outside compressed Bcols */ 2385 if (ncols == 0) { 2386 if (cstart) { 2387 offdiagIdx[r] = 0; 2388 } else offdiagIdx[r] = cend; 2389 } else { /* ncols > 0 */ 2390 offdiagIdx[r] = cmap[ncols-1] + 1; 2391 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2392 } 2393 } 2394 } 2395 2396 for (j=0; j<ncols; j++) { 2397 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2398 ba++; bj++; 2399 } 2400 } 2401 2402 PetscCall(VecGetArrayWrite(v, &a)); 2403 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2404 for (r = 0; r < m; ++r) { 2405 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2406 a[r] = diagA[r]; 2407 if (idx) idx[r] = cstart + diagIdx[r]; 2408 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2409 a[r] = diagA[r]; 2410 if (idx) { 2411 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2412 idx[r] = cstart + diagIdx[r]; 2413 } else idx[r] = offdiagIdx[r]; 2414 } 2415 } else { 2416 a[r] = offdiagA[r]; 2417 if (idx) idx[r] = offdiagIdx[r]; 2418 } 2419 } 2420 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2421 PetscCall(VecRestoreArrayWrite(v, &a)); 2422 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2423 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2424 PetscCall(VecDestroy(&diagV)); 2425 PetscCall(VecDestroy(&offdiagV)); 2426 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2431 { 2432 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2433 PetscInt m = A->rmap->n,n = A->cmap->n; 2434 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2435 PetscInt *cmap = mat->garray; 2436 PetscInt *diagIdx, *offdiagIdx; 2437 Vec diagV, offdiagV; 2438 PetscScalar *a, *diagA, *offdiagA; 2439 const PetscScalar *ba,*bav; 2440 PetscInt r,j,col,ncols,*bi,*bj; 2441 Mat B = mat->B; 2442 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2443 2444 PetscFunctionBegin; 2445 /* When a process holds entire A and other processes have no entry */ 2446 if (A->cmap->N == n) { 2447 PetscCall(VecGetArrayWrite(v,&diagA)); 2448 
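    /* This rank owns every column, so the off-diagonal block is empty: wrap the output array of v in a temporary
       sequential vector and let the sequential MatGetRowMax() on the diagonal block write the result directly. */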
PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2449 PetscCall(MatGetRowMax(mat->A,diagV,idx)); 2450 PetscCall(VecDestroy(&diagV)); 2451 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2452 PetscFunctionReturn(0); 2453 } else if (n == 0) { 2454 if (m) { 2455 PetscCall(VecGetArrayWrite(v,&a)); 2456 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2457 PetscCall(VecRestoreArrayWrite(v,&a)); 2458 } 2459 PetscFunctionReturn(0); 2460 } 2461 2462 PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2463 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2464 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2465 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2466 2467 /* Get offdiagIdx[] for implicit 0.0 */ 2468 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2469 ba = bav; 2470 bi = b->i; 2471 bj = b->j; 2472 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2473 for (r = 0; r < m; r++) { 2474 ncols = bi[r+1] - bi[r]; 2475 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2476 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2477 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2478 offdiagA[r] = 0.0; 2479 2480 /* Find first hole in the cmap */ 2481 for (j=0; j<ncols; j++) { 2482 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2483 if (col > j && j < cstart) { 2484 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2485 break; 2486 } else if (col > j + n && j >= cstart) { 2487 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2488 break; 2489 } 2490 } 2491 if (j == ncols && ncols < A->cmap->N - n) { 2492 /* a hole is outside compressed Bcols */ 2493 if (ncols == 0) { 2494 if (cstart) { 2495 offdiagIdx[r] = 0; 2496 } else offdiagIdx[r] = cend; 2497 } else { /* ncols > 0 */ 2498 offdiagIdx[r] = cmap[ncols-1] + 1; 2499 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2500 } 2501 } 2502 } 2503 2504 for (j=0; j<ncols; j++) { 2505 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2506 ba++; bj++; 2507 } 2508 } 2509 2510 PetscCall(VecGetArrayWrite(v, &a)); 2511 PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA)); 2512 for (r = 0; r < m; ++r) { 2513 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2514 a[r] = diagA[r]; 2515 if (idx) idx[r] = cstart + diagIdx[r]; 2516 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2517 a[r] = diagA[r]; 2518 if (idx) { 2519 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2520 idx[r] = cstart + diagIdx[r]; 2521 } else idx[r] = offdiagIdx[r]; 2522 } 2523 } else { 2524 a[r] = offdiagA[r]; 2525 if (idx) idx[r] = offdiagIdx[r]; 2526 } 2527 } 2528 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2529 PetscCall(VecRestoreArrayWrite(v, &a)); 2530 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2531 PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA)); 2532 PetscCall(VecDestroy(&diagV)); 2533 PetscCall(VecDestroy(&offdiagV)); 2534 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2535 PetscFunctionReturn(0); 2536 } 2537 2538 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2539 { 2540 Mat *dummy; 2541 2542 PetscFunctionBegin; 2543 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy)); 2544 *newmat = *dummy; 2545 PetscCall(PetscFree(dummy)); 2546 PetscFunctionReturn(0); 2547 } 2548 2549 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2550 { 2551 Mat_MPIAIJ *a = (Mat_MPIAIJ*) 
A->data; 2552 2553 PetscFunctionBegin; 2554 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2555 A->factorerrortype = a->A->factorerrortype; 2556 PetscFunctionReturn(0); 2557 } 2558 2559 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2560 { 2561 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2562 2563 PetscFunctionBegin; 2564 PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2565 PetscCall(MatSetRandom(aij->A,rctx)); 2566 if (x->assembled) { 2567 PetscCall(MatSetRandom(aij->B,rctx)); 2568 } else { 2569 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx)); 2570 } 2571 PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY)); 2572 PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY)); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2577 { 2578 PetscFunctionBegin; 2579 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2580 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2581 PetscFunctionReturn(0); 2582 } 2583 2584 /*@ 2585 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2586 2587 Not collective 2588 2589 Input Parameter: 2590 . A - the matrix 2591 2592 Output Parameter: 2593 . nz - the number of nonzeros 2594 2595 Level: advanced 2596 2597 @*/ 2598 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A,PetscCount *nz) 2599 { 2600 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)A->data; 2601 Mat_SeqAIJ *aaij = (Mat_SeqAIJ*)maij->A->data, *baij = (Mat_SeqAIJ*)maij->B->data; 2602 2603 PetscFunctionBegin; 2604 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2605 PetscFunctionReturn(0); 2606 } 2607 2608 /*@ 2609 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2610 2611 Collective on Mat 2612 2613 Input Parameters: 2614 + A - the matrix 2615 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2616 2617 Level: advanced 2618 2619 @*/ 2620 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2621 { 2622 PetscFunctionBegin; 2623 PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc)); 2624 PetscFunctionReturn(0); 2625 } 2626 2627 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2628 { 2629 PetscBool sc = PETSC_FALSE,flg; 2630 2631 PetscFunctionBegin; 2632 PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options"); 2633 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2634 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg)); 2635 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc)); 2636 PetscOptionsHeadEnd(); 2637 PetscFunctionReturn(0); 2638 } 2639 2640 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2641 { 2642 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2643 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2644 2645 PetscFunctionBegin; 2646 if (!Y->preallocated) { 2647 PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL)); 2648 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
*/ 2649 PetscInt nonew = aij->nonew; 2650 PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL)); 2651 aij->nonew = nonew; 2652 } 2653 PetscCall(MatShift_Basic(Y,a)); 2654 PetscFunctionReturn(0); 2655 } 2656 2657 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2658 { 2659 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2660 2661 PetscFunctionBegin; 2662 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2663 PetscCall(MatMissingDiagonal(a->A,missing,d)); 2664 if (d) { 2665 PetscInt rstart; 2666 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 2667 *d += rstart; 2668 2669 } 2670 PetscFunctionReturn(0); 2671 } 2672 2673 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2674 { 2675 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2676 2677 PetscFunctionBegin; 2678 PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag)); 2679 PetscFunctionReturn(0); 2680 } 2681 2682 /* -------------------------------------------------------------------*/ 2683 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2684 MatGetRow_MPIAIJ, 2685 MatRestoreRow_MPIAIJ, 2686 MatMult_MPIAIJ, 2687 /* 4*/ MatMultAdd_MPIAIJ, 2688 MatMultTranspose_MPIAIJ, 2689 MatMultTransposeAdd_MPIAIJ, 2690 NULL, 2691 NULL, 2692 NULL, 2693 /*10*/ NULL, 2694 NULL, 2695 NULL, 2696 MatSOR_MPIAIJ, 2697 MatTranspose_MPIAIJ, 2698 /*15*/ MatGetInfo_MPIAIJ, 2699 MatEqual_MPIAIJ, 2700 MatGetDiagonal_MPIAIJ, 2701 MatDiagonalScale_MPIAIJ, 2702 MatNorm_MPIAIJ, 2703 /*20*/ MatAssemblyBegin_MPIAIJ, 2704 MatAssemblyEnd_MPIAIJ, 2705 MatSetOption_MPIAIJ, 2706 MatZeroEntries_MPIAIJ, 2707 /*24*/ MatZeroRows_MPIAIJ, 2708 NULL, 2709 NULL, 2710 NULL, 2711 NULL, 2712 /*29*/ MatSetUp_MPIAIJ, 2713 NULL, 2714 NULL, 2715 MatGetDiagonalBlock_MPIAIJ, 2716 NULL, 2717 /*34*/ MatDuplicate_MPIAIJ, 2718 NULL, 2719 NULL, 2720 NULL, 2721 NULL, 2722 /*39*/ MatAXPY_MPIAIJ, 2723 MatCreateSubMatrices_MPIAIJ, 2724 MatIncreaseOverlap_MPIAIJ, 2725 MatGetValues_MPIAIJ, 2726 MatCopy_MPIAIJ, 2727 /*44*/ MatGetRowMax_MPIAIJ, 2728 MatScale_MPIAIJ, 2729 MatShift_MPIAIJ, 2730 MatDiagonalSet_MPIAIJ, 2731 MatZeroRowsColumns_MPIAIJ, 2732 /*49*/ MatSetRandom_MPIAIJ, 2733 MatGetRowIJ_MPIAIJ, 2734 MatRestoreRowIJ_MPIAIJ, 2735 NULL, 2736 NULL, 2737 /*54*/ MatFDColoringCreate_MPIXAIJ, 2738 NULL, 2739 MatSetUnfactored_MPIAIJ, 2740 MatPermute_MPIAIJ, 2741 NULL, 2742 /*59*/ MatCreateSubMatrix_MPIAIJ, 2743 MatDestroy_MPIAIJ, 2744 MatView_MPIAIJ, 2745 NULL, 2746 NULL, 2747 /*64*/ NULL, 2748 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2749 NULL, 2750 NULL, 2751 NULL, 2752 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2753 MatGetRowMinAbs_MPIAIJ, 2754 NULL, 2755 NULL, 2756 NULL, 2757 NULL, 2758 /*75*/ MatFDColoringApply_AIJ, 2759 MatSetFromOptions_MPIAIJ, 2760 NULL, 2761 NULL, 2762 MatFindZeroDiagonals_MPIAIJ, 2763 /*80*/ NULL, 2764 NULL, 2765 NULL, 2766 /*83*/ MatLoad_MPIAIJ, 2767 MatIsSymmetric_MPIAIJ, 2768 NULL, 2769 NULL, 2770 NULL, 2771 NULL, 2772 /*89*/ NULL, 2773 NULL, 2774 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2775 NULL, 2776 NULL, 2777 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2778 NULL, 2779 NULL, 2780 NULL, 2781 MatBindToCPU_MPIAIJ, 2782 /*99*/ MatProductSetFromOptions_MPIAIJ, 2783 NULL, 2784 NULL, 2785 MatConjugate_MPIAIJ, 2786 NULL, 2787 /*104*/MatSetValuesRow_MPIAIJ, 2788 MatRealPart_MPIAIJ, 2789 MatImaginaryPart_MPIAIJ, 2790 NULL, 2791 NULL, 2792 /*109*/NULL, 2793 NULL, 2794 MatGetRowMin_MPIAIJ, 2795 NULL, 2796 MatMissingDiagonal_MPIAIJ, 2797 
/*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2798 NULL, 2799 MatGetGhosts_MPIAIJ, 2800 NULL, 2801 NULL, 2802 /*119*/MatMultDiagonalBlock_MPIAIJ, 2803 NULL, 2804 NULL, 2805 NULL, 2806 MatGetMultiProcBlock_MPIAIJ, 2807 /*124*/MatFindNonzeroRows_MPIAIJ, 2808 MatGetColumnReductions_MPIAIJ, 2809 MatInvertBlockDiagonal_MPIAIJ, 2810 MatInvertVariableBlockDiagonal_MPIAIJ, 2811 MatCreateSubMatricesMPI_MPIAIJ, 2812 /*129*/NULL, 2813 NULL, 2814 NULL, 2815 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2816 NULL, 2817 /*134*/NULL, 2818 NULL, 2819 NULL, 2820 NULL, 2821 NULL, 2822 /*139*/MatSetBlockSizes_MPIAIJ, 2823 NULL, 2824 NULL, 2825 MatFDColoringSetUp_MPIXAIJ, 2826 MatFindOffBlockDiagonalEntries_MPIAIJ, 2827 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2828 /*145*/NULL, 2829 NULL, 2830 NULL, 2831 MatCreateGraph_Simple_AIJ, 2832 MatFilter_AIJ 2833 }; 2834 2835 /* ----------------------------------------------------------------------------------------*/ 2836 2837 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2838 { 2839 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2840 2841 PetscFunctionBegin; 2842 PetscCall(MatStoreValues(aij->A)); 2843 PetscCall(MatStoreValues(aij->B)); 2844 PetscFunctionReturn(0); 2845 } 2846 2847 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2848 { 2849 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2850 2851 PetscFunctionBegin; 2852 PetscCall(MatRetrieveValues(aij->A)); 2853 PetscCall(MatRetrieveValues(aij->B)); 2854 PetscFunctionReturn(0); 2855 } 2856 2857 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2858 { 2859 Mat_MPIAIJ *b; 2860 PetscMPIInt size; 2861 2862 PetscFunctionBegin; 2863 PetscCall(PetscLayoutSetUp(B->rmap)); 2864 PetscCall(PetscLayoutSetUp(B->cmap)); 2865 b = (Mat_MPIAIJ*)B->data; 2866 2867 #if defined(PETSC_USE_CTABLE) 2868 PetscCall(PetscTableDestroy(&b->colmap)); 2869 #else 2870 PetscCall(PetscFree(b->colmap)); 2871 #endif 2872 PetscCall(PetscFree(b->garray)); 2873 PetscCall(VecDestroy(&b->lvec)); 2874 PetscCall(VecScatterDestroy(&b->Mvctx)); 2875 2876 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2877 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 2878 PetscCall(MatDestroy(&b->B)); 2879 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 2880 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0)); 2881 PetscCall(MatSetBlockSizesFromMats(b->B,B,B)); 2882 PetscCall(MatSetType(b->B,MATSEQAIJ)); 2883 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 2884 2885 if (!B->preallocated) { 2886 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 2887 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 2888 PetscCall(MatSetBlockSizesFromMats(b->A,B,B)); 2889 PetscCall(MatSetType(b->A,MATSEQAIJ)); 2890 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 2891 } 2892 2893 PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz)); 2894 PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz)); 2895 B->preallocated = PETSC_TRUE; 2896 B->was_assembled = PETSC_FALSE; 2897 B->assembled = PETSC_FALSE; 2898 PetscFunctionReturn(0); 2899 } 2900 2901 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2902 { 2903 Mat_MPIAIJ *b; 2904 2905 PetscFunctionBegin; 2906 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2907 PetscCall(PetscLayoutSetUp(B->rmap)); 2908 PetscCall(PetscLayoutSetUp(B->cmap)); 2909 b = (Mat_MPIAIJ*)B->data; 2910 2911 #if defined(PETSC_USE_CTABLE) 2912 PetscCall(PetscTableDestroy(&b->colmap)); 2913 #else 2914 PetscCall(PetscFree(b->colmap)); 2915 #endif 2916 PetscCall(PetscFree(b->garray)); 2917 PetscCall(VecDestroy(&b->lvec)); 2918 PetscCall(VecScatterDestroy(&b->Mvctx)); 2919 2920 PetscCall(MatResetPreallocation(b->A)); 2921 PetscCall(MatResetPreallocation(b->B)); 2922 B->preallocated = PETSC_TRUE; 2923 B->was_assembled = PETSC_FALSE; 2924 B->assembled = PETSC_FALSE; 2925 PetscFunctionReturn(0); 2926 } 2927 2928 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2929 { 2930 Mat mat; 2931 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2932 2933 PetscFunctionBegin; 2934 *newmat = NULL; 2935 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 2936 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 2937 PetscCall(MatSetBlockSizesFromMats(mat,matin,matin)); 2938 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 2939 a = (Mat_MPIAIJ*)mat->data; 2940 2941 mat->factortype = matin->factortype; 2942 mat->assembled = matin->assembled; 2943 mat->insertmode = NOT_SET_VALUES; 2944 mat->preallocated = matin->preallocated; 2945 2946 a->size = oldmat->size; 2947 a->rank = oldmat->rank; 2948 a->donotstash = oldmat->donotstash; 2949 a->roworiented = oldmat->roworiented; 2950 a->rowindices = NULL; 2951 a->rowvalues = NULL; 2952 a->getrowactive = PETSC_FALSE; 2953 2954 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 2955 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 2956 2957 if (oldmat->colmap) { 2958 #if defined(PETSC_USE_CTABLE) 2959 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 2960 #else 2961 PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap)); 2962 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt))); 2963 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N)); 2964 #endif 2965 } else a->colmap = NULL; 2966 if (oldmat->garray) { 2967 PetscInt len; 2968 len = oldmat->B->cmap->n; 2969 PetscCall(PetscMalloc1(len+1,&a->garray)); 2970 PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 2971 if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 2972 } else a->garray = NULL; 2973 2974 /* It may happen MatDuplicate is called with a non-assembled matrix 2975 In fact, MatDuplicate only requires the matrix to be preallocated 2976 This may happen inside a 
DMCreateMatrix_Shell */ 2977 if (oldmat->lvec) { 2978 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 2979 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 2980 } 2981 if (oldmat->Mvctx) { 2982 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 2983 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 2984 } 2985 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2986 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2987 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 2988 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 2989 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 2990 *newmat = mat; 2991 PetscFunctionReturn(0); 2992 } 2993 2994 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2995 { 2996 PetscBool isbinary, ishdf5; 2997 2998 PetscFunctionBegin; 2999 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3000 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3001 /* force binary viewer to load .info file if it has not yet done so */ 3002 PetscCall(PetscViewerSetUp(viewer)); 3003 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 3004 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5)); 3005 if (isbinary) { 3006 PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer)); 3007 } else if (ishdf5) { 3008 #if defined(PETSC_HAVE_HDF5) 3009 PetscCall(MatLoad_AIJ_HDF5(newMat,viewer)); 3010 #else 3011 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3012 #endif 3013 } else { 3014 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3015 } 3016 PetscFunctionReturn(0); 3017 } 3018 3019 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3020 { 3021 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3022 PetscInt *rowidxs,*colidxs; 3023 PetscScalar *matvals; 3024 3025 PetscFunctionBegin; 3026 PetscCall(PetscViewerSetUp(viewer)); 3027 3028 /* read in matrix header */ 3029 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3030 PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3031 M = header[1]; N = header[2]; nz = header[3]; 3032 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3033 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3034 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3035 3036 /* set block sizes from the viewer's .info file */ 3037 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3038 /* set global sizes if not set already */ 3039 if (mat->rmap->N < 0) mat->rmap->N = M; 3040 if (mat->cmap->N < 0) mat->cmap->N = N; 3041 PetscCall(PetscLayoutSetUp(mat->rmap)); 3042 PetscCall(PetscLayoutSetUp(mat->cmap)); 3043 3044 /* check if the matrix sizes are correct */ 3045 PetscCall(MatGetSize(mat,&rows,&cols)); 3046 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT 
", %" PetscInt_FMT ")",M,N,rows,cols); 3047 3048 /* read in row lengths and build row indices */ 3049 PetscCall(MatGetLocalSize(mat,&m,NULL)); 3050 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3051 PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT)); 3052 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3053 PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer))); 3054 PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3055 /* read in column indices and matrix values */ 3056 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals)); 3057 PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 3058 PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 3059 /* store matrix indices and values */ 3060 PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals)); 3061 PetscCall(PetscFree(rowidxs)); 3062 PetscCall(PetscFree2(colidxs,matvals)); 3063 PetscFunctionReturn(0); 3064 } 3065 3066 /* Not scalable because of ISAllGather() unless getting all columns. */ 3067 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3068 { 3069 IS iscol_local; 3070 PetscBool isstride; 3071 PetscMPIInt lisstride=0,gisstride; 3072 3073 PetscFunctionBegin; 3074 /* check if we are grabbing all columns*/ 3075 PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride)); 3076 3077 if (isstride) { 3078 PetscInt start,len,mstart,mlen; 3079 PetscCall(ISStrideGetInfo(iscol,&start,NULL)); 3080 PetscCall(ISGetLocalSize(iscol,&len)); 3081 PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen)); 3082 if (mstart == start && mlen-mstart == len) lisstride = 1; 3083 } 3084 3085 PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat))); 3086 if (gisstride) { 3087 PetscInt N; 3088 PetscCall(MatGetSize(mat,NULL,&N)); 3089 PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local)); 3090 PetscCall(ISSetIdentity(iscol_local)); 3091 PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3092 } else { 3093 PetscInt cbs; 3094 PetscCall(ISGetBlockSize(iscol,&cbs)); 3095 PetscCall(ISAllGather(iscol,&iscol_local)); 3096 PetscCall(ISSetBlockSize(iscol_local,cbs)); 3097 } 3098 3099 *isseq = iscol_local; 3100 PetscFunctionReturn(0); 3101 } 3102 3103 /* 3104 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3105 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3106 3107 Input Parameters: 3108 mat - matrix 3109 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3110 i.e., mat->rstart <= isrow[i] < mat->rend 3111 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3112 i.e., mat->cstart <= iscol[i] < mat->cend 3113 Output Parameter: 3114 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3115 iscol_o - sequential column index set for retrieving mat->B 3116 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3117 */ 3118 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3119 { 3120 Vec x,cmap; 3121 const PetscInt *is_idx; 3122 
PetscScalar *xarray,*cmaparray; 3123 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3124 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3125 Mat B=a->B; 3126 Vec lvec=a->lvec,lcmap; 3127 PetscInt i,cstart,cend,Bn=B->cmap->N; 3128 MPI_Comm comm; 3129 VecScatter Mvctx=a->Mvctx; 3130 3131 PetscFunctionBegin; 3132 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3133 PetscCall(ISGetLocalSize(iscol,&ncols)); 3134 3135 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3136 PetscCall(MatCreateVecs(mat,&x,NULL)); 3137 PetscCall(VecSet(x,-1.0)); 3138 PetscCall(VecDuplicate(x,&cmap)); 3139 PetscCall(VecSet(cmap,-1.0)); 3140 3141 /* Get start indices */ 3142 PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm)); 3143 isstart -= ncols; 3144 PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend)); 3145 3146 PetscCall(ISGetIndices(iscol,&is_idx)); 3147 PetscCall(VecGetArray(x,&xarray)); 3148 PetscCall(VecGetArray(cmap,&cmaparray)); 3149 PetscCall(PetscMalloc1(ncols,&idx)); 3150 for (i=0; i<ncols; i++) { 3151 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3152 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3153 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3154 } 3155 PetscCall(VecRestoreArray(x,&xarray)); 3156 PetscCall(VecRestoreArray(cmap,&cmaparray)); 3157 PetscCall(ISRestoreIndices(iscol,&is_idx)); 3158 3159 /* Get iscol_d */ 3160 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d)); 3161 PetscCall(ISGetBlockSize(iscol,&i)); 3162 PetscCall(ISSetBlockSize(*iscol_d,i)); 3163 3164 /* Get isrow_d */ 3165 PetscCall(ISGetLocalSize(isrow,&m)); 3166 rstart = mat->rmap->rstart; 3167 PetscCall(PetscMalloc1(m,&idx)); 3168 PetscCall(ISGetIndices(isrow,&is_idx)); 3169 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3170 PetscCall(ISRestoreIndices(isrow,&is_idx)); 3171 3172 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d)); 3173 PetscCall(ISGetBlockSize(isrow,&i)); 3174 PetscCall(ISSetBlockSize(*isrow_d,i)); 3175 3176 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3177 PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 3178 PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 3179 3180 PetscCall(VecDuplicate(lvec,&lcmap)); 3181 3182 PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 3183 PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 3184 3185 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3186 /* off-process column indices */ 3187 count = 0; 3188 PetscCall(PetscMalloc1(Bn,&idx)); 3189 PetscCall(PetscMalloc1(Bn,&cmap1)); 3190 3191 PetscCall(VecGetArray(lvec,&xarray)); 3192 PetscCall(VecGetArray(lcmap,&cmaparray)); 3193 for (i=0; i<Bn; i++) { 3194 if (PetscRealPart(xarray[i]) > -1.0) { 3195 idx[count] = i; /* local column index in off-diagonal part B */ 3196 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3197 count++; 3198 } 3199 } 3200 PetscCall(VecRestoreArray(lvec,&xarray)); 3201 PetscCall(VecRestoreArray(lcmap,&cmaparray)); 3202 3203 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o)); 3204 /* cannot ensure iscol_o has same blocksize as iscol! 
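     iscol_o keeps only those columns of iscol that are actually present in the off-diagonal block B on this
     process, so any regular block structure of iscol is in general lost; contrast this with iscol_d above,
     which inherits the block size of iscol via ISSetBlockSize().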
*/ 3205 3206 PetscCall(PetscFree(idx)); 3207 *garray = cmap1; 3208 3209 PetscCall(VecDestroy(&x)); 3210 PetscCall(VecDestroy(&cmap)); 3211 PetscCall(VecDestroy(&lcmap)); 3212 PetscFunctionReturn(0); 3213 } 3214 3215 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3216 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3217 { 3218 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3219 Mat M = NULL; 3220 MPI_Comm comm; 3221 IS iscol_d,isrow_d,iscol_o; 3222 Mat Asub = NULL,Bsub = NULL; 3223 PetscInt n; 3224 3225 PetscFunctionBegin; 3226 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3227 3228 if (call == MAT_REUSE_MATRIX) { 3229 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3230 PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d)); 3231 PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3232 3233 PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d)); 3234 PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3235 3236 PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o)); 3237 PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3238 3239 /* Update diagonal and off-diagonal portions of submat */ 3240 asub = (Mat_MPIAIJ*)(*submat)->data; 3241 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A)); 3242 PetscCall(ISGetLocalSize(iscol_o,&n)); 3243 if (n) { 3244 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B)); 3245 } 3246 PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY)); 3247 PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY)); 3248 3249 } else { /* call == MAT_INITIAL_MATRIX) */ 3250 const PetscInt *garray; 3251 PetscInt BsubN; 3252 3253 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
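       here isrow_d and iscol_d select rows/columns of the diagonal block a->A in local numbering, iscol_o
       selects columns of the off-diagonal block a->B, and garray records, for each entry of iscol_o, its
       position within iscol (see the comment above ISGetSeqIS_SameColDist_Private()).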
*/ 3254 PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray)); 3255 3256 /* Create local submatrices Asub and Bsub */ 3257 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub)); 3258 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub)); 3259 3260 /* Create submatrix M */ 3261 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M)); 3262 3263 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3264 asub = (Mat_MPIAIJ*)M->data; 3265 3266 PetscCall(ISGetLocalSize(iscol_o,&BsubN)); 3267 n = asub->B->cmap->N; 3268 if (BsubN > n) { 3269 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3270 const PetscInt *idx; 3271 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3272 PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN)); 3273 3274 PetscCall(PetscMalloc1(n,&idx_new)); 3275 j = 0; 3276 PetscCall(ISGetIndices(iscol_o,&idx)); 3277 for (i=0; i<n; i++) { 3278 if (j >= BsubN) break; 3279 while (subgarray[i] > garray[j]) j++; 3280 3281 if (subgarray[i] == garray[j]) { 3282 idx_new[i] = idx[j++]; 3283 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]); 3284 } 3285 PetscCall(ISRestoreIndices(iscol_o,&idx)); 3286 3287 PetscCall(ISDestroy(&iscol_o)); 3288 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o)); 3289 3290 } else if (BsubN < n) { 3291 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N); 3292 } 3293 3294 PetscCall(PetscFree(garray)); 3295 *submat = M; 3296 3297 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3298 PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d)); 3299 PetscCall(ISDestroy(&isrow_d)); 3300 3301 PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d)); 3302 PetscCall(ISDestroy(&iscol_d)); 3303 3304 PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o)); 3305 PetscCall(ISDestroy(&iscol_o)); 3306 } 3307 PetscFunctionReturn(0); 3308 } 3309 3310 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3311 { 3312 IS iscol_local=NULL,isrow_d; 3313 PetscInt csize; 3314 PetscInt n,i,j,start,end; 3315 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3316 MPI_Comm comm; 3317 3318 PetscFunctionBegin; 3319 /* If isrow has same processor distribution as mat, 3320 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3321 if (call == MAT_REUSE_MATRIX) { 3322 PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d)); 3323 if (isrow_d) { 3324 sameRowDist = PETSC_TRUE; 3325 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3326 } else { 3327 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local)); 3328 if (iscol_local) { 3329 sameRowDist = PETSC_TRUE; 3330 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3331 } 3332 } 3333 } else { 3334 /* Check if isrow has same processor distribution as mat */ 3335 sameDist[0] = PETSC_FALSE; 3336 PetscCall(ISGetLocalSize(isrow,&n)); 3337 if (!n) { 3338 sameDist[0] = PETSC_TRUE; 3339 } 
else { 3340 PetscCall(ISGetMinMax(isrow,&i,&j)); 3341 PetscCall(MatGetOwnershipRange(mat,&start,&end)); 3342 if (i >= start && j < end) { 3343 sameDist[0] = PETSC_TRUE; 3344 } 3345 } 3346 3347 /* Check if iscol has same processor distribution as mat */ 3348 sameDist[1] = PETSC_FALSE; 3349 PetscCall(ISGetLocalSize(iscol,&n)); 3350 if (!n) { 3351 sameDist[1] = PETSC_TRUE; 3352 } else { 3353 PetscCall(ISGetMinMax(iscol,&i,&j)); 3354 PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end)); 3355 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3356 } 3357 3358 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3359 PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm)); 3360 sameRowDist = tsameDist[0]; 3361 } 3362 3363 if (sameRowDist) { 3364 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3365 /* isrow and iscol have same processor distribution as mat */ 3366 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat)); 3367 PetscFunctionReturn(0); 3368 } else { /* sameRowDist */ 3369 /* isrow has same processor distribution as mat */ 3370 if (call == MAT_INITIAL_MATRIX) { 3371 PetscBool sorted; 3372 PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 3373 PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */ 3374 PetscCall(ISGetSize(iscol,&i)); 3375 PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i); 3376 3377 PetscCall(ISSorted(iscol_local,&sorted)); 3378 if (sorted) { 3379 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3380 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat)); 3381 PetscFunctionReturn(0); 3382 } 3383 } else { /* call == MAT_REUSE_MATRIX */ 3384 IS iscol_sub; 3385 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3386 if (iscol_sub) { 3387 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat)); 3388 PetscFunctionReturn(0); 3389 } 3390 } 3391 } 3392 } 3393 3394 /* General case: iscol -> iscol_local which has global size of iscol */ 3395 if (call == MAT_REUSE_MATRIX) { 3396 PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local)); 3397 PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3398 } else { 3399 if (!iscol_local) { 3400 PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 3401 } 3402 } 3403 3404 PetscCall(ISGetLocalSize(iscol,&csize)); 3405 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat)); 3406 3407 if (call == MAT_INITIAL_MATRIX) { 3408 PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local)); 3409 PetscCall(ISDestroy(&iscol_local)); 3410 } 3411 PetscFunctionReturn(0); 3412 } 3413 3414 /*@C 3415 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3416 and "off-diagonal" part of the matrix in CSR format. 3417 3418 Collective 3419 3420 Input Parameters: 3421 + comm - MPI communicator 3422 . A - "diagonal" portion of matrix 3423 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3424 - garray - global index of B columns 3425 3426 Output Parameter: 3427 . 
mat - the matrix, with input A as its local diagonal matrix 3428 Level: advanced 3429 3430 Notes: 3431 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3432 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3433 3434 .seealso: `MatCreateMPIAIJWithSplitArrays()` 3435 @*/ 3436 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3437 { 3438 Mat_MPIAIJ *maij; 3439 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3440 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3441 const PetscScalar *oa; 3442 Mat Bnew; 3443 PetscInt m,n,N; 3444 3445 PetscFunctionBegin; 3446 PetscCall(MatCreate(comm,mat)); 3447 PetscCall(MatGetSize(A,&m,&n)); 3448 PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N); 3449 PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs); 3450 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3451 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3452 3453 /* Get global columns of mat */ 3454 PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm)); 3455 3456 PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N)); 3457 PetscCall(MatSetType(*mat,MATMPIAIJ)); 3458 PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs)); 3459 maij = (Mat_MPIAIJ*)(*mat)->data; 3460 3461 (*mat)->preallocated = PETSC_TRUE; 3462 3463 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3464 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3465 3466 /* Set A as diagonal portion of *mat */ 3467 maij->A = A; 3468 3469 nz = oi[m]; 3470 for (i=0; i<nz; i++) { 3471 col = oj[i]; 3472 oj[i] = garray[col]; 3473 } 3474 3475 /* Set Bnew as off-diagonal portion of *mat */ 3476 PetscCall(MatSeqAIJGetArrayRead(B,&oa)); 3477 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew)); 3478 PetscCall(MatSeqAIJRestoreArrayRead(B,&oa)); 3479 bnew = (Mat_SeqAIJ*)Bnew->data; 3480 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3481 maij->B = Bnew; 3482 3483 PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N); 3484 3485 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3486 b->free_a = PETSC_FALSE; 3487 b->free_ij = PETSC_FALSE; 3488 PetscCall(MatDestroy(&B)); 3489 3490 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3491 bnew->free_a = PETSC_TRUE; 3492 bnew->free_ij = PETSC_TRUE; 3493 3494 /* condense columns of maij->B */ 3495 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 3496 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 3497 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 3498 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 3499 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3500 PetscFunctionReturn(0); 3501 } 3502 3503 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3504 3505 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3506 { 3507 PetscInt 
i,m,n,rstart,row,rend,nz,j,bs,cbs; 3508 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3509 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3510 Mat M,Msub,B=a->B; 3511 MatScalar *aa; 3512 Mat_SeqAIJ *aij; 3513 PetscInt *garray = a->garray,*colsub,Ncols; 3514 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3515 IS iscol_sub,iscmap; 3516 const PetscInt *is_idx,*cmap; 3517 PetscBool allcolumns=PETSC_FALSE; 3518 MPI_Comm comm; 3519 3520 PetscFunctionBegin; 3521 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3522 if (call == MAT_REUSE_MATRIX) { 3523 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3524 PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3525 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3526 3527 PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap)); 3528 PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3529 3530 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub)); 3531 PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3532 3533 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub)); 3534 3535 } else { /* call == MAT_INITIAL_MATRIX) */ 3536 PetscBool flg; 3537 3538 PetscCall(ISGetLocalSize(iscol,&n)); 3539 PetscCall(ISGetSize(iscol,&Ncols)); 3540 3541 /* (1) iscol -> nonscalable iscol_local */ 3542 /* Check for special case: each processor gets entire matrix columns */ 3543 PetscCall(ISIdentity(iscol_local,&flg)); 3544 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3545 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3546 if (allcolumns) { 3547 iscol_sub = iscol_local; 3548 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3549 PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap)); 3550 3551 } else { 3552 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3553 PetscInt *idx,*cmap1,k; 3554 PetscCall(PetscMalloc1(Ncols,&idx)); 3555 PetscCall(PetscMalloc1(Ncols,&cmap1)); 3556 PetscCall(ISGetIndices(iscol_local,&is_idx)); 3557 count = 0; 3558 k = 0; 3559 for (i=0; i<Ncols; i++) { 3560 j = is_idx[i]; 3561 if (j >= cstart && j < cend) { 3562 /* diagonal part of mat */ 3563 idx[count] = j; 3564 cmap1[count++] = i; /* column index in submat */ 3565 } else if (Bn) { 3566 /* off-diagonal part of mat */ 3567 if (j == garray[k]) { 3568 idx[count] = j; 3569 cmap1[count++] = i; /* column index in submat */ 3570 } else if (j > garray[k]) { 3571 while (j > garray[k] && k < Bn-1) k++; 3572 if (j == garray[k]) { 3573 idx[count] = j; 3574 cmap1[count++] = i; /* column index in submat */ 3575 } 3576 } 3577 } 3578 } 3579 PetscCall(ISRestoreIndices(iscol_local,&is_idx)); 3580 3581 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub)); 3582 PetscCall(ISGetBlockSize(iscol,&cbs)); 3583 PetscCall(ISSetBlockSize(iscol_sub,cbs)); 3584 3585 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap)); 3586 } 3587 3588 /* (3) Create sequential Msub */ 3589 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub)); 3590 } 3591 3592 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3593 aij = (Mat_SeqAIJ*)(Msub)->data; 3594 ii = aij->i; 3595 PetscCall(ISGetIndices(iscmap,&cmap)); 3596 3597 /* 3598 m - number of local rows 3599 Ncols - number of columns (same on all processors) 3600 rstart - first row in new global matrix generated 3601 */ 3602 PetscCall(MatGetSize(Msub,&m,NULL)); 3603 3604 if (call == MAT_INITIAL_MATRIX) { 3605 /* (4) Create parallel newmat */ 3606 PetscMPIInt rank,size; 3607 PetscInt csize; 3608 3609 PetscCallMPI(MPI_Comm_size(comm,&size)); 3610 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3611 3612 /* 3613 Determine the number of non-zeros in the diagonal and off-diagonal 3614 portions of the matrix in order to do correct preallocation 3615 */ 3616 3617 /* first get start and end of "diagonal" columns */ 3618 PetscCall(ISGetLocalSize(iscol,&csize)); 3619 if (csize == PETSC_DECIDE) { 3620 PetscCall(ISGetSize(isrow,&mglobal)); 3621 if (mglobal == Ncols) { /* square matrix */ 3622 nlocal = m; 3623 } else { 3624 nlocal = Ncols/size + ((Ncols % size) > rank); 3625 } 3626 } else { 3627 nlocal = csize; 3628 } 3629 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3630 rstart = rend - nlocal; 3631 PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols); 3632 3633 /* next, compute all the lengths */ 3634 jj = aij->j; 3635 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3636 olens = dlens + m; 3637 for (i=0; i<m; i++) { 3638 jend = ii[i+1] - ii[i]; 3639 olen = 0; 3640 dlen = 0; 3641 for (j=0; j<jend; j++) { 3642 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3643 else dlen++; 3644 jj++; 3645 } 3646 olens[i] = olen; 3647 dlens[i] = dlen; 3648 } 3649 3650 PetscCall(ISGetBlockSize(isrow,&bs)); 3651 PetscCall(ISGetBlockSize(iscol,&cbs)); 3652 3653 PetscCall(MatCreate(comm,&M)); 3654 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols)); 3655 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3656 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3657 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3658 
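    /* dlens[i]/olens[i] computed above count, for each local row of Msub, how many selected entries map
       (via cmap) inside versus outside the new diagonal column range [rstart,rend); the preallocation above
       is therefore exact, so the MatSetValues_MPIAIJ() calls in step (5) below should not trigger additional
       mallocs */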
PetscCall(PetscFree(dlens));
3659
3660   } else { /* call == MAT_REUSE_MATRIX */
3661     M = *newmat;
3662     PetscCall(MatGetLocalSize(M,&i,NULL));
3663     PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3664     PetscCall(MatZeroEntries(M));
3665     /*
3666       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3667       rather than the slower MatSetValues().
3668     */
3669     M->was_assembled = PETSC_TRUE;
3670     M->assembled     = PETSC_FALSE;
3671   }
3672
3673   /* (5) Set values of Msub into *newmat */
3674   PetscCall(PetscMalloc1(count,&colsub));
3675   PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
3676
3677   jj = aij->j;
3678   PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
3679   for (i=0; i<m; i++) {
3680     row = rstart + i;
3681     nz  = ii[i+1] - ii[i];
3682     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3683     PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
3684     jj += nz; aa += nz;
3685   }
3686   PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
3687   PetscCall(ISRestoreIndices(iscmap,&cmap));
3688
3689   PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
3690   PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
3691
3692   PetscCall(PetscFree(colsub));
3693
3694   /* save Msub, iscol_sub and iscmap used on this process for the next request */
3695   if (call == MAT_INITIAL_MATRIX) {
3696     *newmat = M;
3697     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
3698     PetscCall(MatDestroy(&Msub));
3699
3700     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
3701     PetscCall(ISDestroy(&iscol_sub));
3702
3703     PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
3704     PetscCall(ISDestroy(&iscmap));
3705
3706     if (iscol_local) {
3707       PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
3708       PetscCall(ISDestroy(&iscol_local));
3709     }
3710   }
3711   PetscFunctionReturn(0);
3712 }
3713
3714 /*
3715   Not great since it makes two copies of the submatrix: first a sequential SeqAIJ submatrix on each
3716   process, and then the final parallel result assembled by concatenating those local matrices.
3717   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3718
3719   Note: This requires a sequential iscol that contains all of the requested column indices.
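  (In MatCreateSubMatrix_MPIAIJ() above, for example, this is the iscol_local produced by
  ISGetSeqIS_Private()/ISAllGather(): an IS on PETSC_COMM_SELF that lists every selected global column
  on each process.)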
3720 */ 3721 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3722 { 3723 PetscMPIInt rank,size; 3724 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3725 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3726 Mat M,Mreuse; 3727 MatScalar *aa,*vwork; 3728 MPI_Comm comm; 3729 Mat_SeqAIJ *aij; 3730 PetscBool colflag,allcolumns=PETSC_FALSE; 3731 3732 PetscFunctionBegin; 3733 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3734 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3735 PetscCallMPI(MPI_Comm_size(comm,&size)); 3736 3737 /* Check for special case: each processor gets entire matrix columns */ 3738 PetscCall(ISIdentity(iscol,&colflag)); 3739 PetscCall(ISGetLocalSize(iscol,&n)); 3740 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3741 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3742 3743 if (call == MAT_REUSE_MATRIX) { 3744 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 3745 PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3746 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse)); 3747 } else { 3748 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse)); 3749 } 3750 3751 /* 3752 m - number of local rows 3753 n - number of columns (same on all processors) 3754 rstart - first row in new global matrix generated 3755 */ 3756 PetscCall(MatGetSize(Mreuse,&m,&n)); 3757 PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs)); 3758 if (call == MAT_INITIAL_MATRIX) { 3759 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3760 ii = aij->i; 3761 jj = aij->j; 3762 3763 /* 3764 Determine the number of non-zeros in the diagonal and off-diagonal 3765 portions of the matrix in order to do correct preallocation 3766 */ 3767 3768 /* first get start and end of "diagonal" columns */ 3769 if (csize == PETSC_DECIDE) { 3770 PetscCall(ISGetSize(isrow,&mglobal)); 3771 if (mglobal == n) { /* square matrix */ 3772 nlocal = m; 3773 } else { 3774 nlocal = n/size + ((n % size) > rank); 3775 } 3776 } else { 3777 nlocal = csize; 3778 } 3779 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3780 rstart = rend - nlocal; 3781 PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 3782 3783 /* next, compute all the lengths */ 3784 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3785 olens = dlens + m; 3786 for (i=0; i<m; i++) { 3787 jend = ii[i+1] - ii[i]; 3788 olen = 0; 3789 dlen = 0; 3790 for (j=0; j<jend; j++) { 3791 if (*jj < rstart || *jj >= rend) olen++; 3792 else dlen++; 3793 jj++; 3794 } 3795 olens[i] = olen; 3796 dlens[i] = dlen; 3797 } 3798 PetscCall(MatCreate(comm,&M)); 3799 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n)); 3800 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3801 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3802 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3803 PetscCall(PetscFree(dlens)); 3804 } else { 3805 PetscInt ml,nl; 3806 3807 M = *newmat; 3808 PetscCall(MatGetLocalSize(M,&ml,&nl)); 3809 PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3810 PetscCall(MatZeroEntries(M)); 3811 /* 3812 The next two lines are needed so we may call 
MatSetValues_MPIAIJ() below directly, 3813 rather than the slower MatSetValues(). 3814 */ 3815 M->was_assembled = PETSC_TRUE; 3816 M->assembled = PETSC_FALSE; 3817 } 3818 PetscCall(MatGetOwnershipRange(M,&rstart,&rend)); 3819 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3820 ii = aij->i; 3821 jj = aij->j; 3822 3823 /* trigger copy to CPU if needed */ 3824 PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa)); 3825 for (i=0; i<m; i++) { 3826 row = rstart + i; 3827 nz = ii[i+1] - ii[i]; 3828 cwork = jj; jj += nz; 3829 vwork = aa; aa += nz; 3830 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES)); 3831 } 3832 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa)); 3833 3834 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3835 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3836 *newmat = M; 3837 3838 /* save submatrix used in processor for next request */ 3839 if (call == MAT_INITIAL_MATRIX) { 3840 PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse)); 3841 PetscCall(MatDestroy(&Mreuse)); 3842 } 3843 PetscFunctionReturn(0); 3844 } 3845 3846 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3847 { 3848 PetscInt m,cstart, cend,j,nnz,i,d,*ld; 3849 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3850 const PetscInt *JJ; 3851 PetscBool nooffprocentries; 3852 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)B->data; 3853 3854 PetscFunctionBegin; 3855 PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]); 3856 3857 PetscCall(PetscLayoutSetUp(B->rmap)); 3858 PetscCall(PetscLayoutSetUp(B->cmap)); 3859 m = B->rmap->n; 3860 cstart = B->cmap->rstart; 3861 cend = B->cmap->rend; 3862 rstart = B->rmap->rstart; 3863 3864 PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz)); 3865 3866 if (PetscDefined(USE_DEBUG)) { 3867 for (i=0; i<m; i++) { 3868 nnz = Ii[i+1]- Ii[i]; 3869 JJ = J + Ii[i]; 3870 PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz); 3871 PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]); 3872 PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N); 3873 } 3874 } 3875 3876 for (i=0; i<m; i++) { 3877 nnz = Ii[i+1]- Ii[i]; 3878 JJ = J + Ii[i]; 3879 nnz_max = PetscMax(nnz_max,nnz); 3880 d = 0; 3881 for (j=0; j<nnz; j++) { 3882 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3883 } 3884 d_nnz[i] = d; 3885 o_nnz[i] = nnz - d; 3886 } 3887 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 3888 PetscCall(PetscFree2(d_nnz,o_nnz)); 3889 3890 for (i=0; i<m; i++) { 3891 ii = i + rstart; 3892 PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES)); 3893 } 3894 nooffprocentries = B->nooffprocentries; 3895 B->nooffprocentries = PETSC_TRUE; 3896 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 3897 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3898 B->nooffprocentries = nooffprocentries; 3899 3900 /* count number of entries below block diagonal */ 3901 PetscCall(PetscFree(Aij->ld)); 3902 PetscCall(PetscCalloc1(m,&ld)); 3903 Aij->ld = ld; 3904 for (i=0; i<m; i++) { 3905 nnz = Ii[i+1] - Ii[i]; 3906 j = 0; 3907 while (j < nnz && J[j] < cstart) {j++;} 3908 ld[i] = j; 3909 J += nnz; 3910 } 3911 3912 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3913 PetscFunctionReturn(0); 3914 } 3915 3916 /*@ 3917 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3918 (the default parallel PETSc format). 3919 3920 Collective 3921 3922 Input Parameters: 3923 + B - the matrix 3924 . i - the indices into j for the start of each local row (starts with zero) 3925 . j - the column indices for each local row (starts with zero) 3926 - v - optional values in the matrix 3927 3928 Level: developer 3929 3930 Notes: 3931 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3932 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3933 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3934 3935 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3936 3937 The format which is used for the sparse matrix input, is equivalent to a 3938 row-major ordering.. i.e for the following matrix, the input data expected is 3939 as shown 3940 3941 $ 1 0 0 3942 $ 2 0 3 P0 3943 $ ------- 3944 $ 4 5 6 P1 3945 $ 3946 $ Process0 [P0]: rows_owned=[0,1] 3947 $ i = {0,1,3} [size = nrow+1 = 2+1] 3948 $ j = {0,0,2} [size = 3] 3949 $ v = {1,2,3} [size = 3] 3950 $ 3951 $ Process1 [P1]: rows_owned=[2] 3952 $ i = {0,3} [size = nrow+1 = 1+1] 3953 $ j = {0,1,2} [size = 3] 3954 $ v = {4,5,6} [size = 3] 3955 3956 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 3957 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 3958 @*/ 3959 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3960 { 3961 PetscFunctionBegin; 3962 PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v)); 3963 PetscFunctionReturn(0); 3964 } 3965 3966 /*@C 3967 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3968 (the default parallel PETSc format). For good matrix assembly performance 3969 the user should preallocate the matrix storage by setting the parameters 3970 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3971 performance can be increased by more than a factor of 50. 3972 3973 Collective 3974 3975 Input Parameters: 3976 + B - the matrix 3977 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3978 (same value is used for all local rows) 3979 . d_nnz - array containing the number of nonzeros in the various rows of the 3980 DIAGONAL portion of the local submatrix (possibly different for each row) 3981 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3982 The size of this array is equal to the number of local rows, i.e 'm'. 
3983 For matrices that will be factored, you must leave room for (and set) 3984 the diagonal entry even if it is zero. 3985 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3986 submatrix (same value is used for all local rows). 3987 - o_nnz - array containing the number of nonzeros in the various rows of the 3988 OFF-DIAGONAL portion of the local submatrix (possibly different for 3989 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3990 structure. The size of this array is equal to the number 3991 of local rows, i.e 'm'. 3992 3993 If the *_nnz parameter is given then the *_nz parameter is ignored 3994 3995 The AIJ format (also called the Yale sparse matrix format or 3996 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3997 storage. The stored row and column indices begin with zero. 3998 See Users-Manual: ch_mat for details. 3999 4000 The parallel matrix is partitioned such that the first m0 rows belong to 4001 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4002 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4003 4004 The DIAGONAL portion of the local submatrix of a processor can be defined 4005 as the submatrix which is obtained by extraction the part corresponding to 4006 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4007 first row that belongs to the processor, r2 is the last row belonging to 4008 the this processor, and c1-c2 is range of indices of the local part of a 4009 vector suitable for applying the matrix to. This is an mxn matrix. In the 4010 common case of a square matrix, the row and column ranges are the same and 4011 the DIAGONAL part is also square. The remaining portion of the local 4012 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4013 4014 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4015 4016 You can call MatGetInfo() to get information on how effective the preallocation was; 4017 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4018 You can also run with the option -info and look for messages with the string 4019 malloc in them to see if additional memory allocation was needed. 4020 4021 Example usage: 4022 4023 Consider the following 8x8 matrix with 34 non-zero values, that is 4024 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4025 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4026 as follows: 4027 4028 .vb 4029 1 2 0 | 0 3 0 | 0 4 4030 Proc0 0 5 6 | 7 0 0 | 8 0 4031 9 0 10 | 11 0 0 | 12 0 4032 ------------------------------------- 4033 13 0 14 | 15 16 17 | 0 0 4034 Proc1 0 18 0 | 19 20 21 | 0 0 4035 0 0 0 | 22 23 0 | 24 0 4036 ------------------------------------- 4037 Proc2 25 26 27 | 0 0 28 | 29 0 4038 30 0 0 | 31 32 33 | 0 34 4039 .ve 4040 4041 This can be represented as a collection of submatrices as: 4042 4043 .vb 4044 A B C 4045 D E F 4046 G H I 4047 .ve 4048 4049 Where the submatrices A,B,C are owned by proc0, D,E,F are 4050 owned by proc1, G,H,I are owned by proc2. 4051 4052 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4053 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4054 The 'M','N' parameters are 8,8, and have the same values on all procs. 4055 4056 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4057 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4058 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
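    For this example, a typical creation sequence on every process (purely illustrative; the variable
    names A, comm, m, n mirror the parameters described above) is
.vb
      MatCreate(comm,&A);
      MatSetType(A,MATMPIAIJ);
      MatSetSizes(A,m,n,8,8);   /* m = n = 3 on proc0 and proc1, m = n = 2 on proc2 */
      MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);
.ve
    with d_nz/d_nnz/o_nz/o_nnz chosen as discussed below.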
4059 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4060 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4061 matrix, ans [DF] as another SeqAIJ matrix. 4062 4063 When d_nz, o_nz parameters are specified, d_nz storage elements are 4064 allocated for every row of the local diagonal submatrix, and o_nz 4065 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4066 One way to choose d_nz and o_nz is to use the max nonzerors per local 4067 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4068 In this case, the values of d_nz,o_nz are: 4069 .vb 4070 proc0 : dnz = 2, o_nz = 2 4071 proc1 : dnz = 3, o_nz = 2 4072 proc2 : dnz = 1, o_nz = 4 4073 .ve 4074 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4075 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4076 for proc3. i.e we are using 12+15+10=37 storage locations to store 4077 34 values. 4078 4079 When d_nnz, o_nnz parameters are specified, the storage is specified 4080 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4081 In the above case the values for d_nnz,o_nnz are: 4082 .vb 4083 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4084 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4085 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4086 .ve 4087 Here the space allocated is sum of all the above values i.e 34, and 4088 hence pre-allocation is perfect. 4089 4090 Level: intermediate 4091 4092 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4093 `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()` 4094 @*/ 4095 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4096 { 4097 PetscFunctionBegin; 4098 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4099 PetscValidType(B,1); 4100 PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz)); 4101 PetscFunctionReturn(0); 4102 } 4103 4104 /*@ 4105 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4106 CSR format for the local rows. 4107 4108 Collective 4109 4110 Input Parameters: 4111 + comm - MPI communicator 4112 . m - number of local rows (Cannot be PETSC_DECIDE) 4113 . n - This value should be the same as the local size used in creating the 4114 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4115 calculated if N is given) For square matrices n is almost always m. 4116 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4117 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4118 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4119 . j - column indices 4120 - a - matrix values 4121 4122 Output Parameter: 4123 . mat - the matrix 4124 4125 Level: intermediate 4126 4127 Notes: 4128 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4129 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4130 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4131 4132 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4133 4134 The format which is used for the sparse matrix input, is equivalent to a 4135 row-major ordering.. 
i.e., for the following matrix the expected input data is
4136 as shown:
4137
4138 Once the matrix has been created, its numerical values can be updated with MatUpdateMPIAIJWithArrays().
4139
4140 $   1 0 0
4141 $   2 0 3   P0
4142 $  -------
4143 $   4 5 6   P1
4144 $
4145 $  Process0 [P0]: rows_owned=[0,1]
4146 $   i = {0,1,3} [size = nrow+1 = 2+1]
4147 $   j = {0,0,2} [size = 3]
4148 $   v = {1,2,3} [size = 3]
4149 $
4150 $  Process1 [P1]: rows_owned=[2]
4151 $   i = {0,3}   [size = nrow+1 = 1+1]
4152 $   j = {0,1,2} [size = 3]
4153 $   v = {4,5,6} [size = 3]
4154
4155 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4156           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4157 @*/
4158 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4159 {
4160   PetscFunctionBegin;
4161   PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4162   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4163   PetscCall(MatCreate(comm,mat));
4164   PetscCall(MatSetSizes(*mat,m,n,M,N));
4165   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4166   PetscCall(MatSetType(*mat,MATMPIAIJ));
4167   PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
4168   PetscFunctionReturn(0);
4169 }
4170
4171 /*@
4172    MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the matrix entries in standard
4173    CSR format for the local rows. Only the numerical values are updated; the other arrays must be identical to those passed to MatCreateMPIAIJWithArrays()
4174
4175    Deprecated: Use `MatUpdateMPIAIJWithArray()`
4176
4177    Collective
4178
4179    Input Parameters:
4180 +  mat - the matrix
4181 .  m - number of local rows (Cannot be PETSC_DECIDE)
4182 .  n - This value should be the same as the local size used in creating the
4183       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
4184       calculated if N is given) For square matrices n is almost always m.
4185 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4186 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4187 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4188 .
J - column indices 4189 - v - matrix values 4190 4191 Level: intermediate 4192 4193 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4194 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()` 4195 @*/ 4196 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4197 { 4198 PetscInt nnz,i; 4199 PetscBool nooffprocentries; 4200 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4201 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4202 PetscScalar *ad,*ao; 4203 PetscInt ldi,Iii,md; 4204 const PetscInt *Adi = Ad->i; 4205 PetscInt *ld = Aij->ld; 4206 4207 PetscFunctionBegin; 4208 PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4209 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4210 PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4211 PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4212 4213 PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4214 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4215 4216 for (i=0; i<m; i++) { 4217 nnz = Ii[i+1]- Ii[i]; 4218 Iii = Ii[i]; 4219 ldi = ld[i]; 4220 md = Adi[i+1]-Adi[i]; 4221 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4222 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4223 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4224 ad += md; 4225 ao += nnz - md; 4226 } 4227 nooffprocentries = mat->nooffprocentries; 4228 mat->nooffprocentries = PETSC_TRUE; 4229 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4230 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4231 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4232 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4233 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4234 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4235 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4236 mat->nooffprocentries = nooffprocentries; 4237 PetscFunctionReturn(0); 4238 } 4239 4240 /*@ 4241 MatUpdateMPIAIJWithArray - updates an MPI AIJ matrix using an array that contains the nonzero values 4242 4243 Collective 4244 4245 Input Parameters: 4246 + mat - the matrix 4247 - v - matrix values, stored by row 4248 4249 Level: intermediate 4250 4251 Notes: 4252 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4253 4254 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4255 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArrays()` 4256 @*/ 4257 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat,const PetscScalar v[]) 4258 { 4259 PetscInt nnz,i,m; 4260 PetscBool nooffprocentries; 4261 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4262 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4263 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)Aij->B->data; 4264 PetscScalar *ad,*ao; 4265 const PetscInt *Adi = Ad->i,*Adj = Ao->i; 4266 PetscInt ldi,Iii,md; 4267 PetscInt *ld = Aij->ld; 4268 4269 PetscFunctionBegin; 4270 m = mat->rmap->n; 4271 4272 
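  /* For each local row the incoming values v are expected in the same CSR order used when the matrix was
     created: the first ld[i] entries lie to the left of the diagonal column block and belong to the
     off-diagonal part B, the next Adi[i+1]-Adi[i] entries belong to the diagonal part A, and the remaining
     entries lie to the right of the diagonal block and belong to B again; the loop below copies each of the
     three pieces into place. */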
PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4273 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4274 Iii = 0; 4275 for (i=0; i<m; i++) { 4276 nnz = Adi[i+1]-Adi[i] + Adj[i+1]-Adj[i]; 4277 ldi = ld[i]; 4278 md = Adi[i+1]-Adi[i]; 4279 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4280 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4281 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4282 ad += md; 4283 ao += nnz - md; 4284 Iii += nnz; 4285 } 4286 nooffprocentries = mat->nooffprocentries; 4287 mat->nooffprocentries = PETSC_TRUE; 4288 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4289 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4290 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4291 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4292 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4293 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4294 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4295 mat->nooffprocentries = nooffprocentries; 4296 PetscFunctionReturn(0); 4297 } 4298 4299 /*@C 4300 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4301 (the default parallel PETSc format). For good matrix assembly performance 4302 the user should preallocate the matrix storage by setting the parameters 4303 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4304 performance can be increased by more than a factor of 50. 4305 4306 Collective 4307 4308 Input Parameters: 4309 + comm - MPI communicator 4310 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4311 This value should be the same as the local size used in creating the 4312 y vector for the matrix-vector product y = Ax. 4313 . n - This value should be the same as the local size used in creating the 4314 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4315 calculated if N is given) For square matrices n is almost always m. 4316 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4317 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4318 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4319 (same value is used for all local rows) 4320 . d_nnz - array containing the number of nonzeros in the various rows of the 4321 DIAGONAL portion of the local submatrix (possibly different for each row) 4322 or NULL, if d_nz is used to specify the nonzero structure. 4323 The size of this array is equal to the number of local rows, i.e 'm'. 4324 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4325 submatrix (same value is used for all local rows). 4326 - o_nnz - array containing the number of nonzeros in the various rows of the 4327 OFF-DIAGONAL portion of the local submatrix (possibly different for 4328 each row) or NULL, if o_nz is used to specify the nonzero 4329 structure. The size of this array is equal to the number 4330 of local rows, i.e 'm'. 4331 4332 Output Parameter: 4333 . A - the matrix 4334 4335 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4336 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
   MatCreate(...,&A);
   MatSetType(A,MATMPIAIJ);
   MatSetSizes(A, m,n,M,N);
   MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
        See viewer types in the manual page of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
        Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   per row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  PetscCall(MatSetSizes(*A,m,n,M,N));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size > 1) {
    PetscCall(MatSetType(*A,MATMPIAIJ));
    PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
  } else {
    PetscCall(MatSetType(*A,MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
  }
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix

   Not collective

   Input Parameter:
.
A - The MPIAIJ matrix 4500 4501 Output Parameters: 4502 + Ad - The local diagonal block as a SeqAIJ matrix 4503 . Ao - The local off-diagonal block as a SeqAIJ matrix 4504 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4505 4506 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4507 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4508 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4509 local column numbers to global column numbers in the original matrix. 4510 4511 Level: intermediate 4512 4513 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4514 @*/ 4515 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4516 { 4517 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4518 PetscBool flg; 4519 4520 PetscFunctionBegin; 4521 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg)); 4522 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4523 if (Ad) *Ad = a->A; 4524 if (Ao) *Ao = a->B; 4525 if (colmap) *colmap = a->garray; 4526 PetscFunctionReturn(0); 4527 } 4528 4529 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4530 { 4531 PetscInt m,N,i,rstart,nnz,Ii; 4532 PetscInt *indx; 4533 PetscScalar *values; 4534 MatType rootType; 4535 4536 PetscFunctionBegin; 4537 PetscCall(MatGetSize(inmat,&m,&N)); 4538 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4539 PetscInt *dnz,*onz,sum,bs,cbs; 4540 4541 if (n == PETSC_DECIDE) { 4542 PetscCall(PetscSplitOwnership(comm,&n,&N)); 4543 } 4544 /* Check sum(n) = N */ 4545 PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm)); 4546 PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 4547 4548 PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm)); 4549 rstart -= m; 4550 4551 MatPreallocateBegin(comm,m,n,dnz,onz); 4552 for (i=0; i<m; i++) { 4553 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4554 PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz)); 4555 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4556 } 4557 4558 PetscCall(MatCreate(comm,outmat)); 4559 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4560 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 4561 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 4562 PetscCall(MatGetRootType_Private(inmat,&rootType)); 4563 PetscCall(MatSetType(*outmat,rootType)); 4564 PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz)); 4565 PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz)); 4566 MatPreallocateEnd(dnz,onz); 4567 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 4568 } 4569 4570 /* numeric phase */ 4571 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 4572 for (i=0; i<m; i++) { 4573 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4574 Ii = i + rstart; 4575 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 4576 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4577 } 4578 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 4579 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 4580 PetscFunctionReturn(0); 4581 } 4582 4583 PetscErrorCode 
MatFileSplit(Mat A,char *outfile) 4584 { 4585 PetscMPIInt rank; 4586 PetscInt m,N,i,rstart,nnz; 4587 size_t len; 4588 const PetscInt *indx; 4589 PetscViewer out; 4590 char *name; 4591 Mat B; 4592 const PetscScalar *values; 4593 4594 PetscFunctionBegin; 4595 PetscCall(MatGetLocalSize(A,&m,NULL)); 4596 PetscCall(MatGetSize(A,NULL,&N)); 4597 /* Should this be the type of the diagonal block of A? */ 4598 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 4599 PetscCall(MatSetSizes(B,m,N,m,N)); 4600 PetscCall(MatSetBlockSizesFromMats(B,A,A)); 4601 PetscCall(MatSetType(B,MATSEQAIJ)); 4602 PetscCall(MatSeqAIJSetPreallocation(B,0,NULL)); 4603 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 4604 for (i=0; i<m; i++) { 4605 PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values)); 4606 PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES)); 4607 PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values)); 4608 } 4609 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 4610 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 4611 4612 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 4613 PetscCall(PetscStrlen(outfile,&len)); 4614 PetscCall(PetscMalloc1(len+6,&name)); 4615 PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank)); 4616 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out)); 4617 PetscCall(PetscFree(name)); 4618 PetscCall(MatView(B,out)); 4619 PetscCall(PetscViewerDestroy(&out)); 4620 PetscCall(MatDestroy(&B)); 4621 PetscFunctionReturn(0); 4622 } 4623 4624 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4625 { 4626 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4627 4628 PetscFunctionBegin; 4629 if (!merge) PetscFunctionReturn(0); 4630 PetscCall(PetscFree(merge->id_r)); 4631 PetscCall(PetscFree(merge->len_s)); 4632 PetscCall(PetscFree(merge->len_r)); 4633 PetscCall(PetscFree(merge->bi)); 4634 PetscCall(PetscFree(merge->bj)); 4635 PetscCall(PetscFree(merge->buf_ri[0])); 4636 PetscCall(PetscFree(merge->buf_ri)); 4637 PetscCall(PetscFree(merge->buf_rj[0])); 4638 PetscCall(PetscFree(merge->buf_rj)); 4639 PetscCall(PetscFree(merge->coi)); 4640 PetscCall(PetscFree(merge->coj)); 4641 PetscCall(PetscFree(merge->owners_co)); 4642 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4643 PetscCall(PetscFree(merge)); 4644 PetscFunctionReturn(0); 4645 } 4646 4647 #include <../src/mat/utils/freespace.h> 4648 #include <petscbt.h> 4649 4650 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4651 { 4652 MPI_Comm comm; 4653 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4654 PetscMPIInt size,rank,taga,*len_s; 4655 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4656 PetscInt proc,m; 4657 PetscInt **buf_ri,**buf_rj; 4658 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4659 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4660 MPI_Request *s_waits,*r_waits; 4661 MPI_Status *status; 4662 const MatScalar *aa,*a_a; 4663 MatScalar **abuf_r,*ba_i; 4664 Mat_Merge_SeqsToMPI *merge; 4665 PetscContainer container; 4666 4667 PetscFunctionBegin; 4668 PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm)); 4669 PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0)); 4670 4671 PetscCallMPI(MPI_Comm_size(comm,&size)); 4672 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4673 4674 PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container)); 4675 PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4676 
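  /* The "MatMergeSeqsToMPI" container is attached by MatCreateMPIAIJSumSeqAIJSymbolic(); the
     Mat_Merge_SeqsToMPI struct it holds carries the merged row structure (bi,bj), the received
     i/j buffers (buf_ri,buf_rj) and the send/receive lengths, so this numeric phase only needs
     to exchange and accumulate the matrix values. */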
PetscCall(PetscContainerGetPointer(container,(void**)&merge)); 4677 PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a)); 4678 aa = a_a; 4679 4680 bi = merge->bi; 4681 bj = merge->bj; 4682 buf_ri = merge->buf_ri; 4683 buf_rj = merge->buf_rj; 4684 4685 PetscCall(PetscMalloc1(size,&status)); 4686 owners = merge->rowmap->range; 4687 len_s = merge->len_s; 4688 4689 /* send and recv matrix values */ 4690 /*-----------------------------*/ 4691 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga)); 4692 PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits)); 4693 4694 PetscCall(PetscMalloc1(merge->nsend+1,&s_waits)); 4695 for (proc=0,k=0; proc<size; proc++) { 4696 if (!len_s[proc]) continue; 4697 i = owners[proc]; 4698 PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k)); 4699 k++; 4700 } 4701 4702 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status)); 4703 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status)); 4704 PetscCall(PetscFree(status)); 4705 4706 PetscCall(PetscFree(s_waits)); 4707 PetscCall(PetscFree(r_waits)); 4708 4709 /* insert mat values of mpimat */ 4710 /*----------------------------*/ 4711 PetscCall(PetscMalloc1(N,&ba_i)); 4712 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4713 4714 for (k=0; k<merge->nrecv; k++) { 4715 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4716 nrows = *(buf_ri_k[k]); 4717 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4718 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4719 } 4720 4721 /* set values of ba */ 4722 m = merge->rowmap->n; 4723 for (i=0; i<m; i++) { 4724 arow = owners[rank] + i; 4725 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4726 bnzi = bi[i+1] - bi[i]; 4727 PetscCall(PetscArrayzero(ba_i,bnzi)); 4728 4729 /* add local non-zero vals of this proc's seqmat into ba */ 4730 anzi = ai[arow+1] - ai[arow]; 4731 aj = a->j + ai[arow]; 4732 aa = a_a + ai[arow]; 4733 nextaj = 0; 4734 for (j=0; nextaj<anzi; j++) { 4735 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4736 ba_i[j] += aa[nextaj++]; 4737 } 4738 } 4739 4740 /* add received vals into ba */ 4741 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4742 /* i-th row */ 4743 if (i == *nextrow[k]) { 4744 anzi = *(nextai[k]+1) - *nextai[k]; 4745 aj = buf_rj[k] + *(nextai[k]); 4746 aa = abuf_r[k] + *(nextai[k]); 4747 nextaj = 0; 4748 for (j=0; nextaj<anzi; j++) { 4749 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4750 ba_i[j] += aa[nextaj++]; 4751 } 4752 } 4753 nextrow[k]++; nextai[k]++; 4754 } 4755 } 4756 PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES)); 4757 } 4758 PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a)); 4759 PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY)); 4760 PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY)); 4761 4762 PetscCall(PetscFree(abuf_r[0])); 4763 PetscCall(PetscFree(abuf_r)); 4764 PetscCall(PetscFree(ba_i)); 4765 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4766 PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0)); 4767 PetscFunctionReturn(0); 4768 } 4769 4770 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4771 { 4772 Mat B_mpi; 4773 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4774 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4775 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 
4776 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4777 PetscInt len,proc,*dnz,*onz,bs,cbs; 4778 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4779 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4780 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4781 MPI_Status *status; 4782 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4783 PetscBT lnkbt; 4784 Mat_Merge_SeqsToMPI *merge; 4785 PetscContainer container; 4786 4787 PetscFunctionBegin; 4788 PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0)); 4789 4790 /* make sure it is a PETSc comm */ 4791 PetscCall(PetscCommDuplicate(comm,&comm,NULL)); 4792 PetscCallMPI(MPI_Comm_size(comm,&size)); 4793 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4794 4795 PetscCall(PetscNew(&merge)); 4796 PetscCall(PetscMalloc1(size,&status)); 4797 4798 /* determine row ownership */ 4799 /*---------------------------------------------------------*/ 4800 PetscCall(PetscLayoutCreate(comm,&merge->rowmap)); 4801 PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m)); 4802 PetscCall(PetscLayoutSetSize(merge->rowmap,M)); 4803 PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1)); 4804 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4805 PetscCall(PetscMalloc1(size,&len_si)); 4806 PetscCall(PetscMalloc1(size,&merge->len_s)); 4807 4808 m = merge->rowmap->n; 4809 owners = merge->rowmap->range; 4810 4811 /* determine the number of messages to send, their lengths */ 4812 /*---------------------------------------------------------*/ 4813 len_s = merge->len_s; 4814 4815 len = 0; /* length of buf_si[] */ 4816 merge->nsend = 0; 4817 for (proc=0; proc<size; proc++) { 4818 len_si[proc] = 0; 4819 if (proc == rank) { 4820 len_s[proc] = 0; 4821 } else { 4822 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4823 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4824 } 4825 if (len_s[proc]) { 4826 merge->nsend++; 4827 nrows = 0; 4828 for (i=owners[proc]; i<owners[proc+1]; i++) { 4829 if (ai[i+1] > ai[i]) nrows++; 4830 } 4831 len_si[proc] = 2*(nrows+1); 4832 len += len_si[proc]; 4833 } 4834 } 4835 4836 /* determine the number and length of messages to receive for ij-structure */ 4837 /*-------------------------------------------------------------------------*/ 4838 PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv)); 4839 PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri)); 4840 4841 /* post the Irecv of j-structure */ 4842 /*-------------------------------*/ 4843 PetscCall(PetscCommGetNewTag(comm,&tagj)); 4844 PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits)); 4845 4846 /* post the Isend of j-structure */ 4847 /*--------------------------------*/ 4848 PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits)); 4849 4850 for (proc=0, k=0; proc<size; proc++) { 4851 if (!len_s[proc]) continue; 4852 i = owners[proc]; 4853 PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k)); 4854 k++; 4855 } 4856 4857 /* receives and sends of j-structure are complete */ 4858 /*------------------------------------------------*/ 4859 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status)); 4860 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status)); 4861 4862 /* send and recv i-structure */ 4863 /*---------------------------*/ 4864 PetscCall(PetscCommGetNewTag(comm,&tagi)); 4865 
PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits)); 4866 4867 PetscCall(PetscMalloc1(len+1,&buf_s)); 4868 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4869 for (proc=0,k=0; proc<size; proc++) { 4870 if (!len_s[proc]) continue; 4871 /* form outgoing message for i-structure: 4872 buf_si[0]: nrows to be sent 4873 [1:nrows]: row index (global) 4874 [nrows+1:2*nrows+1]: i-structure index 4875 */ 4876 /*-------------------------------------------*/ 4877 nrows = len_si[proc]/2 - 1; 4878 buf_si_i = buf_si + nrows+1; 4879 buf_si[0] = nrows; 4880 buf_si_i[0] = 0; 4881 nrows = 0; 4882 for (i=owners[proc]; i<owners[proc+1]; i++) { 4883 anzi = ai[i+1] - ai[i]; 4884 if (anzi) { 4885 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4886 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4887 nrows++; 4888 } 4889 } 4890 PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k)); 4891 k++; 4892 buf_si += len_si[proc]; 4893 } 4894 4895 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status)); 4896 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status)); 4897 4898 PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv)); 4899 for (i=0; i<merge->nrecv; i++) { 4900 PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i])); 4901 } 4902 4903 PetscCall(PetscFree(len_si)); 4904 PetscCall(PetscFree(len_ri)); 4905 PetscCall(PetscFree(rj_waits)); 4906 PetscCall(PetscFree2(si_waits,sj_waits)); 4907 PetscCall(PetscFree(ri_waits)); 4908 PetscCall(PetscFree(buf_s)); 4909 PetscCall(PetscFree(status)); 4910 4911 /* compute a local seq matrix in each processor */ 4912 /*----------------------------------------------*/ 4913 /* allocate bi array and free space for accumulating nonzero column info */ 4914 PetscCall(PetscMalloc1(m+1,&bi)); 4915 bi[0] = 0; 4916 4917 /* create and initialize a linked list */ 4918 nlnk = N+1; 4919 PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt)); 4920 4921 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4922 len = ai[owners[rank+1]] - ai[owners[rank]]; 4923 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space)); 4924 4925 current_space = free_space; 4926 4927 /* determine symbolic info for each local row */ 4928 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4929 4930 for (k=0; k<merge->nrecv; k++) { 4931 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4932 nrows = *buf_ri_k[k]; 4933 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4934 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4935 } 4936 4937 MatPreallocateBegin(comm,m,n,dnz,onz); 4938 len = 0; 4939 for (i=0; i<m; i++) { 4940 bnzi = 0; 4941 /* add local non-zero cols of this proc's seqmat into lnk */ 4942 arow = owners[rank] + i; 4943 anzi = ai[arow+1] - ai[arow]; 4944 aj = a->j + ai[arow]; 4945 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4946 bnzi += nlnk; 4947 /* add received col data into lnk */ 4948 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4949 if (i == *nextrow[k]) { /* i-th row */ 4950 anzi = *(nextai[k]+1) - *nextai[k]; 4951 aj = buf_rj[k] + *nextai[k]; 4952 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4953 bnzi += nlnk; 4954 nextrow[k]++; nextai[k]++; 4955 } 4956 } 4957 if (len < bnzi) len = bnzi; /* 
=max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
    PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));

  PetscCall(PetscMalloc1(bi[m]+1,&bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
  PetscCall(PetscLLDestroy(lnk,lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
  PetscCall(MatCreate(comm,&B_mpi));
  if (n==PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
  } else {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
  PetscCall(MatSetType(B_mpi,MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
  PetscCall(PetscContainerSetPointer(container,merge));
  PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
  PetscFunctionReturn(0);
}

/*@C
   MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
   matrices from each processor

   Collective

   Input Parameters:
+  comm - the communicator the parallel matrix will live on
.  seqmat - the input sequential matrix on each process
.  m - number of local rows (or PETSC_DECIDE)
.  n - number of local columns (or PETSC_DECIDE)
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  mpimat - the parallel matrix generated

   Level: advanced

   Notes:
   The dimensions of the sequential matrix in each processor MUST be the same.
   The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
   destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
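
   Example Usage:
   A minimal sketch of the intended calling sequence; the variable names and the way localmat is
   assembled are illustrative only:
.vb
   Mat localmat,C;
   /* on every rank: create and assemble localmat, a MATSEQAIJ with the same global dimensions */
   PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C));
   /* later, after only the numerical values of localmat have changed, reuse the symbolic data */
   PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C));
.ve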
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size == 1) {
    PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat));
    } else {
      PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN));
    }
    PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
    PetscFunctionReturn(0);
  }
  PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
  if (scall == MAT_INITIAL_MATRIX) {
    PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat));
  }
  PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat));
  PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
  PetscFunctionReturn(0);
}

/*@
   MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with
   mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
   with MatGetSize()

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

   Notes:
   In other words, this combines the two parts of a parallel MPIAIJ matrix on each process into a single sequential matrix.

   Destroy the matrix with MatDestroy()

.seealso: `MatMPIAIJGetLocalMat()`

@*/
PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc)
{
  PetscBool mpi;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi));
  if (mpi) {
    PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc));
  } else {
    *A_loc = A;
    PetscCall(PetscObjectReference((PetscObject)*A_loc));
  }
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
   mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
   with MatGetSize()

   Not Collective

   Input Parameters:
+  A - the matrix
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

   Notes:
   In other words, this combines the two parts of a parallel MPIAIJ matrix on each process into a single sequential matrix.

   When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
   If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
   This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
   modify the values of the returned A_loc.
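
   Example Usage:
   A minimal sketch of the MAT_INITIAL_MATRIX/MAT_REUSE_MATRIX pattern (A is an assembled MATMPIAIJ
   matrix; the variable names are illustrative only):
.vb
   Mat A_loc;
   PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc)); /* combine diagonal and off-diagonal parts */
   /* ... the values of A change, but its nonzero pattern does not ... */
   PetscCall(MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc));   /* refresh the values in place */
   PetscCall(MatDestroy(&A_loc));
.ve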
5130 5131 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5132 @*/ 5133 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5134 { 5135 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5136 Mat_SeqAIJ *mat,*a,*b; 5137 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5138 const PetscScalar *aa,*ba,*aav,*bav; 5139 PetscScalar *ca,*cam; 5140 PetscMPIInt size; 5141 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5142 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5143 PetscBool match; 5144 5145 PetscFunctionBegin; 5146 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match)); 5147 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5148 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5149 if (size == 1) { 5150 if (scall == MAT_INITIAL_MATRIX) { 5151 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5152 *A_loc = mpimat->A; 5153 } else if (scall == MAT_REUSE_MATRIX) { 5154 PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN)); 5155 } 5156 PetscFunctionReturn(0); 5157 } 5158 5159 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5160 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5161 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5162 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5163 PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav)); 5164 PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav)); 5165 aa = aav; 5166 ba = bav; 5167 if (scall == MAT_INITIAL_MATRIX) { 5168 PetscCall(PetscMalloc1(1+am,&ci)); 5169 ci[0] = 0; 5170 for (i=0; i<am; i++) { 5171 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5172 } 5173 PetscCall(PetscMalloc1(1+ci[am],&cj)); 5174 PetscCall(PetscMalloc1(1+ci[am],&ca)); 5175 k = 0; 5176 for (i=0; i<am; i++) { 5177 ncols_o = bi[i+1] - bi[i]; 5178 ncols_d = ai[i+1] - ai[i]; 5179 /* off-diagonal portion of A */ 5180 for (jo=0; jo<ncols_o; jo++) { 5181 col = cmap[*bj]; 5182 if (col >= cstart) break; 5183 cj[k] = col; bj++; 5184 ca[k++] = *ba++; 5185 } 5186 /* diagonal portion of A */ 5187 for (j=0; j<ncols_d; j++) { 5188 cj[k] = cstart + *aj++; 5189 ca[k++] = *aa++; 5190 } 5191 /* off-diagonal portion of A */ 5192 for (j=jo; j<ncols_o; j++) { 5193 cj[k] = cmap[*bj++]; 5194 ca[k++] = *ba++; 5195 } 5196 } 5197 /* put together the new matrix */ 5198 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc)); 5199 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5200 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5201 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5202 mat->free_a = PETSC_TRUE; 5203 mat->free_ij = PETSC_TRUE; 5204 mat->nonew = 0; 5205 } else if (scall == MAT_REUSE_MATRIX) { 5206 mat =(Mat_SeqAIJ*)(*A_loc)->data; 5207 ci = mat->i; 5208 cj = mat->j; 5209 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam)); 5210 for (i=0; i<am; i++) { 5211 /* off-diagonal portion of A */ 5212 ncols_o = bi[i+1] - bi[i]; 5213 for (jo=0; jo<ncols_o; jo++) { 5214 col = cmap[*bj]; 5215 if (col >= cstart) break; 5216 *cam++ = *ba++; bj++; 5217 } 5218 /* diagonal portion of A */ 5219 ncols_d = ai[i+1] - ai[i]; 5220 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5221 /* off-diagonal portion of A */ 5222 for (j=jo; j<ncols_o; j++) { 5223 *cam++ = *ba++; bj++; 5224 } 5225 } 5226 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam)); 5227 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5228 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav)); 5229 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav)); 5230 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5231 PetscFunctionReturn(0); 5232 } 5233 5234 /*@ 5235 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5236 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5237 5238 Not Collective 5239 5240 Input Parameters: 5241 + A - the matrix 5242 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5243 5244 Output Parameters: 5245 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5246 - A_loc - the local sequential matrix generated 5247 5248 Level: developer 5249 5250 Notes: 5251 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5252 5253 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5254 5255 @*/ 5256 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5257 { 5258 Mat Ao,Ad; 5259 const PetscInt *cmap; 5260 PetscMPIInt size; 5261 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5262 5263 PetscFunctionBegin; 5264 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap)); 5265 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5266 if (size == 1) { 5267 if (scall == MAT_INITIAL_MATRIX) { 5268 PetscCall(PetscObjectReference((PetscObject)Ad)); 5269 *A_loc = Ad; 5270 } else if (scall == MAT_REUSE_MATRIX) { 5271 PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN)); 5272 } 5273 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob)); 5274 PetscFunctionReturn(0); 5275 } 5276 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f)); 5277 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5278 if (f) { 5279 PetscCall((*f)(A,scall,glob,A_loc)); 5280 } else { 5281 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5282 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5283 Mat_SeqAIJ *c; 5284 PetscInt *ai = a->i, *aj = a->j; 5285 PetscInt *bi = b->i, *bj = b->j; 5286 PetscInt *ci,*cj; 5287 const PetscScalar *aa,*ba; 5288 PetscScalar *ca; 5289 PetscInt i,j,am,dn,on; 5290 5291 PetscCall(MatGetLocalSize(Ad,&am,&dn)); 5292 PetscCall(MatGetLocalSize(Ao,NULL,&on)); 5293 PetscCall(MatSeqAIJGetArrayRead(Ad,&aa)); 5294 
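    /* Default path: build the local matrix by concatenating, row by row, the diagonal block's
       columns (kept at local indices [0,dn)) with the off-diagonal block's columns shifted by dn */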
    PetscCall(MatSeqAIJGetArrayRead(Ao,&ba));
    if (scall == MAT_INITIAL_MATRIX) {
      PetscInt k;
      PetscCall(PetscMalloc1(1+am,&ci));
      PetscCall(PetscMalloc1(ai[am]+bi[am],&cj));
      PetscCall(PetscMalloc1(ai[am]+bi[am],&ca));
      ci[0] = 0;
      for (i=0,k=0; i<am; i++) {
        const PetscInt ncols_o = bi[i+1] - bi[i];
        const PetscInt ncols_d = ai[i+1] - ai[i];
        ci[i+1] = ci[i] + ncols_o + ncols_d;
        /* diagonal portion of A */
        for (j=0; j<ncols_d; j++,k++) {
          cj[k] = *aj++;
          ca[k] = *aa++;
        }
        /* off-diagonal portion of A */
        for (j=0; j<ncols_o; j++,k++) {
          cj[k] = dn + *bj++;
          ca[k] = *ba++;
        }
      }
      /* put together the new matrix */
      PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc));
      /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
      /* Since these are PETSc arrays, change flags to free them as necessary. */
      c          = (Mat_SeqAIJ*)(*A_loc)->data;
      c->free_a  = PETSC_TRUE;
      c->free_ij = PETSC_TRUE;
      c->nonew   = 0;
      PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name));
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca));
      for (i=0; i<am; i++) {
        const PetscInt ncols_d = ai[i+1] - ai[i];
        const PetscInt ncols_o = bi[i+1] - bi[i];
        /* diagonal portion of A */
        for (j=0; j<ncols_d; j++) *ca++ = *aa++;
        /* off-diagonal portion of A */
        for (j=0; j<ncols_o; j++) *ca++ = *ba++;
      }
      PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca));
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
    PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa));
    PetscCall(MatSeqAIJRestoreArrayRead(Ao,&ba));
    if (glob) {
      PetscInt cst, *gidx;

      PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL));
      PetscCall(PetscMalloc1(dn+on,&gidx));
      for (i=0; i<dn; i++) gidx[i]    = cst + i;
      for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob));
    }
  }
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns

   Not Collective

   Input Parameters:
+  A - the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.
A_loc - the local sequential matrix generated 5365 5366 Level: developer 5367 5368 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5369 5370 @*/ 5371 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5372 { 5373 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5374 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5375 IS isrowa,iscola; 5376 Mat *aloc; 5377 PetscBool match; 5378 5379 PetscFunctionBegin; 5380 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match)); 5381 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5382 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0)); 5383 if (!row) { 5384 start = A->rmap->rstart; end = A->rmap->rend; 5385 PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa)); 5386 } else { 5387 isrowa = *row; 5388 } 5389 if (!col) { 5390 start = A->cmap->rstart; 5391 cmap = a->garray; 5392 nzA = a->A->cmap->n; 5393 nzB = a->B->cmap->n; 5394 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5395 ncols = 0; 5396 for (i=0; i<nzB; i++) { 5397 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5398 else break; 5399 } 5400 imark = i; 5401 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5402 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5403 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola)); 5404 } else { 5405 iscola = *col; 5406 } 5407 if (scall != MAT_INITIAL_MATRIX) { 5408 PetscCall(PetscMalloc1(1,&aloc)); 5409 aloc[0] = *A_loc; 5410 } 5411 PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc)); 5412 if (!col) { /* attach global id of condensed columns */ 5413 PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola)); 5414 } 5415 *A_loc = aloc[0]; 5416 PetscCall(PetscFree(aloc)); 5417 if (!row) { 5418 PetscCall(ISDestroy(&isrowa)); 5419 } 5420 if (!col) { 5421 PetscCall(ISDestroy(&iscola)); 5422 } 5423 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0)); 5424 PetscFunctionReturn(0); 5425 } 5426 5427 /* 5428 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5429 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5430 * on a global size. 
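 * (Implementation sketch: a first PetscSF, with the local rows of P as roots and the requested rows
 * as leaves, communicates the number of nonzero columns per row; two further SFs, one for the
 * diagonal (p->A) part and one for the off-diagonal (p->B) part, then broadcast the column indices
 * and values, so no global-sized array is ever formed.)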
5431 * */ 5432 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5433 { 5434 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5435 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5436 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5437 PetscMPIInt owner; 5438 PetscSFNode *iremote,*oiremote; 5439 const PetscInt *lrowindices; 5440 PetscSF sf,osf; 5441 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5442 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5443 MPI_Comm comm; 5444 ISLocalToGlobalMapping mapping; 5445 const PetscScalar *pd_a,*po_a; 5446 5447 PetscFunctionBegin; 5448 PetscCall(PetscObjectGetComm((PetscObject)P,&comm)); 5449 /* plocalsize is the number of roots 5450 * nrows is the number of leaves 5451 * */ 5452 PetscCall(MatGetLocalSize(P,&plocalsize,NULL)); 5453 PetscCall(ISGetLocalSize(rows,&nrows)); 5454 PetscCall(PetscCalloc1(nrows,&iremote)); 5455 PetscCall(ISGetIndices(rows,&lrowindices)); 5456 for (i=0;i<nrows;i++) { 5457 /* Find a remote index and an owner for a row 5458 * The row could be local or remote 5459 * */ 5460 owner = 0; 5461 lidx = 0; 5462 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx)); 5463 iremote[i].index = lidx; 5464 iremote[i].rank = owner; 5465 } 5466 /* Create SF to communicate how many nonzero columns for each row */ 5467 PetscCall(PetscSFCreate(comm,&sf)); 5468 /* SF will figure out the number of nonzero colunms for each row, and their 5469 * offsets 5470 * */ 5471 PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5472 PetscCall(PetscSFSetFromOptions(sf)); 5473 PetscCall(PetscSFSetUp(sf)); 5474 5475 PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets)); 5476 PetscCall(PetscCalloc1(2*plocalsize,&nrcols)); 5477 PetscCall(PetscCalloc1(nrows,&pnnz)); 5478 roffsets[0] = 0; 5479 roffsets[1] = 0; 5480 for (i=0;i<plocalsize;i++) { 5481 /* diag */ 5482 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5483 /* off diag */ 5484 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5485 /* compute offsets so that we relative location for each row */ 5486 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5487 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5488 } 5489 PetscCall(PetscCalloc1(2*nrows,&nlcols)); 5490 PetscCall(PetscCalloc1(2*nrows,&loffsets)); 5491 /* 'r' means root, and 'l' means leaf */ 5492 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5493 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5494 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5495 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5496 PetscCall(PetscSFDestroy(&sf)); 5497 PetscCall(PetscFree(roffsets)); 5498 PetscCall(PetscFree(nrcols)); 5499 dntotalcols = 0; 5500 ontotalcols = 0; 5501 ncol = 0; 5502 for (i=0;i<nrows;i++) { 5503 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5504 ncol = PetscMax(pnnz[i],ncol); 5505 /* diag */ 5506 dntotalcols += nlcols[i*2+0]; 5507 /* off diag */ 5508 ontotalcols += nlcols[i*2+1]; 5509 } 5510 /* We do not need to figure the right number of columns 5511 * since all the calculations will be done by going through the raw data 5512 * */ 5513 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth)); 5514 PetscCall(MatSetUp(*P_oth)); 5515 PetscCall(PetscFree(pnnz)); 5516 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5517 /* diag */ 5518 PetscCall(PetscCalloc1(dntotalcols,&iremote)); 5519 /* off diag */ 5520 
PetscCall(PetscCalloc1(ontotalcols,&oiremote)); 5521 /* diag */ 5522 PetscCall(PetscCalloc1(dntotalcols,&ilocal)); 5523 /* off diag */ 5524 PetscCall(PetscCalloc1(ontotalcols,&oilocal)); 5525 dntotalcols = 0; 5526 ontotalcols = 0; 5527 ntotalcols = 0; 5528 for (i=0;i<nrows;i++) { 5529 owner = 0; 5530 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL)); 5531 /* Set iremote for diag matrix */ 5532 for (j=0;j<nlcols[i*2+0];j++) { 5533 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5534 iremote[dntotalcols].rank = owner; 5535 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5536 ilocal[dntotalcols++] = ntotalcols++; 5537 } 5538 /* off diag */ 5539 for (j=0;j<nlcols[i*2+1];j++) { 5540 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5541 oiremote[ontotalcols].rank = owner; 5542 oilocal[ontotalcols++] = ntotalcols++; 5543 } 5544 } 5545 PetscCall(ISRestoreIndices(rows,&lrowindices)); 5546 PetscCall(PetscFree(loffsets)); 5547 PetscCall(PetscFree(nlcols)); 5548 PetscCall(PetscSFCreate(comm,&sf)); 5549 /* P serves as roots and P_oth is leaves 5550 * Diag matrix 5551 * */ 5552 PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5553 PetscCall(PetscSFSetFromOptions(sf)); 5554 PetscCall(PetscSFSetUp(sf)); 5555 5556 PetscCall(PetscSFCreate(comm,&osf)); 5557 /* Off diag */ 5558 PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER)); 5559 PetscCall(PetscSFSetFromOptions(osf)); 5560 PetscCall(PetscSFSetUp(osf)); 5561 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5562 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5563 /* We operate on the matrix internal data for saving memory */ 5564 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5565 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5566 PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL)); 5567 /* Convert to global indices for diag matrix */ 5568 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5569 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5570 /* We want P_oth store global indices */ 5571 PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping)); 5572 /* Use memory scalable approach */ 5573 PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH)); 5574 PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j)); 5575 PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5576 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5577 /* Convert back to local indices */ 5578 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5579 PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5580 nout = 0; 5581 PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j)); 5582 PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout); 5583 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5584 /* Exchange values */ 5585 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5586 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5587 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5588 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5589 /* Stop PETSc from shrinking memory */ 5590 for (i=0;i<nrows;i++) 
    p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
  /* Attach the PetscSF objects to P_oth so that we can reuse them later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}

/*
 * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of local A.
 * This supports MPIAIJ and MAIJ.
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ *p_oth;
  IS         rows,map;
  PetscHMapI hamp;
  PetscInt   i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm   comm;
  PetscSF    sf,osf;
  PetscBool  has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
  /* If it is the first time, create an index set of the off-diagonal nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key = a->garray[i]/dof;
      PetscCall(PetscHMapIHas(hamp,key,&has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp,key,count++));
      } else {
        /* The current 'i' has the same key as the previous one */
        mapping[i] = count-1;
      }
    }
    PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
    PetscCall(PetscHMapIGetSize(hamp,&htsize));
    PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
    PetscCall(PetscCalloc1(htsize,&rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize,rowindices));
    PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
    /* In case the matrix was already created but the user wants to recreate it */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If the matrix was already created, we simply update the values using the SF objects
     * that were attached to the matrix earlier.
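     * (These are the "diagsf" and "offdiagsf" PetscSF objects composed on *P_oth by
     * MatCreateSeqSubMatrixWithRows_Private().)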
5658 */ 5659 const PetscScalar *pd_a,*po_a; 5660 5661 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf)); 5662 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf)); 5663 PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5664 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5665 /* Update values in place */ 5666 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5667 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5668 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5669 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5670 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5671 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5672 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5673 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5674 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5675 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0)); 5676 PetscFunctionReturn(0); 5677 } 5678 5679 /*@C 5680 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5681 5682 Collective on Mat 5683 5684 Input Parameters: 5685 + A - the first matrix in mpiaij format 5686 . B - the second matrix in mpiaij format 5687 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5688 5689 Output Parameters: 5690 + rowb - On input index sets of rows of B to extract (or NULL), modified on output 5691 . colb - On input index sets of columns of B to extract (or NULL), modified on output 5692 - B_seq - the sequential matrix generated 5693 5694 Level: developer 5695 5696 @*/ 5697 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5698 { 5699 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5700 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5701 IS isrowb,iscolb; 5702 Mat *bseq=NULL; 5703 5704 PetscFunctionBegin; 5705 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5706 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5707 } 5708 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0)); 5709 5710 if (scall == MAT_INITIAL_MATRIX) { 5711 start = A->cmap->rstart; 5712 cmap = a->garray; 5713 nzA = a->A->cmap->n; 5714 nzB = a->B->cmap->n; 5715 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5716 ncols = 0; 5717 for (i=0; i<nzB; i++) { /* row < local row index */ 5718 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5719 else break; 5720 } 5721 imark = i; 5722 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5723 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5724 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb)); 5725 PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb)); 5726 } else { 5727 PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5728 isrowb = *rowb; iscolb = *colb; 5729 PetscCall(PetscMalloc1(1,&bseq)); 5730 bseq[0] = *B_seq; 5731 } 5732 PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq)); 5733 *B_seq = bseq[0]; 5734 PetscCall(PetscFree(bseq)); 5735 if (!rowb) { 5736 PetscCall(ISDestroy(&isrowb)); 5737 } else { 5738 *rowb = isrowb; 5739 } 5740 if (!colb) { 5741 PetscCall(ISDestroy(&iscolb)); 5742 } else 
{ 5743 *colb = iscolb; 5744 } 5745 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0)); 5746 PetscFunctionReturn(0); 5747 } 5748 5749 /* 5750 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5751 of the OFF-DIAGONAL portion of local A 5752 5753 Collective on Mat 5754 5755 Input Parameters: 5756 + A,B - the matrices in mpiaij format 5757 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5758 5759 Output Parameter: 5760 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5761 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5762 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5763 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5764 5765 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5766 for this matrix. This is not desirable.. 5767 5768 Level: developer 5769 5770 */ 5771 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5772 { 5773 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5774 Mat_SeqAIJ *b_oth; 5775 VecScatter ctx; 5776 MPI_Comm comm; 5777 const PetscMPIInt *rprocs,*sprocs; 5778 const PetscInt *srow,*rstarts,*sstarts; 5779 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5780 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5781 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5782 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5783 PetscMPIInt size,tag,rank,nreqs; 5784 5785 PetscFunctionBegin; 5786 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5787 PetscCallMPI(MPI_Comm_size(comm,&size)); 5788 5789 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5790 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5791 } 5792 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0)); 5793 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 5794 5795 if (size == 1) { 5796 startsj_s = NULL; 5797 bufa_ptr = NULL; 5798 *B_oth = NULL; 5799 PetscFunctionReturn(0); 5800 } 5801 5802 ctx = a->Mvctx; 5803 tag = ((PetscObject)ctx)->tag; 5804 5805 PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5806 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5807 PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs)); 5808 PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs)); 5809 PetscCall(PetscMalloc1(nreqs,&reqs)); 5810 rwaits = reqs; 5811 swaits = reqs + nrecvs; 5812 5813 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5814 if (scall == MAT_INITIAL_MATRIX) { 5815 /* i-array */ 5816 /*---------*/ 5817 /* post receives */ 5818 if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5819 for (i=0; i<nrecvs; i++) { 5820 rowlen = rvalues + rstarts[i]*rbs; 5821 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5822 
PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5823 } 5824 5825 /* pack the outgoing message */ 5826 PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj)); 5827 5828 sstartsj[0] = 0; 5829 rstartsj[0] = 0; 5830 len = 0; /* total length of j or a array to be sent */ 5831 if (nsends) { 5832 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5833 PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues)); 5834 } 5835 for (i=0; i<nsends; i++) { 5836 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5837 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5838 for (j=0; j<nrows; j++) { 5839 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5840 for (l=0; l<sbs; l++) { 5841 PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */ 5842 5843 rowlen[j*sbs+l] = ncols; 5844 5845 len += ncols; 5846 PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); 5847 } 5848 k++; 5849 } 5850 PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5851 5852 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5853 } 5854 /* recvs and sends of i-array are completed */ 5855 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5856 PetscCall(PetscFree(svalues)); 5857 5858 /* allocate buffers for sending j and a arrays */ 5859 PetscCall(PetscMalloc1(len+1,&bufj)); 5860 PetscCall(PetscMalloc1(len+1,&bufa)); 5861 5862 /* create i-array of B_oth */ 5863 PetscCall(PetscMalloc1(aBn+2,&b_othi)); 5864 5865 b_othi[0] = 0; 5866 len = 0; /* total length of j or a array to be received */ 5867 k = 0; 5868 for (i=0; i<nrecvs; i++) { 5869 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5870 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5871 for (j=0; j<nrows; j++) { 5872 b_othi[k+1] = b_othi[k] + rowlen[j]; 5873 PetscCall(PetscIntSumError(rowlen[j],len,&len)); 5874 k++; 5875 } 5876 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5877 } 5878 PetscCall(PetscFree(rvalues)); 5879 5880 /* allocate space for j and a arrays of B_oth */ 5881 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj)); 5882 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha)); 5883 5884 /* j-array */ 5885 /*---------*/ 5886 /* post receives of j-array */ 5887 for (i=0; i<nrecvs; i++) { 5888 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5889 PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5890 } 5891 5892 /* pack the outgoing message j-array */ 5893 if (nsends) k = sstarts[0]; 5894 for (i=0; i<nsends; i++) { 5895 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5896 bufJ = bufj+sstartsj[i]; 5897 for (j=0; j<nrows; j++) { 5898 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5899 for (ll=0; ll<sbs; ll++) { 5900 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5901 for (l=0; l<ncols; l++) { 5902 *bufJ++ = cols[l]; 5903 } 5904 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5905 } 5906 } 5907 PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5908 } 5909 5910 /* recvs and sends of j-array are completed */ 5911 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5912 } else if (scall == MAT_REUSE_MATRIX) { 5913 sstartsj = *startsj_s; 5914 rstartsj = *startsj_r; 5915 bufa = *bufa_ptr; 5916 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5917 
PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha)); 5918 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5919 5920 /* a-array */ 5921 /*---------*/ 5922 /* post receives of a-array */ 5923 for (i=0; i<nrecvs; i++) { 5924 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5925 PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i)); 5926 } 5927 5928 /* pack the outgoing message a-array */ 5929 if (nsends) k = sstarts[0]; 5930 for (i=0; i<nsends; i++) { 5931 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5932 bufA = bufa+sstartsj[i]; 5933 for (j=0; j<nrows; j++) { 5934 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5935 for (ll=0; ll<sbs; ll++) { 5936 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5937 for (l=0; l<ncols; l++) { 5938 *bufA++ = vals[l]; 5939 } 5940 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5941 } 5942 } 5943 PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i)); 5944 } 5945 /* recvs and sends of a-array are completed */ 5946 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5947 PetscCall(PetscFree(reqs)); 5948 5949 if (scall == MAT_INITIAL_MATRIX) { 5950 /* put together the new matrix */ 5951 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth)); 5952 5953 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5954 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5955 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5956 b_oth->free_a = PETSC_TRUE; 5957 b_oth->free_ij = PETSC_TRUE; 5958 b_oth->nonew = 0; 5959 5960 PetscCall(PetscFree(bufj)); 5961 if (!startsj_s || !bufa_ptr) { 5962 PetscCall(PetscFree2(sstartsj,rstartsj)); 5963 PetscCall(PetscFree(bufa_ptr)); 5964 } else { 5965 *startsj_s = sstartsj; 5966 *startsj_r = rstartsj; 5967 *bufa_ptr = bufa; 5968 } 5969 } else if (scall == MAT_REUSE_MATRIX) { 5970 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha)); 5971 } 5972 5973 PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5974 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs)); 5975 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0)); 5976 PetscFunctionReturn(0); 5977 } 5978 5979 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5980 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5981 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5982 #if defined(PETSC_HAVE_MKL_SPARSE) 5983 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5984 #endif 5985 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5986 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5987 #if defined(PETSC_HAVE_ELEMENTAL) 5988 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5989 #endif 5990 #if defined(PETSC_HAVE_SCALAPACK) 5991 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5992 #endif 5993 #if defined(PETSC_HAVE_HYPRE) 5994 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5995 #endif 5996 #if defined(PETSC_HAVE_CUDA) 5997 PETSC_INTERN PetscErrorCode 
MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5998 #endif 5999 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6000 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 6001 #endif 6002 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 6003 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 6004 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6005 6006 /* 6007 Computes (B'*A')' since computing B*A directly is untenable 6008 6009 n p p 6010 [ ] [ ] [ ] 6011 m [ A ] * n [ B ] = m [ C ] 6012 [ ] [ ] [ ] 6013 6014 */ 6015 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 6016 { 6017 Mat At,Bt,Ct; 6018 6019 PetscFunctionBegin; 6020 PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At)); 6021 PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt)); 6022 PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct)); 6023 PetscCall(MatDestroy(&At)); 6024 PetscCall(MatDestroy(&Bt)); 6025 PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C)); 6026 PetscCall(MatDestroy(&Ct)); 6027 PetscFunctionReturn(0); 6028 } 6029 6030 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 6031 { 6032 PetscBool cisdense; 6033 6034 PetscFunctionBegin; 6035 PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n); 6036 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N)); 6037 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 6038 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 6039 if (!cisdense) { 6040 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 6041 } 6042 PetscCall(MatSetUp(C)); 6043 6044 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6045 PetscFunctionReturn(0); 6046 } 6047 6048 /* ----------------------------------------------------------------*/ 6049 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6050 { 6051 Mat_Product *product = C->product; 6052 Mat A = product->A,B=product->B; 6053 6054 PetscFunctionBegin; 6055 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6056 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6057 6058 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6059 C->ops->productsymbolic = MatProductSymbolic_AB; 6060 PetscFunctionReturn(0); 6061 } 6062 6063 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6064 { 6065 Mat_Product *product = C->product; 6066 6067 PetscFunctionBegin; 6068 if (product->type == MATPRODUCT_AB) { 6069 PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6070 } 6071 PetscFunctionReturn(0); 6072 } 6073 6074 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6075 6076 Input Parameters: 6077 6078 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6079 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6080 6081 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6082 6083 For Set1, j1[] contains column indices of the nonzeros. 
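  (As a concrete, hypothetical example: if for some row Set1 contributes sorted columns [2,2,5] and Set2 contributes [2,7],
  the merged row of the output CSR has columns [2,5,7]; the imap/jmap arrays described below record that Set1's unique
  columns 2 and 5 are merged nonzeros 0 and 1 with repeat counts 2 and 1, and that Set2's unique columns 2 and 7 are
  merged nonzeros 0 and 2 with repeat counts 1 and 1.)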
6084 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6085 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6086 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6087 6088 Similar for Set2. 6089 6090 This routine merges the two sets of nonzeros row by row and removes repeats. 6091 6092 Output Parameters: (memory is allocated by the caller) 6093 6094 i[],j[]: the CSR of the merged matrix, which has m rows. 6095 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6096 imap2[]: similar to imap1[], but for Set2. 6097 Note we order nonzeros row-by-row and from left to right. 6098 */ 6099 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[], 6100 const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[], 6101 PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[]) 6102 { 6103 PetscInt r,m; /* Row index of mat */ 6104 PetscCount t,t1,t2,b1,e1,b2,e2; 6105 6106 PetscFunctionBegin; 6107 PetscCall(MatGetLocalSize(mat,&m,NULL)); 6108 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6109 i[0] = 0; 6110 for (r=0; r<m; r++) { /* Do row by row merging */ 6111 b1 = rowBegin1[r]; 6112 e1 = rowEnd1[r]; 6113 b2 = rowBegin2[r]; 6114 e2 = rowEnd2[r]; 6115 while (b1 < e1 && b2 < e2) { 6116 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6117 j[t] = j1[b1]; 6118 imap1[t1] = t; 6119 imap2[t2] = t; 6120 b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6121 b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6122 t1++; t2++; t++; 6123 } else if (j1[b1] < j2[b2]) { 6124 j[t] = j1[b1]; 6125 imap1[t1] = t; 6126 b1 += jmap1[t1+1] - jmap1[t1]; 6127 t1++; t++; 6128 } else { 6129 j[t] = j2[b2]; 6130 imap2[t2] = t; 6131 b2 += jmap2[t2+1] - jmap2[t2]; 6132 t2++; t++; 6133 } 6134 } 6135 /* Merge the remaining in either j1[] or j2[] */ 6136 while (b1 < e1) { 6137 j[t] = j1[b1]; 6138 imap1[t1] = t; 6139 b1 += jmap1[t1+1] - jmap1[t1]; 6140 t1++; t++; 6141 } 6142 while (b2 < e2) { 6143 j[t] = j2[b2]; 6144 imap2[t2] = t; 6145 b2 += jmap2[t2+1] - jmap2[t2]; 6146 t2++; t++; 6147 } 6148 i[r+1] = t; 6149 } 6150 PetscFunctionReturn(0); 6151 } 6152 6153 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6154 6155 Input Parameters: 6156 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6157 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6158 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6159 6160 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6161 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6162 6163 Output Parameters: 6164 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6165 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6166 They contain indices pointing to j[]. 
                    For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
                    and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

  Aperm[],Ajmap[],Atot,Annz: diagonal block info, with the arrays allocated by this routine.
    Atot: number of entries belonging to the diagonal block.
    Annz: number of unique nonzeros belonging to the diagonal block.
    Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, which counts
      repeats (i.e., the same 'i,j' pair may appear multiple times).
    Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
      is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

  Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

  Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
                                               PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
                                               PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
                                               PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
{
  PetscInt   cstart,cend,rstart,rend,row,col;
  PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount k,m,p,q,r,s,mid;
  PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
  PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
  m = rend - rstart;

  for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k<n) {
    row = i[k];
    /* Entries in [k,s) are in one row.
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6208 for (s=k; s<n; s++) if (i[s] != row) break; 6209 for (p=k; p<s; p++) { 6210 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6211 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); 6212 } 6213 PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k)); 6214 PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6215 rowBegin[row-rstart] = k; 6216 rowMid[row-rstart] = mid; 6217 rowEnd[row-rstart] = s; 6218 6219 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6220 Atot += mid - k; 6221 Btot += s - mid; 6222 6223 /* Count unique nonzeros of this diag/offdiag row */ 6224 for (p=k; p<mid;) { 6225 col = j[p]; 6226 do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */ 6227 Annz++; 6228 } 6229 6230 for (p=mid; p<s;) { 6231 col = j[p]; 6232 do {p++;} while (p<s && j[p] == col); 6233 Bnnz++; 6234 } 6235 k = s; 6236 } 6237 6238 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6239 PetscCall(PetscMalloc1(Atot,&Aperm)); 6240 PetscCall(PetscMalloc1(Btot,&Bperm)); 6241 PetscCall(PetscMalloc1(Annz+1,&Ajmap)); 6242 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap)); 6243 6244 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6245 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6246 for (r=0; r<m; r++) { 6247 k = rowBegin[r]; 6248 mid = rowMid[r]; 6249 s = rowEnd[r]; 6250 PetscCall(PetscArraycpy(Aperm+Atot,perm+k, mid-k)); 6251 PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid)); 6252 Atot += mid - k; 6253 Btot += s - mid; 6254 6255 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6256 for (p=k; p<mid;) { 6257 col = j[p]; 6258 q = p; 6259 do {p++;} while (p<mid && j[p] == col); 6260 Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6261 Annz++; 6262 } 6263 6264 for (p=mid; p<s;) { 6265 col = j[p]; 6266 q = p; 6267 do {p++;} while (p<s && j[p] == col); 6268 Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6269 Bnnz++; 6270 } 6271 } 6272 /* Output */ 6273 *Aperm_ = Aperm; 6274 *Annz_ = Annz; 6275 *Atot_ = Atot; 6276 *Ajmap_ = Ajmap; 6277 *Bperm_ = Bperm; 6278 *Bnnz_ = Bnnz; 6279 *Btot_ = Btot; 6280 *Bjmap_ = Bjmap; 6281 PetscFunctionReturn(0); 6282 } 6283 6284 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6285 6286 Input Parameters: 6287 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6288 nnz: number of unique nonzeros in the merged matrix 6289 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6290 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6291 6292 Output Parameter: (memory is allocated by the caller) 6293 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6294 6295 Example: 6296 nnz1 = 4 6297 nnz = 6 6298 imap = [1,3,4,5] 6299 jmap = [0,3,5,6,7] 6300 then, 6301 jmap_new = [0,0,3,3,5,6,7] 6302 */ 6303 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[]) 6304 { 6305 PetscCount k,p; 6306 6307 PetscFunctionBegin; 6308 jmap_new[0] = 0; 6309 p = nnz; /* p loops 
over jmap_new[] backwards */ 6310 for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */ 6311 for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1]; 6312 } 6313 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6314 PetscFunctionReturn(0); 6315 } 6316 6317 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6318 { 6319 MPI_Comm comm; 6320 PetscMPIInt rank,size; 6321 PetscInt m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6322 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6323 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6324 6325 PetscFunctionBegin; 6326 PetscCall(PetscFree(mpiaij->garray)); 6327 PetscCall(VecDestroy(&mpiaij->lvec)); 6328 #if defined(PETSC_USE_CTABLE) 6329 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6330 #else 6331 PetscCall(PetscFree(mpiaij->colmap)); 6332 #endif 6333 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6334 mat->assembled = PETSC_FALSE; 6335 mat->was_assembled = PETSC_FALSE; 6336 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6337 6338 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 6339 PetscCallMPI(MPI_Comm_size(comm,&size)); 6340 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 6341 PetscCall(PetscLayoutSetUp(mat->rmap)); 6342 PetscCall(PetscLayoutSetUp(mat->cmap)); 6343 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6344 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6345 PetscCall(MatGetLocalSize(mat,&m,&n)); 6346 PetscCall(MatGetSize(mat,&M,&N)); 6347 6348 /* ---------------------------------------------------------------------------*/ 6349 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6350 /* entries come first, then local rows, then remote rows. */ 6351 /* ---------------------------------------------------------------------------*/ 6352 PetscCount n1 = coo_n,*perm1; 6353 PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 6354 PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1)); 6355 PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */ 6356 PetscCall(PetscArraycpy(j1,coo_j,n1)); 6357 for (k=0; k<n1; k++) perm1[k] = k; 6358 6359 /* Manipulate indices so that entries with negative row or col indices will have smallest 6360 row indices, local entries will have greater but negative row indices, and remote entries 6361 will have positive row indices. 
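     For example (assuming 32-bit PetscInt, so PETSC_MAX_INT = 2^31-1): an entry with a negative row or column index becomes
     PETSC_MIN_INT = -2^31 and sorts first; a local row r in [rstart,rend) becomes r - (2^31-1), which is negative but greater
     than PETSC_MIN_INT; a remote row keeps its original nonnegative value and therefore sorts last.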
6362 */ 6363 for (k=0; k<n1; k++) { 6364 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6365 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6366 else { 6367 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6368 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6369 } 6370 } 6371 6372 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6373 PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1)); 6374 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */ 6375 PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */ 6376 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6377 6378 /* ---------------------------------------------------------------------------*/ 6379 /* Split local rows into diag/offdiag portions */ 6380 /* ---------------------------------------------------------------------------*/ 6381 PetscCount *rowBegin1,*rowMid1,*rowEnd1; 6382 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1; 6383 PetscCount Annz1,Bnnz1,Atot1,Btot1; 6384 6385 PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1)); 6386 PetscCall(PetscMalloc1(n1-rem,&Cperm1)); 6387 PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1)); 6388 6389 /* ---------------------------------------------------------------------------*/ 6390 /* Send remote rows to their owner */ 6391 /* ---------------------------------------------------------------------------*/ 6392 /* Find which rows should be sent to which remote ranks*/ 6393 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6394 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6395 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6396 const PetscInt *ranges; 6397 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6398 6399 PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges)); 6400 PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries)); 6401 for (k=rem; k<n1;) { 6402 PetscMPIInt owner; 6403 PetscInt firstRow,lastRow; 6404 6405 /* Locate a row range */ 6406 firstRow = i1[k]; /* first row of this owner */ 6407 PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner)); 6408 lastRow = ranges[owner+1]-1; /* last row of this owner */ 6409 6410 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6411 PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p)); 6412 6413 /* All entries in [k,p) belong to this remote owner */ 6414 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6415 PetscMPIInt *sendto2; 6416 PetscInt *nentries2; 6417 PetscInt maxNsend2 = (maxNsend <= size/2) ? 
                         maxNsend*2 : size;

      PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2));
      PetscCall(PetscArraycpy(sendto2,sendto,maxNsend));
      PetscCall(PetscArraycpy(nentries2,nentries,maxNsend)); /* copy the old counts into the new array */
      PetscCall(PetscFree2(sendto,nentries));                /* free the old (shorter) arrays */
      sendto   = sendto2;
      nentries = nentries2;
      maxNsend = maxNsend2;
    }
    sendto[nsend]   = owner;
    nentries[nsend] = p - k;
    PetscCall(PetscCountCast(p-k,&nentries[nsend]));
    nsend++;
    k = p;
  }

  /* Build 1st SF to know offsets on remote to send data */
  PetscSF     sf1;
  PetscInt    nroots = 1,nroots2 = 0;
  PetscInt    nleaves = nsend,nleaves2 = 0;
  PetscInt    *offsets;
  PetscSFNode *iremote;

  PetscCall(PetscSFCreate(comm,&sf1));
  PetscCall(PetscMalloc1(nsend,&iremote));
  PetscCall(PetscMalloc1(nsend,&offsets));
  for (k=0; k<nsend; k++) {
    iremote[k].rank  = sendto[k];
    iremote[k].index = 0;
    nleaves2        += nentries[k];
    PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
  }
  PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM));
  PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Should nroots2 overflow, it is caught by the offsets[] check below */
  PetscCall(PetscSFDestroy(&sf1));
  PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem);

  /* Build 2nd SF to send remote COOs to their owner */
  PetscSF sf2;
  nroots  = nroots2;
  nleaves = nleaves2;
  PetscCall(PetscSFCreate(comm,&sf2));
  PetscCall(PetscSFSetFromOptions(sf2));
  PetscCall(PetscMalloc1(nleaves,&iremote));
  p = 0;
  for (k=0; k<nsend; k++) {
    PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
    for (q=0; q<nentries[k]; q++,p++) {
      iremote[p].rank  = sendto[k];
      iremote[p].index = offsets[k] + q;
    }
  }
  PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));

  /* sf2 only sends contiguous leafdata to contiguous rootdata.
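     Each sending rank's entries therefore occupy the contiguous slot range [offsets[k], offsets[k]+nentries[k]) on the
     receiving rank, which is exactly how iremote[] was built above.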
We record the permutation which will be used to fill leafdata */ 6474 PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem)); 6475 6476 /* Send the remote COOs to their owner */ 6477 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6478 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6479 PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2)); 6480 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE)); 6481 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE)); 6482 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE)); 6483 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE)); 6484 6485 PetscCall(PetscFree(offsets)); 6486 PetscCall(PetscFree2(sendto,nentries)); 6487 6488 /* ---------------------------------------------------------------*/ 6489 /* Sort received COOs by row along with the permutation array */ 6490 /* ---------------------------------------------------------------*/ 6491 for (k=0; k<n2; k++) perm2[k] = k; 6492 PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2)); 6493 6494 /* ---------------------------------------------------------------*/ 6495 /* Split received COOs into diag/offdiag portions */ 6496 /* ---------------------------------------------------------------*/ 6497 PetscCount *rowBegin2,*rowMid2,*rowEnd2; 6498 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6499 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6500 6501 PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2)); 6502 PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2)); 6503 6504 /* --------------------------------------------------------------------------*/ 6505 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6506 /* --------------------------------------------------------------------------*/ 6507 PetscInt *Ai,*Bi; 6508 PetscInt *Aj,*Bj; 6509 6510 PetscCall(PetscMalloc1(m+1,&Ai)); 6511 PetscCall(PetscMalloc1(m+1,&Bi)); 6512 PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6513 PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj)); 6514 6515 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6516 PetscCall(PetscMalloc1(Annz1,&Aimap1)); 6517 PetscCall(PetscMalloc1(Bnnz1,&Bimap1)); 6518 PetscCall(PetscMalloc1(Annz2,&Aimap2)); 6519 PetscCall(PetscMalloc1(Bnnz2,&Bimap2)); 6520 6521 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj)); 6522 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj)); 6523 6524 /* --------------------------------------------------------------------------*/ 6525 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6526 /* expect nonzeros in A/B most likely have local contributing entries */ 6527 /* --------------------------------------------------------------------------*/ 6528 PetscInt Annz = Ai[m]; 6529 PetscInt Bnnz = Bi[m]; 6530 PetscCount *Ajmap1_new,*Bjmap1_new; 6531 6532 PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new)); 6533 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new)); 6534 6535 PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new)); 6536 
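  /* likewise expand Bjmap1; after expansion the two jmap arrays are indexed by the nonzeros of A and B (rather than by the
     local set's unique nonzeros), so MatSetValuesCOO_MPIAIJ() can accumulate the local contributions of each nonzero
     without going through imap1 */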
PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new)); 6537 6538 PetscCall(PetscFree(Aimap1)); 6539 PetscCall(PetscFree(Ajmap1)); 6540 PetscCall(PetscFree(Bimap1)); 6541 PetscCall(PetscFree(Bjmap1)); 6542 PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1)); 6543 PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2)); 6544 PetscCall(PetscFree3(i1,j1,perm1)); 6545 PetscCall(PetscFree3(i2,j2,perm2)); 6546 6547 Ajmap1 = Ajmap1_new; 6548 Bjmap1 = Bjmap1_new; 6549 6550 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6551 if (Annz < Annz1 + Annz2) { 6552 PetscInt *Aj_new; 6553 PetscCall(PetscMalloc1(Annz,&Aj_new)); 6554 PetscCall(PetscArraycpy(Aj_new,Aj,Annz)); 6555 PetscCall(PetscFree(Aj)); 6556 Aj = Aj_new; 6557 } 6558 6559 if (Bnnz < Bnnz1 + Bnnz2) { 6560 PetscInt *Bj_new; 6561 PetscCall(PetscMalloc1(Bnnz,&Bj_new)); 6562 PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz)); 6563 PetscCall(PetscFree(Bj)); 6564 Bj = Bj_new; 6565 } 6566 6567 /* --------------------------------------------------------------------------------*/ 6568 /* Create new submatrices for on-process and off-process coupling */ 6569 /* --------------------------------------------------------------------------------*/ 6570 PetscScalar *Aa,*Ba; 6571 MatType rtype; 6572 Mat_SeqAIJ *a,*b; 6573 PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */ 6574 PetscCall(PetscCalloc1(Bnnz,&Ba)); 6575 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6576 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6577 PetscCall(MatDestroy(&mpiaij->A)); 6578 PetscCall(MatDestroy(&mpiaij->B)); 6579 PetscCall(MatGetRootType_Private(mat,&rtype)); 6580 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A)); 6581 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B)); 6582 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6583 6584 a = (Mat_SeqAIJ*)mpiaij->A->data; 6585 b = (Mat_SeqAIJ*)mpiaij->B->data; 6586 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6587 a->free_a = b->free_a = PETSC_TRUE; 6588 a->free_ij = b->free_ij = PETSC_TRUE; 6589 6590 /* conversion must happen AFTER multiply setup */ 6591 PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A)); 6592 PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B)); 6593 PetscCall(VecDestroy(&mpiaij->lvec)); 6594 PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL)); 6595 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec)); 6596 6597 mpiaij->coo_n = coo_n; 6598 mpiaij->coo_sf = sf2; 6599 mpiaij->sendlen = nleaves; 6600 mpiaij->recvlen = nroots; 6601 6602 mpiaij->Annz = Annz; 6603 mpiaij->Bnnz = Bnnz; 6604 6605 mpiaij->Annz2 = Annz2; 6606 mpiaij->Bnnz2 = Bnnz2; 6607 6608 mpiaij->Atot1 = Atot1; 6609 mpiaij->Atot2 = Atot2; 6610 mpiaij->Btot1 = Btot1; 6611 mpiaij->Btot2 = Btot2; 6612 6613 mpiaij->Ajmap1 = Ajmap1; 6614 mpiaij->Aperm1 = Aperm1; 6615 6616 mpiaij->Bjmap1 = Bjmap1; 6617 mpiaij->Bperm1 = Bperm1; 6618 6619 mpiaij->Aimap2 = Aimap2; 6620 mpiaij->Ajmap2 = Ajmap2; 6621 mpiaij->Aperm2 = Aperm2; 6622 6623 mpiaij->Bimap2 = Bimap2; 6624 mpiaij->Bjmap2 = Bjmap2; 6625 mpiaij->Bperm2 = Bperm2; 6626 6627 mpiaij->Cperm1 = Cperm1; 6628 6629 /* Allocate in preallocation. 
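     The send/receive buffers hold one scalar per outgoing/incoming COO entry (sendlen and recvlen respectively) and are
     reused by every subsequent MatSetValuesCOO() call.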
If not used, it has zero cost on host */ 6630 PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf)); 6631 PetscFunctionReturn(0); 6632 } 6633 6634 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode) 6635 { 6636 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6637 Mat A = mpiaij->A,B = mpiaij->B; 6638 PetscCount Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2; 6639 PetscScalar *Aa,*Ba; 6640 PetscScalar *sendbuf = mpiaij->sendbuf; 6641 PetscScalar *recvbuf = mpiaij->recvbuf; 6642 const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2; 6643 const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2; 6644 const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2; 6645 const PetscCount *Cperm1 = mpiaij->Cperm1; 6646 6647 PetscFunctionBegin; 6648 PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */ 6649 PetscCall(MatSeqAIJGetArray(B,&Ba)); 6650 6651 /* Pack entries to be sent to remote */ 6652 for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6653 6654 /* Send remote entries to their owner and overlap the communication with local computation */ 6655 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE)); 6656 /* Add local entries to A and B */ 6657 for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6658 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stablility */ 6659 for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]]; 6660 Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum; 6661 } 6662 for (PetscCount i=0; i<Bnnz; i++) { 6663 PetscScalar sum = 0.0; 6664 for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]]; 6665 Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum; 6666 } 6667 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE)); 6668 6669 /* Add received remote entries to A and B */ 6670 for (PetscCount i=0; i<Annz2; i++) { 6671 for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6672 } 6673 for (PetscCount i=0; i<Bnnz2; i++) { 6674 for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6675 } 6676 PetscCall(MatSeqAIJRestoreArray(A,&Aa)); 6677 PetscCall(MatSeqAIJRestoreArray(B,&Ba)); 6678 PetscFunctionReturn(0); 6679 } 6680 6681 /* ----------------------------------------------------------------*/ 6682 6683 /*MC 6684 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6685 6686 Options Database Keys: 6687 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6688 6689 Level: beginner 6690 6691 Notes: 6692 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6693 in this case the values associated with the rows and columns one passes in are set to zero 6694 in the matrix 6695 6696 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6697 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6698 6699 .seealso: `MatCreateAIJ()` 6700 M*/ 6701 6702 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6703 { 6704 Mat_MPIAIJ *b; 6705 PetscMPIInt size; 6706 6707 PetscFunctionBegin; 6708 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 6709 6710 PetscCall(PetscNewLog(B,&b)); 6711 B->data = (void*)b; 6712 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 6713 B->assembled = PETSC_FALSE; 6714 B->insertmode = NOT_SET_VALUES; 6715 b->size = size; 6716 6717 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 6718 6719 /* build cache for off array entries formed */ 6720 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 6721 6722 b->donotstash = PETSC_FALSE; 6723 b->colmap = NULL; 6724 b->garray = NULL; 6725 b->roworiented = PETSC_TRUE; 6726 6727 /* stuff used for matrix vector multiply */ 6728 b->lvec = NULL; 6729 b->Mvctx = NULL; 6730 6731 /* stuff for MatGetRow() */ 6732 b->rowindices = NULL; 6733 b->rowvalues = NULL; 6734 b->getrowactive = PETSC_FALSE; 6735 6736 /* flexible pointer used in CUSPARSE classes */ 6737 b->spptr = NULL; 6738 6739 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6740 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ)); 6741 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ)); 6742 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ)); 6743 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ)); 6744 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ)); 6745 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6746 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ)); 6747 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM)); 6748 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL)); 6749 #if defined(PETSC_HAVE_CUDA) 6750 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6751 #endif 6752 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6753 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos)); 6754 #endif 6755 #if defined(PETSC_HAVE_MKL_SPARSE) 6756 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL)); 6757 #endif 6758 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL)); 6759 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ)); 6760 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ)); 6761 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense)); 6762 #if defined(PETSC_HAVE_ELEMENTAL) 6763 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental)); 6764 #endif 6765 #if defined(PETSC_HAVE_SCALAPACK) 6766 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK)); 6767 #endif 6768 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS)); 6769 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL)); 6770 #if defined(PETSC_HAVE_HYPRE) 6771 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE)); 6772 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6773 #endif 6774 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ)); 6775 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ)); 6776 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ)); 6777 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ)); 6778 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ)); 6779 PetscFunctionReturn(0); 6780 } 6781 6782 /*@C 6783 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6784 and "off-diagonal" part of the matrix in CSR format. 6785 6786 Collective 6787 6788 Input Parameters: 6789 + comm - MPI communicator 6790 . m - number of local rows (Cannot be PETSC_DECIDE) 6791 . n - This value should be the same as the local size used in creating the 6792 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6793 calculated if N is given) For square matrices n is almost always m. 6794 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6795 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6796 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6797 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6798 . a - matrix values 6799 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6800 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6801 - oa - matrix values 6802 6803 Output Parameter: 6804 . mat - the matrix 6805 6806 Level: advanced 6807 6808 Notes: 6809 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6810 must free the arrays once the matrix has been destroyed and not before. 6811 6812 The i and j indices are 0 based 6813 6814 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6815 6816 This sets local rows and cannot be used to set off-processor values. 6817 6818 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6819 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6820 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 6821 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6822 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6823 communication if it is known that only local entries will be set. 6824 6825 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6826 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6827 @*/ 6828 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6829 { 6830 Mat_MPIAIJ *maij; 6831 6832 PetscFunctionBegin; 6833 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6834 PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6835 PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6836 PetscCall(MatCreate(comm,mat)); 6837 PetscCall(MatSetSizes(*mat,m,n,M,N)); 6838 PetscCall(MatSetType(*mat,MATMPIAIJ)); 6839 maij = (Mat_MPIAIJ*) (*mat)->data; 6840 6841 (*mat)->preallocated = PETSC_TRUE; 6842 6843 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6844 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6845 6846 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A)); 6847 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B)); 6848 6849 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 6850 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 6851 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 6852 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 6853 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 6854 PetscFunctionReturn(0); 6855 } 6856 6857 typedef struct { 6858 Mat *mp; /* intermediate products */ 6859 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6860 PetscInt cp; /* number of intermediate products */ 6861 6862 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6863 PetscInt *startsj_s,*startsj_r; 6864 PetscScalar *bufa; 6865 Mat P_oth; 6866 6867 /* may take advantage of merging product->B */ 6868 Mat Bloc; /* B-local by merging diag and off-diag */ 6869 6870 /* cusparse does not have support to split between symbolic and numeric phases. 6871 When api_user is true, we don't need to update the numerical values 6872 of the temporary storage */ 6873 PetscBool reusesym; 6874 6875 /* support for COO values insertion */ 6876 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6877 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6878 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6879 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 6880 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6881 PetscMemType mtype; 6882 6883 /* customization */ 6884 PetscBool abmerge; 6885 PetscBool P_oth_bind; 6886 } MatMatMPIAIJBACKEND; 6887 6888 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6889 { 6890 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6891 PetscInt i; 6892 6893 PetscFunctionBegin; 6894 PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r)); 6895 PetscCall(PetscFree(mmdata->bufa)); 6896 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v)); 6897 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w)); 6898 PetscCall(MatDestroy(&mmdata->P_oth)); 6899 PetscCall(MatDestroy(&mmdata->Bloc)); 6900 PetscCall(PetscSFDestroy(&mmdata->sf)); 6901 for (i = 0; i < mmdata->cp; i++) { 6902 PetscCall(MatDestroy(&mmdata->mp[i])); 6903 } 6904 PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp)); 6905 PetscCall(PetscFree(mmdata->own[0])); 6906 PetscCall(PetscFree(mmdata->own)); 6907 PetscCall(PetscFree(mmdata->off[0])); 6908 PetscCall(PetscFree(mmdata->off)); 6909 PetscCall(PetscFree(mmdata)); 6910 PetscFunctionReturn(0); 6911 } 6912 6913 /* Copy selected n entries with indices in idx[] of A to v[]. 6914 If idx is NULL, copy the whole data array of A to v[] 6915 */ 6916 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6917 { 6918 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6919 6920 PetscFunctionBegin; 6921 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f)); 6922 if (f) { 6923 PetscCall((*f)(A,n,idx,v)); 6924 } else { 6925 const PetscScalar *vv; 6926 6927 PetscCall(MatSeqAIJGetArrayRead(A,&vv)); 6928 if (n && idx) { 6929 PetscScalar *w = v; 6930 const PetscInt *oi = idx; 6931 PetscInt j; 6932 6933 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6934 } else { 6935 PetscCall(PetscArraycpy(v,vv,n)); 6936 } 6937 PetscCall(MatSeqAIJRestoreArrayRead(A,&vv)); 6938 } 6939 PetscFunctionReturn(0); 6940 } 6941 6942 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6943 { 6944 MatMatMPIAIJBACKEND *mmdata; 6945 PetscInt i,n_d,n_o; 6946 6947 PetscFunctionBegin; 6948 MatCheckProduct(C,1); 6949 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6950 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6951 if (!mmdata->reusesym) { /* update temporary matrices */ 6952 if (mmdata->P_oth) { 6953 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 6954 } 6955 if (mmdata->Bloc) { 6956 PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc)); 6957 } 6958 } 6959 mmdata->reusesym = PETSC_FALSE; 6960 6961 for (i = 0; i < mmdata->cp; i++) { 6962 PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6963 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 6964 } 6965 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6966 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6967 6968 if (mmdata->mptmp[i]) continue; 6969 if (noff) { 6970 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6971 6972 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o)); 6973 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d)); 
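      /* advance the offsets into the off-process (coo_w) and on-process (coo_v) COO value buffers */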
6974 n_o += noff; 6975 n_d += nown; 6976 } else { 6977 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6978 6979 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d)); 6980 n_d += mm->nz; 6981 } 6982 } 6983 if (mmdata->hasoffproc) { /* offprocess insertion */ 6984 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6985 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6986 } 6987 PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES)); 6988 PetscFunctionReturn(0); 6989 } 6990 6991 /* Support for Pt * A, A * P, or Pt * A * P */ 6992 #define MAX_NUMBER_INTERMEDIATE 4 6993 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6994 { 6995 Mat_Product *product = C->product; 6996 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6997 Mat_MPIAIJ *a,*p; 6998 MatMatMPIAIJBACKEND *mmdata; 6999 ISLocalToGlobalMapping P_oth_l2g = NULL; 7000 IS glob = NULL; 7001 const char *prefix; 7002 char pprefix[256]; 7003 const PetscInt *globidx,*P_oth_idx; 7004 PetscInt i,j,cp,m,n,M,N,*coo_i,*coo_j; 7005 PetscCount ncoo,ncoo_d,ncoo_o,ncoo_oown; 7006 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7007 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7008 /* a base offset; type-2: sparse with a local to global map table */ 7009 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7010 7011 MatProductType ptype; 7012 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 7013 PetscMPIInt size; 7014 7015 PetscFunctionBegin; 7016 MatCheckProduct(C,1); 7017 PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 7018 ptype = product->type; 7019 if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 7020 ptype = MATPRODUCT_AB; 7021 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7022 } 7023 switch (ptype) { 7024 case MATPRODUCT_AB: 7025 A = product->A; 7026 P = product->B; 7027 m = A->rmap->n; 7028 n = P->cmap->n; 7029 M = A->rmap->N; 7030 N = P->cmap->N; 7031 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7032 break; 7033 case MATPRODUCT_AtB: 7034 P = product->A; 7035 A = product->B; 7036 m = P->cmap->n; 7037 n = A->cmap->n; 7038 M = P->cmap->N; 7039 N = A->cmap->N; 7040 hasoffproc = PETSC_TRUE; 7041 break; 7042 case MATPRODUCT_PtAP: 7043 A = product->A; 7044 P = product->B; 7045 m = P->cmap->n; 7046 n = P->cmap->n; 7047 M = P->cmap->N; 7048 N = P->cmap->N; 7049 hasoffproc = PETSC_TRUE; 7050 break; 7051 default: 7052 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7053 } 7054 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size)); 7055 if (size == 1) hasoffproc = PETSC_FALSE; 7056 7057 /* defaults */ 7058 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 7059 mp[i] = NULL; 7060 mptmp[i] = PETSC_FALSE; 7061 rmapt[i] = -1; 7062 cmapt[i] = -1; 7063 rmapa[i] = NULL; 7064 cmapa[i] = NULL; 7065 } 7066 7067 /* customization */ 7068 PetscCall(PetscNew(&mmdata)); 7069 mmdata->reusesym = product->api_user; 7070 if (ptype == MATPRODUCT_AB) { 7071 if (product->api_user) { 7072 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat"); 7073 
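      /* -matmatmult_backend_mergeB multiplies A_diag by a merged [P_diag P_off] instead of using two separate products;
         -matmatmult_backend_pothbind keeps the fetched P_oth bound to the CPU */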
PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 7074 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7075 PetscOptionsEnd(); 7076 } else { 7077 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat"); 7078 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 7079 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7080 PetscOptionsEnd(); 7081 } 7082 } else if (ptype == MATPRODUCT_PtAP) { 7083 if (product->api_user) { 7084 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat"); 7085 PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7086 PetscOptionsEnd(); 7087 } else { 7088 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 7089 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7090 PetscOptionsEnd(); 7091 } 7092 } 7093 a = (Mat_MPIAIJ*)A->data; 7094 p = (Mat_MPIAIJ*)P->data; 7095 PetscCall(MatSetSizes(C,m,n,M,N)); 7096 PetscCall(PetscLayoutSetUp(C->rmap)); 7097 PetscCall(PetscLayoutSetUp(C->cmap)); 7098 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 7099 PetscCall(MatGetOptionsPrefix(C,&prefix)); 7100 7101 cp = 0; 7102 switch (ptype) { 7103 case MATPRODUCT_AB: /* A * P */ 7104 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7105 7106 /* A_diag * P_local (merged or not) */ 7107 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7108 /* P is product->B */ 7109 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7110 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7111 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7112 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7113 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7114 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7115 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7116 mp[cp]->product->api_user = product->api_user; 7117 PetscCall(MatProductSetFromOptions(mp[cp])); 7118 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7119 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7120 PetscCall(ISGetIndices(glob,&globidx)); 7121 rmapt[cp] = 1; 7122 cmapt[cp] = 2; 7123 cmapa[cp] = globidx; 7124 mptmp[cp] = PETSC_FALSE; 7125 cp++; 7126 } else { /* A_diag * P_diag and A_diag * P_off */ 7127 PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp])); 7128 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7129 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7130 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7131 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7132 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7133 mp[cp]->product->api_user = product->api_user; 7134 
PetscCall(MatProductSetFromOptions(mp[cp])); 7135 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7136 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7137 rmapt[cp] = 1; 7138 cmapt[cp] = 1; 7139 mptmp[cp] = PETSC_FALSE; 7140 cp++; 7141 PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp])); 7142 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7143 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7144 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7145 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7146 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7147 mp[cp]->product->api_user = product->api_user; 7148 PetscCall(MatProductSetFromOptions(mp[cp])); 7149 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7150 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7151 rmapt[cp] = 1; 7152 cmapt[cp] = 2; 7153 cmapa[cp] = p->garray; 7154 mptmp[cp] = PETSC_FALSE; 7155 cp++; 7156 } 7157 7158 /* A_off * P_other */ 7159 if (mmdata->P_oth) { 7160 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */ 7161 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7162 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7163 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7164 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7165 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7166 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7167 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7168 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7169 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7170 mp[cp]->product->api_user = product->api_user; 7171 PetscCall(MatProductSetFromOptions(mp[cp])); 7172 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7173 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7174 rmapt[cp] = 1; 7175 cmapt[cp] = 2; 7176 cmapa[cp] = P_oth_idx; 7177 mptmp[cp] = PETSC_FALSE; 7178 cp++; 7179 } 7180 break; 7181 7182 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7183 /* A is product->B */ 7184 PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7185 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7186 PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp])); 7187 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7188 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7189 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7190 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7191 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7192 mp[cp]->product->api_user = product->api_user; 7193 PetscCall(MatProductSetFromOptions(mp[cp])); 7194 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7195 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7196 PetscCall(ISGetIndices(glob,&globidx)); 7197 rmapt[cp] = 2; 7198 rmapa[cp] = globidx; 7199 cmapt[cp] = 2; 7200 cmapa[cp] = globidx; 7201 mptmp[cp] = PETSC_FALSE; 7202 cp++; 
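      /* when A == P a single local AtB on the merged block suffices; the general case below needs two intermediate
         products, P_diag^T * A_loc and P_off^T * A_loc */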
7203 } else { 7204 PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp])); 7205 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7206 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7207 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7208 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7209 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7210 mp[cp]->product->api_user = product->api_user; 7211 PetscCall(MatProductSetFromOptions(mp[cp])); 7212 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7213 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7214 PetscCall(ISGetIndices(glob,&globidx)); 7215 rmapt[cp] = 1; 7216 cmapt[cp] = 2; 7217 cmapa[cp] = globidx; 7218 mptmp[cp] = PETSC_FALSE; 7219 cp++; 7220 PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp])); 7221 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7222 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7223 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7224 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7225 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7226 mp[cp]->product->api_user = product->api_user; 7227 PetscCall(MatProductSetFromOptions(mp[cp])); 7228 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7229 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7230 rmapt[cp] = 2; 7231 rmapa[cp] = p->garray; 7232 cmapt[cp] = 2; 7233 cmapa[cp] = globidx; 7234 mptmp[cp] = PETSC_FALSE; 7235 cp++; 7236 } 7237 break; 7238 case MATPRODUCT_PtAP: 7239 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7240 /* P is product->B */ 7241 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7242 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7243 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP)); 7244 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7245 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7246 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7247 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7248 mp[cp]->product->api_user = product->api_user; 7249 PetscCall(MatProductSetFromOptions(mp[cp])); 7250 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7251 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7252 PetscCall(ISGetIndices(glob,&globidx)); 7253 rmapt[cp] = 2; 7254 rmapa[cp] = globidx; 7255 cmapt[cp] = 2; 7256 cmapa[cp] = globidx; 7257 mptmp[cp] = PETSC_FALSE; 7258 cp++; 7259 if (mmdata->P_oth) { 7260 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); 7261 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7262 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7263 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7264 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7265 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7266 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7267 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7268 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7269 
PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7270 mp[cp]->product->api_user = product->api_user; 7271 PetscCall(MatProductSetFromOptions(mp[cp])); 7272 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7273 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7274 mptmp[cp] = PETSC_TRUE; 7275 cp++; 7276 PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp])); 7277 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7278 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7279 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7280 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7281 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7282 mp[cp]->product->api_user = product->api_user; 7283 PetscCall(MatProductSetFromOptions(mp[cp])); 7284 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7285 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7286 rmapt[cp] = 2; 7287 rmapa[cp] = globidx; 7288 cmapt[cp] = 2; 7289 cmapa[cp] = P_oth_idx; 7290 mptmp[cp] = PETSC_FALSE; 7291 cp++; 7292 } 7293 break; 7294 default: 7295 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7296 } 7297 /* sanity check */ 7298 if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i); 7299 7300 PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp)); 7301 for (i = 0; i < cp; i++) { 7302 mmdata->mp[i] = mp[i]; 7303 mmdata->mptmp[i] = mptmp[i]; 7304 } 7305 mmdata->cp = cp; 7306 C->product->data = mmdata; 7307 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7308 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7309 7310 /* memory type */ 7311 mmdata->mtype = PETSC_MEMTYPE_HOST; 7312 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"")); 7313 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"")); 7314 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7315 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7316 7317 /* prepare coo coordinates for values insertion */ 7318 7319 /* count total nonzeros of those intermediate seqaij Mats 7320 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7321 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7322 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7323 */ 7324 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7325 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7326 if (mptmp[cp]) continue; 7327 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7328 const PetscInt *rmap = rmapa[cp]; 7329 const PetscInt mr = mp[cp]->rmap->n; 7330 const PetscInt rs = C->rmap->rstart; 7331 const PetscInt re = C->rmap->rend; 7332 const PetscInt *ii = mm->i; 7333 for (i = 0; i < mr; i++) { 7334 const PetscInt gr = rmap[i]; 7335 const PetscInt nz = ii[i+1] - ii[i]; 7336 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7337 else ncoo_oown += nz; /* this row is local */ 7338 } 7339 } else ncoo_d += mm->nz; 7340 } 7341 7342 /* 7343 ncoo: total number of 
nonzeros (including those inserted by remote procs) belonging to this proc 7344 7345 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7346 7347 off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 7348 7349 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7350 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7351 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7352 7353 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7354 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 7355 */ 7356 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */ 7357 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own)); 7358 7359 /* gather (i,j) of nonzeros inserted by remote procs */ 7360 if (hasoffproc) { 7361 PetscSF msf; 7362 PetscInt ncoo2,*coo_i2,*coo_j2; 7363 7364 PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0])); 7365 PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0])); 7366 PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */ 7367 7368 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7369 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7370 PetscInt *idxoff = mmdata->off[cp]; 7371 PetscInt *idxown = mmdata->own[cp]; 7372 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7373 const PetscInt *rmap = rmapa[cp]; 7374 const PetscInt *cmap = cmapa[cp]; 7375 const PetscInt *ii = mm->i; 7376 PetscInt *coi = coo_i + ncoo_o; 7377 PetscInt *coj = coo_j + ncoo_o; 7378 const PetscInt mr = mp[cp]->rmap->n; 7379 const PetscInt rs = C->rmap->rstart; 7380 const PetscInt re = C->rmap->rend; 7381 const PetscInt cs = C->cmap->rstart; 7382 for (i = 0; i < mr; i++) { 7383 const PetscInt *jj = mm->j + ii[i]; 7384 const PetscInt gr = rmap[i]; 7385 const PetscInt nz = ii[i+1] - ii[i]; 7386 if (gr < rs || gr >= re) { /* this is an offproc row */ 7387 for (j = ii[i]; j < ii[i+1]; j++) { 7388 *coi++ = gr; 7389 *idxoff++ = j; 7390 } 7391 if (!cmapt[cp]) { /* already global */ 7392 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7393 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7394 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7395 } else { /* offdiag */ 7396 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7397 } 7398 ncoo_o += nz; 7399 } else { /* this is a local row */ 7400 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 7401 } 7402 } 7403 } 7404 mmdata->off[cp + 1] = idxoff; 7405 mmdata->own[cp + 1] = idxown; 7406 } 7407 7408 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7409 PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i)); 7410 PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf)); 7411 PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL)); 7412 ncoo = ncoo_d + ncoo_oown + ncoo2; 7413 PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2)); 7414 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7415 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); 7416 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7417 
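/* Illustration with hypothetical sizes: if on this rank ncoo_d = 5, ncoo_oown = 2 and the multi-SF reports ncoo2 = 3 entries incoming from other ranks, then ncoo = 10 and coo_i2/coo_j2 are laid out as
     [ 7 entries inserted locally (filled product by product in the loop further below) | 3 entries gathered from remote ranks through mmdata->sf ]
   The PetscSFGather pair around this point fills that trailing segment of coo_j2 with the column indices of the remote entries, mirroring what was just done for coo_i2. */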
PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7418 PetscCall(PetscFree2(coo_i,coo_j)); 7419 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7420 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w)); 7421 coo_i = coo_i2; 7422 coo_j = coo_j2; 7423 } else { /* no offproc values insertion */ 7424 ncoo = ncoo_d; 7425 PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j)); 7426 7427 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7428 PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER)); 7429 PetscCall(PetscSFSetUp(mmdata->sf)); 7430 } 7431 mmdata->hasoffproc = hasoffproc; 7432 7433 /* gather (i,j) of nonzeros inserted locally */ 7434 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7435 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7436 PetscInt *coi = coo_i + ncoo_d; 7437 PetscInt *coj = coo_j + ncoo_d; 7438 const PetscInt *jj = mm->j; 7439 const PetscInt *ii = mm->i; 7440 const PetscInt *cmap = cmapa[cp]; 7441 const PetscInt *rmap = rmapa[cp]; 7442 const PetscInt mr = mp[cp]->rmap->n; 7443 const PetscInt rs = C->rmap->rstart; 7444 const PetscInt re = C->rmap->rend; 7445 const PetscInt cs = C->cmap->rstart; 7446 7447 if (mptmp[cp]) continue; 7448 if (rmapt[cp] == 1) { /* consecutive rows */ 7449 /* fill coo_i */ 7450 for (i = 0; i < mr; i++) { 7451 const PetscInt gr = i + rs; 7452 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 7453 } 7454 /* fill coo_j */ 7455 if (!cmapt[cp]) { /* type-0, already global */ 7456 PetscCall(PetscArraycpy(coj,jj,mm->nz)); 7457 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7458 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7459 } else { /* type-2, local to global for sparse columns */ 7460 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7461 } 7462 ncoo_d += mm->nz; 7463 } else if (rmapt[cp] == 2) { /* sparse rows */ 7464 for (i = 0; i < mr; i++) { 7465 const PetscInt *jj = mm->j + ii[i]; 7466 const PetscInt gr = rmap[i]; 7467 const PetscInt nz = ii[i+1] - ii[i]; 7468 if (gr >= rs && gr < re) { /* local rows */ 7469 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 7470 if (!cmapt[cp]) { /* type-0, already global */ 7471 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7472 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7473 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7474 } else { /* type-2, local to global for sparse columns */ 7475 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7476 } 7477 ncoo_d += nz; 7478 } 7479 } 7480 } 7481 } 7482 if (glob) { 7483 PetscCall(ISRestoreIndices(glob,&globidx)); 7484 } 7485 PetscCall(ISDestroy(&glob)); 7486 if (P_oth_l2g) { 7487 PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx)); 7488 } 7489 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7490 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7491 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v)); 7492 7493 /* preallocate with COO data */ 7494 PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j)); 7495 PetscCall(PetscFree2(coo_i,coo_j)); 7496 PetscFunctionReturn(0); 7497 } 7498 7499 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7500 { 7501 Mat_Product *product = mat->product; 7502 #if defined(PETSC_HAVE_DEVICE) 7503 PetscBool match = PETSC_FALSE; 7504 PetscBool usecpu = PETSC_FALSE; 
7505 #else 7506 PetscBool match = PETSC_TRUE; 7507 #endif 7508 7509 PetscFunctionBegin; 7510 MatCheckProduct(mat,1); 7511 #if defined(PETSC_HAVE_DEVICE) 7512 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7513 PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match)); 7514 } 7515 if (match) { /* we can always fallback to the CPU if requested */ 7516 switch (product->type) { 7517 case MATPRODUCT_AB: 7518 if (product->api_user) { 7519 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat"); 7520 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7521 PetscOptionsEnd(); 7522 } else { 7523 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat"); 7524 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7525 PetscOptionsEnd(); 7526 } 7527 break; 7528 case MATPRODUCT_AtB: 7529 if (product->api_user) { 7530 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat"); 7531 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7532 PetscOptionsEnd(); 7533 } else { 7534 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat"); 7535 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7536 PetscOptionsEnd(); 7537 } 7538 break; 7539 case MATPRODUCT_PtAP: 7540 if (product->api_user) { 7541 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat"); 7542 PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7543 PetscOptionsEnd(); 7544 } else { 7545 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat"); 7546 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7547 PetscOptionsEnd(); 7548 } 7549 break; 7550 default: 7551 break; 7552 } 7553 match = (PetscBool)!usecpu; 7554 } 7555 #endif 7556 if (match) { 7557 switch (product->type) { 7558 case MATPRODUCT_AB: 7559 case MATPRODUCT_AtB: 7560 case MATPRODUCT_PtAP: 7561 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7562 break; 7563 default: 7564 break; 7565 } 7566 } 7567 /* fallback to MPIAIJ ops */ 7568 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7569 PetscFunctionReturn(0); 7570 } 7571 7572 /* 7573 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7574 7575 n - the number of block indices in cc[] 7576 cc - the block indices (must be large enough to contain the indices) 7577 */ 7578 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc) 7579 { 7580 PetscInt cnt = -1,nidx,j; 7581 const PetscInt *idx; 7582 7583 PetscFunctionBegin; 7584 PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL)); 7585 if (nidx) { 7586 cnt = 0; 7587 cc[cnt] = idx[0]/bs; 7588 for (j=1; j<nidx; j++) { 7589 if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs; 7590 } 7591 } 7592 PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL)); 7593 *n = cnt+1; 7594 PetscFunctionReturn(0); 7595 } 7596 7597 /* 7598 Produces a set of block column indices of the matrix block row, one for 
each block represented in the original set of rows 7599 7600 ncollapsed - the number of block indices 7601 collapsed - the block indices (must be large enough to contain the indices) 7602 */ 7603 static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed) 7604 { 7605 PetscInt i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp; 7606 7607 PetscFunctionBegin; 7608 PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev)); 7609 for (i=start+1; i<start+bs; i++) { 7610 PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur)); 7611 PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged)); 7612 cprevtmp = cprev; cprev = merged; merged = cprevtmp; 7613 } 7614 *ncollapsed = nprev; 7615 if (collapsed) *collapsed = cprev; 7616 PetscFunctionReturn(0); 7617 } 7618 7619 /* -------------------------------------------------------------------------- */ 7620 /* 7621 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7622 7623 Input Parameter: 7624 . Amat - matrix 7625 - symmetrize - make the result symmetric 7626 + scale - scale with diagonal 7627 7628 Output Parameter: 7629 . a_Gmat - output scalar graph >= 0 7630 7631 */ 7632 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat) 7633 { 7634 PetscInt Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs; 7635 MPI_Comm comm; 7636 Mat Gmat; 7637 PetscBool ismpiaij,isseqaij; 7638 Mat a, b, c; 7639 MatType jtype; 7640 7641 PetscFunctionBegin; 7642 PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm)); 7643 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7644 PetscCall(MatGetSize(Amat, &MM, &NN)); 7645 PetscCall(MatGetBlockSize(Amat, &bs)); 7646 nloc = (Iend-Istart)/bs; 7647 7648 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij)); 7649 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij)); 7650 PetscCheck(isseqaij || ismpiaij,comm,PETSC_ERR_USER,"Require (MPI)AIJ matrix type"); 7651 7652 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7653 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7654 implementation */ 7655 if (bs > 1) { 7656 PetscCall(MatGetType(Amat,&jtype)); 7657 PetscCall(MatCreate(comm, &Gmat)); 7658 PetscCall(MatSetType(Gmat, jtype)); 7659 PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE)); 7660 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7661 if (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray) { 7662 PetscInt *d_nnz, *o_nnz; 7663 MatScalar *aa,val,AA[4096]; 7664 PetscInt *aj,*ai,AJ[4096],nc; 7665 if (isseqaij) { a = Amat; b = NULL; } 7666 else { 7667 Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data; 7668 a = d->A; b = d->B; 7669 } 7670 PetscCall(PetscInfo(Amat,"New bs>1 Graph. nloc=%" PetscInt_FMT "\n",nloc)); 7671 PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz)); 7672 for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ 7673 PetscInt *nnz = (c==a) ? 
d_nnz : o_nnz, nmax=0; 7674 const PetscInt *cols; 7675 for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows 7676 PetscCall(MatGetRow(c,brow,&jj,&cols,NULL)); 7677 nnz[brow/bs] = jj/bs; 7678 if (jj%bs) ok = 0; 7679 if (cols) j0 = cols[0]; 7680 else j0 = -1; 7681 PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL)); 7682 if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs]; 7683 for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks 7684 PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL)); 7685 if (jj%bs) ok = 0; 7686 if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; 7687 if (nnz[brow/bs] != jj/bs) ok = 0; 7688 PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL)); 7689 } 7690 if (!ok) { 7691 PetscCall(PetscFree2(d_nnz,o_nnz)); 7692 goto old_bs; 7693 } 7694 } 7695 PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %" PetscInt_FMT " too small 4096.",nmax); 7696 } 7697 PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz)); 7698 PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz)); 7699 PetscCall(PetscFree2(d_nnz,o_nnz)); 7700 // diag 7701 for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows 7702 Mat_SeqAIJ *aseq = (Mat_SeqAIJ*)a->data; 7703 ai = aseq->i; 7704 n = ai[brow+1] - ai[brow]; 7705 aj = aseq->j + ai[brow]; 7706 for (int k=0; k<n; k += bs) { // block columns 7707 AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart) 7708 val = 0; 7709 for (int ii=0; ii<bs; ii++) { // rows in block 7710 aa = aseq->a + ai[brow+ii] + k; 7711 for (int jj=0; jj<bs; jj++) { // columns in block 7712 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7713 } 7714 } 7715 AA[k/bs] = val; 7716 } 7717 grow = Istart/bs + brow/bs; 7718 PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES)); 7719 } 7720 // off-diag 7721 if (ismpiaij) { 7722 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)Amat->data; 7723 const PetscScalar *vals; 7724 const PetscInt *cols, *garray = aij->garray; 7725 PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?"); 7726 for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows 7727 PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL)); 7728 for (int k=0,cidx=0 ; k < ncols ; k += bs, cidx++) { 7729 AA[k/bs] = 0; 7730 AJ[cidx] = garray[cols[k]]/bs; 7731 } 7732 nc = ncols/bs; 7733 PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL)); 7734 for (int ii=0; ii<bs; ii++) { // rows in block 7735 PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals)); 7736 for (int k=0; k<ncols; k += bs) { 7737 for (int jj=0; jj<bs; jj++) { // cols in block 7738 AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj])); 7739 } 7740 } 7741 PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals)); 7742 } 7743 grow = Istart/bs + brow/bs; 7744 PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES)); 7745 } 7746 } 7747 PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY)); 7748 PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY)); 7749 } else { 7750 const PetscScalar *vals; 7751 const PetscInt *idx; 7752 PetscInt *d_nnz, *o_nnz,*w0,*w1,*w2; 7753 old_bs: 7754 /* 7755 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7756 */ 7757 PetscCall(PetscInfo(Amat,"OLD bs>1 CreateGraph\n")); 7758 PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 
0 : nloc, &o_nnz)); 7759 if (isseqaij) { 7760 PetscInt max_d_nnz; 7761 /* 7762 Determine exact preallocation count for (sequential) scalar matrix 7763 */ 7764 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz)); 7765 max_d_nnz = PetscMin(nloc,bs*max_d_nnz); 7766 PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2)); 7767 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) { 7768 PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL)); 7769 } 7770 PetscCall(PetscFree3(w0,w1,w2)); 7771 } else if (ismpiaij) { 7772 Mat Daij,Oaij; 7773 const PetscInt *garray; 7774 PetscInt max_d_nnz; 7775 PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray)); 7776 /* 7777 Determine exact preallocation count for diagonal block portion of scalar matrix 7778 */ 7779 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz)); 7780 max_d_nnz = PetscMin(nloc,bs*max_d_nnz); 7781 PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2)); 7782 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7783 PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL)); 7784 } 7785 PetscCall(PetscFree3(w0,w1,w2)); 7786 /* 7787 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7788 */ 7789 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7790 o_nnz[jj] = 0; 7791 for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */ 7792 PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL)); 7793 o_nnz[jj] += ncols; 7794 PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL)); 7795 } 7796 if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc; 7797 } 7798 } else SETERRQ(comm,PETSC_ERR_USER,"Require AIJ matrix type"); 7799 /* get scalar copy (norms) of matrix */ 7800 PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz)); 7801 PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz)); 7802 PetscCall(PetscFree2(d_nnz,o_nnz)); 7803 for (Ii = Istart; Ii < Iend; Ii++) { 7804 PetscInt dest_row = Ii/bs; 7805 PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals)); 7806 for (jj=0; jj<ncols; jj++) { 7807 PetscInt dest_col = idx[jj]/bs; 7808 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7809 PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES)); 7810 } 7811 PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals)); 7812 } 7813 PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY)); 7814 PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY)); 7815 } 7816 } else { 7817 /* TODO GPU: optimization proposal, each class provides fast implementation of this 7818 procedure via MatAbs API */ 7819 /* just copy scalar matrix & abs() */ 7820 PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7821 if (isseqaij) { a = Gmat; b = NULL; } 7822 else { 7823 Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data; 7824 a = d->A; b = d->B; 7825 } 7826 /* abs */ 7827 for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ 7828 MatInfo info; 7829 PetscScalar *avals; 7830 PetscCall(MatGetInfo(c,MAT_LOCAL,&info)); 7831 PetscCall(MatSeqAIJGetArray(c,&avals)); 7832 for (int jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7833 PetscCall(MatSeqAIJRestoreArray(c,&avals)); 7834 } 7835 } 7836 if (symmetrize) { 7837 PetscBool issym; 7838 PetscCall(MatGetOption(Amat,MAT_SYMMETRIC,&issym)); 7839 if (!issym) { 7840 Mat matTrans; 7841 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 7842 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? 
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 7843 PetscCall(MatDestroy(&matTrans)); 7844 } 7845 PetscCall(MatSetOption(Gmat,MAT_SYMMETRIC,PETSC_TRUE)); 7846 } else { 7847 PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 7848 } 7849 if (scale) { 7850 /* scale Gmat so that all diagonal values are +1 or -1 */ 7851 Vec diag; 7852 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 7853 PetscCall(MatGetDiagonal(Gmat, diag)); 7854 PetscCall(VecReciprocal(diag)); 7855 PetscCall(VecSqrtAbs(diag)); 7856 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 7857 PetscCall(VecDestroy(&diag)); 7858 } 7859 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 7860 *a_Gmat = Gmat; 7861 PetscFunctionReturn(0); 7862 } 7863 7864 /* -------------------------------------------------------------------------- */ 7865 /*@C 7866 MatFilter_AIJ - filter out graph entries with small absolute value 7867 If vfilter < 0 this routine does nothing and should not be called. 7868 7869 Collective on Mat 7870 7871 Input Parameters: 7872 + Gmat - the graph 7873 - vfilter - threshold parameter [0,1) 7874 7875 Output Parameter: 7876 . filteredG - output filtered scalar graph 7877 7878 Level: developer 7879 7880 Notes: 7881 This is called before the graph coarseners are called. 7882 This could go into Mat, move 'symm' to GAMG 7883 7884 .seealso: `PCGAMGSetThreshold()` 7885 @*/ 7886 PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat,PetscReal vfilter, Mat *filteredG) 7887 { 7888 PetscInt Istart,Iend,ncols,nnz0,nnz1, NN, MM, nloc; 7889 Mat tGmat; 7890 MPI_Comm comm; 7891 const PetscScalar *vals; 7892 const PetscInt *idx; 7893 PetscInt *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols=0; 7894 MatScalar *AA; // this is checked in graph 7895 PetscBool isseqaij; 7896 Mat a, b, c; 7897 MatType jtype; 7898 7899 PetscFunctionBegin; 7900 PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm)); 7901 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isseqaij)); 7902 PetscCall(MatGetType(Gmat,&jtype)); 7903 PetscCall(MatCreate(comm, &tGmat)); 7904 PetscCall(MatSetType(tGmat, jtype)); 7905 7906 /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold? 7907 Also, if the matrix is symmetric, can we skip this 7908 operation? It can be very expensive on large matrices.
*/ 7909 7910 // global sizes 7911 PetscCall(MatGetSize(Gmat, &MM, &NN)); 7912 PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); 7913 nloc = Iend - Istart; 7914 PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz)); 7915 if (isseqaij) { a = Gmat; b = NULL; } 7916 else { 7917 Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data; 7918 a = d->A; b = d->B; 7919 garray = d->garray; 7920 } 7921 /* Determine upper bound on non-zeros needed in new filtered matrix */ 7922 for (PetscInt row=0; row < nloc; row++) { 7923 PetscCall(MatGetRow(a,row,&ncols,NULL,NULL)); 7924 d_nnz[row] = ncols; 7925 if (ncols>maxcols) maxcols=ncols; 7926 PetscCall(MatRestoreRow(a,row,&ncols,NULL,NULL)); 7927 } 7928 if (b) { 7929 for (PetscInt row=0; row < nloc; row++) { 7930 PetscCall(MatGetRow(b,row,&ncols,NULL,NULL)); 7931 o_nnz[row] = ncols; 7932 if (ncols>maxcols) maxcols=ncols; 7933 PetscCall(MatRestoreRow(b,row,&ncols,NULL,NULL)); 7934 } 7935 } 7936 PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM)); 7937 PetscCall(MatSetBlockSizes(tGmat, 1, 1)); 7938 PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz)); 7939 PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz)); 7940 PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 7941 PetscCall(PetscFree2(d_nnz,o_nnz)); 7942 // 7943 PetscCall(PetscMalloc2(maxcols, &AA,maxcols, &AJ)); 7944 nnz0 = nnz1 = 0; 7945 for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ 7946 for (PetscInt row=0, grow=Istart, ncol_row, jj ; row < nloc; row++,grow++) { 7947 PetscCall(MatGetRow(c,row,&ncols,&idx,&vals)); 7948 for (ncol_row=jj=0; jj<ncols; jj++,nnz0++) { 7949 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7950 if (PetscRealPart(sv) > vfilter) { 7951 nnz1++; 7952 PetscInt cid = idx[jj] + Istart; //diag 7953 if (c!=a) cid = garray[idx[jj]]; 7954 AA[ncol_row] = vals[jj]; 7955 AJ[ncol_row] = cid; 7956 ncol_row++; 7957 } 7958 } 7959 PetscCall(MatRestoreRow(c,row,&ncols,&idx,&vals)); 7960 PetscCall(MatSetValues(tGmat,1,&grow,ncol_row,AJ,AA,INSERT_VALUES)); 7961 } 7962 } 7963 PetscCall(PetscFree2(AA,AJ)); 7964 PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY)); 7965 PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY)); 7966 PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); /* Normal Mat options are not relevant ? */ 7967 7968 PetscCall(PetscInfo(tGmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", 7969 (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, (double)vfilter, 7970 (!nloc) ? 1. : (double)nnz0/(double)nloc,MM,(int)maxcols)); 7971 7972 *filteredG = tGmat; 7973 PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view")); 7974 PetscFunctionReturn(0); 7975 } 7976 7977 /* 7978 Special version for direct calls from Fortran 7979 */ 7980 #include <petsc/private/fortranimpl.h> 7981 7982 /* Change these macros so can be used in void function */ 7983 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 7984 #undef PetscCall 7985 #define PetscCall(...) do { \ 7986 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 7987 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 7988 *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \ 7989 return; \ 7990 } \ 7991 } while (0) 7992 7993 #undef SETERRQ 7994 #define SETERRQ(comm,ierr,...) 
do { \ 7995 *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \ 7996 return; \ 7997 } while (0) 7998 7999 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8000 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8001 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8002 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8003 #else 8004 #endif 8005 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 8006 { 8007 Mat mat = *mmat; 8008 PetscInt m = *mm, n = *mn; 8009 InsertMode addv = *maddv; 8010 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 8011 PetscScalar value; 8012 8013 MatCheckPreallocated(mat,1); 8014 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8015 else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 8016 { 8017 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 8018 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 8019 PetscBool roworiented = aij->roworiented; 8020 8021 /* Some Variables required in the macro */ 8022 Mat A = aij->A; 8023 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 8024 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 8025 MatScalar *aa; 8026 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8027 Mat B = aij->B; 8028 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 8029 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 8030 MatScalar *ba; 8031 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8032 * cannot use "#if defined" inside a macro. 
*/ 8033 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8034 8035 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 8036 PetscInt nonew = a->nonew; 8037 MatScalar *ap1,*ap2; 8038 8039 PetscFunctionBegin; 8040 PetscCall(MatSeqAIJGetArray(A,&aa)); 8041 PetscCall(MatSeqAIJGetArray(B,&ba)); 8042 for (i=0; i<m; i++) { 8043 if (im[i] < 0) continue; 8044 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 8045 if (im[i] >= rstart && im[i] < rend) { 8046 row = im[i] - rstart; 8047 lastcol1 = -1; 8048 rp1 = aj + ai[row]; 8049 ap1 = aa + ai[row]; 8050 rmax1 = aimax[row]; 8051 nrow1 = ailen[row]; 8052 low1 = 0; 8053 high1 = nrow1; 8054 lastcol2 = -1; 8055 rp2 = bj + bi[row]; 8056 ap2 = ba + bi[row]; 8057 rmax2 = bimax[row]; 8058 nrow2 = bilen[row]; 8059 low2 = 0; 8060 high2 = nrow2; 8061 8062 for (j=0; j<n; j++) { 8063 if (roworiented) value = v[i*n+j]; 8064 else value = v[i+j*m]; 8065 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8066 if (in[j] >= cstart && in[j] < cend) { 8067 col = in[j] - cstart; 8068 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 8069 } else if (in[j] < 0) continue; 8070 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8071 /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */ 8072 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 8073 } else { 8074 if (mat->was_assembled) { 8075 if (!aij->colmap) { 8076 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8077 } 8078 #if defined(PETSC_USE_CTABLE) 8079 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); 8080 col--; 8081 #else 8082 col = aij->colmap[in[j]] - 1; 8083 #endif 8084 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 8085 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8086 col = in[j]; 8087 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8088 B = aij->B; 8089 b = (Mat_SeqAIJ*)B->data; 8090 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 8091 rp2 = bj + bi[row]; 8092 ap2 = ba + bi[row]; 8093 rmax2 = bimax[row]; 8094 nrow2 = bilen[row]; 8095 low2 = 0; 8096 high2 = nrow2; 8097 bm = aij->B->rmap->n; 8098 ba = b->a; 8099 inserted = PETSC_FALSE; 8100 } 8101 } else col = in[j]; 8102 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 8103 } 8104 } 8105 } else if (!aij->donotstash) { 8106 if (roworiented) { 8107 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8108 } else { 8109 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8110 } 8111 } 8112 } 8113 PetscCall(MatSeqAIJRestoreArray(A,&aa)); 8114 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 8115 } 8116 PetscFunctionReturnVoid(); 8117 } 8118 8119 /* Undefining these here since they were redefined from their original definition above! No 8120 * other PETSc functions should be defined past this point, as it is impossible to recover the 8121 * original definitions */ 8122 #undef PetscCall 8123 #undef SETERRQ 8124
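/* Minimal usage sketch (not part of this file) of how the backend product kernels above are typically reached through the public MatProduct interface; A and P are assumed to be assembled MPIAIJ (or device subclass) matrices:

     Mat C;
     PetscCall(MatProductCreate(A,P,NULL,&C));
     PetscCall(MatProductSetType(C,MATPRODUCT_PtAP));
     PetscCall(MatProductSetFromOptions(C)); // may select MatProductSymbolic_MPIAIJBACKEND
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));        // dispatches to MatProductNumeric_MPIAIJBACKEND
     PetscCall(MatDestroy(&C));

   The CPU fallback can be requested with -matptap_backend_cpu (or, when not going through the MatPtAP() API, with -mat_product_algorithm_backend_cpu). */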