1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 10 { 11 Mat B; 12 13 PetscFunctionBegin; 14 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B)); 15 PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B)); 16 PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done)); 17 PetscFunctionReturn(0); 18 } 19 20 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 21 { 22 Mat B; 23 24 PetscFunctionBegin; 25 PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B)); 26 PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done)); 27 PetscCall(MatDestroy(&B)); 28 PetscFunctionReturn(0); 29 } 30 31 /*MC 32 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 33 34 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 35 and MATMPIAIJ otherwise. As a result, for single process communicators, 36 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 37 for communicators controlling multiple processes. It is recommended that you call both of 38 the above preallocation routines for simplicity. 39 40 Options Database Keys: 41 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 42 43 Developer Notes: 44 Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 45 enough exist. 
46 47 Level: beginner 48 49 .seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 50 M*/ 51 52 /*MC 53 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 54 55 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 56 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 57 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 58 for communicators controlling multiple processes. It is recommended that you call both of 59 the above preallocation routines for simplicity. 60 61 Options Database Keys: 62 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 63 64 Level: beginner 65 66 .seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 67 M*/ 68 69 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 70 { 71 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 72 73 PetscFunctionBegin; 74 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 75 A->boundtocpu = flg; 76 #endif 77 if (a->A) PetscCall(MatBindToCPU(a->A,flg)); 78 if (a->B) PetscCall(MatBindToCPU(a->B,flg)); 79 80 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 81 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 82 * to differ from the parent matrix. 
*/ 83 if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg)); 84 if (a->diag) PetscCall(VecBindToCPU(a->diag,flg)); 85 86 PetscFunctionReturn(0); 87 } 88 89 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 90 { 91 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 92 93 PetscFunctionBegin; 94 if (mat->A) { 95 PetscCall(MatSetBlockSizes(mat->A,rbs,cbs)); 96 PetscCall(MatSetBlockSizes(mat->B,rbs,1)); 97 } 98 PetscFunctionReturn(0); 99 } 100 101 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 102 { 103 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 104 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 105 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 106 const PetscInt *ia,*ib; 107 const MatScalar *aa,*bb,*aav,*bav; 108 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 109 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 110 111 PetscFunctionBegin; 112 *keptrows = NULL; 113 114 ia = a->i; 115 ib = b->i; 116 PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav)); 117 PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav)); 118 for (i=0; i<m; i++) { 119 na = ia[i+1] - ia[i]; 120 nb = ib[i+1] - ib[i]; 121 if (!na && !nb) { 122 cnt++; 123 goto ok1; 124 } 125 aa = aav + ia[i]; 126 for (j=0; j<na; j++) { 127 if (aa[j] != 0.0) goto ok1; 128 } 129 bb = bav + ib[i]; 130 for (j=0; j <nb; j++) { 131 if (bb[j] != 0.0) goto ok1; 132 } 133 cnt++; 134 ok1:; 135 } 136 PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M))); 137 if (!n0rows) { 138 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 139 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 140 PetscFunctionReturn(0); 141 } 142 PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows)); 143 cnt = 0; 144 for (i=0; i<m; i++) { 145 na = ia[i+1] - ia[i]; 146 nb = ib[i+1] - ib[i]; 147 if (!na && !nb) continue; 148 aa = aav + ia[i]; 149 for (j=0; j<na;j++) { 150 if (aa[j] != 0.0) { 151 rows[cnt++] = rstart + i; 152 goto ok2; 153 } 154 } 155 bb = bav + ib[i]; 156 for (j=0; j<nb; j++) { 157 if (bb[j] != 0.0) { 158 rows[cnt++] = 
rstart + i; 159 goto ok2; 160 } 161 } 162 ok2:; 163 } 164 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows)); 165 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 166 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 167 PetscFunctionReturn(0); 168 } 169 170 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 171 { 172 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 173 PetscBool cong; 174 175 PetscFunctionBegin; 176 PetscCall(MatHasCongruentLayouts(Y,&cong)); 177 if (Y->assembled && cong) { 178 PetscCall(MatDiagonalSet(aij->A,D,is)); 179 } else { 180 PetscCall(MatDiagonalSet_Default(Y,D,is)); 181 } 182 PetscFunctionReturn(0); 183 } 184 185 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 188 PetscInt i,rstart,nrows,*rows; 189 190 PetscFunctionBegin; 191 *zrows = NULL; 192 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows)); 193 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 194 for (i=0; i<nrows; i++) rows[i] += rstart; 195 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows)); 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions) 200 { 201 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 202 PetscInt i,m,n,*garray = aij->garray; 203 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 204 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 205 PetscReal *work; 206 const PetscScalar *dummy; 207 208 PetscFunctionBegin; 209 PetscCall(MatGetSize(A,&m,&n)); 210 PetscCall(PetscCalloc1(n,&work)); 211 PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy)); 212 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy)); 213 PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy)); 214 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy)); 215 if (type == NORM_2) { 216 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 217 work[A->cmap->rstart + a_aij->j[i]] += 
PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 218 } 219 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 220 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 221 } 222 } else if (type == NORM_1) { 223 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 224 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 225 } 226 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 227 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 228 } 229 } else if (type == NORM_INFINITY) { 230 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 231 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 232 } 233 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 234 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 235 } 236 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 237 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 238 work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 239 } 240 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 241 work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 242 } 243 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 244 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 245 work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 246 } 247 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 248 work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 249 } 250 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 251 if (type == NORM_INFINITY) { 252 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 253 } else { 254 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 255 } 256 PetscCall(PetscFree(work)); 257 if (type == NORM_2) { 258 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 259 } else if (type 
== REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 260 for (i=0; i<n; i++) reductions[i] /= m; 261 } 262 PetscFunctionReturn(0); 263 } 264 265 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 266 { 267 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 268 IS sis,gis; 269 const PetscInt *isis,*igis; 270 PetscInt n,*iis,nsis,ngis,rstart,i; 271 272 PetscFunctionBegin; 273 PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis)); 274 PetscCall(MatFindNonzeroRows(a->B,&gis)); 275 PetscCall(ISGetSize(gis,&ngis)); 276 PetscCall(ISGetSize(sis,&nsis)); 277 PetscCall(ISGetIndices(sis,&isis)); 278 PetscCall(ISGetIndices(gis,&igis)); 279 280 PetscCall(PetscMalloc1(ngis+nsis,&iis)); 281 PetscCall(PetscArraycpy(iis,igis,ngis)); 282 PetscCall(PetscArraycpy(iis+ngis,isis,nsis)); 283 n = ngis + nsis; 284 PetscCall(PetscSortRemoveDupsInt(&n,iis)); 285 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 286 for (i=0; i<n; i++) iis[i] += rstart; 287 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is)); 288 289 PetscCall(ISRestoreIndices(sis,&isis)); 290 PetscCall(ISRestoreIndices(gis,&igis)); 291 PetscCall(ISDestroy(&sis)); 292 PetscCall(ISDestroy(&gis)); 293 PetscFunctionReturn(0); 294 } 295 296 /* 297 Local utility routine that creates a mapping from the global column 298 number to the local number in the off-diagonal part of the local 299 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 300 a slightly higher hash table cost; without it it is not scalable (each processor 301 has an order N integer array but is fast to access. 
302 */ 303 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 306 PetscInt n = aij->B->cmap->n,i; 307 308 PetscFunctionBegin; 309 PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 310 #if defined(PETSC_USE_CTABLE) 311 PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap)); 312 for (i=0; i<n; i++) { 313 PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES)); 314 } 315 #else 316 PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap)); 317 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt))); 318 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 319 #endif 320 PetscFunctionReturn(0); 321 } 322 323 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 324 { \ 325 if (col <= lastcol1) low1 = 0; \ 326 else high1 = nrow1; \ 327 lastcol1 = col;\ 328 while (high1-low1 > 5) { \ 329 t = (low1+high1)/2; \ 330 if (rp1[t] > col) high1 = t; \ 331 else low1 = t; \ 332 } \ 333 for (_i=low1; _i<high1; _i++) { \ 334 if (rp1[_i] > col) break; \ 335 if (rp1[_i] == col) { \ 336 if (addv == ADD_VALUES) { \ 337 ap1[_i] += value; \ 338 /* Not sure LogFlops will slow dow the code or not */ \ 339 (void)PetscLogFlops(1.0); \ 340 } \ 341 else ap1[_i] = value; \ 342 goto a_noinsert; \ 343 } \ 344 } \ 345 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 346 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 347 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 348 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 349 N = nrow1++ - 1; a->nz++; high1++; \ 350 /* shift up all the later entries in this row */ \ 351 PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\ 352 
PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\ 353 rp1[_i] = col; \ 354 ap1[_i] = value; \ 355 A->nonzerostate++;\ 356 a_noinsert: ; \ 357 ailen[row] = nrow1; \ 358 } 359 360 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 361 { \ 362 if (col <= lastcol2) low2 = 0; \ 363 else high2 = nrow2; \ 364 lastcol2 = col; \ 365 while (high2-low2 > 5) { \ 366 t = (low2+high2)/2; \ 367 if (rp2[t] > col) high2 = t; \ 368 else low2 = t; \ 369 } \ 370 for (_i=low2; _i<high2; _i++) { \ 371 if (rp2[_i] > col) break; \ 372 if (rp2[_i] == col) { \ 373 if (addv == ADD_VALUES) { \ 374 ap2[_i] += value; \ 375 (void)PetscLogFlops(1.0); \ 376 } \ 377 else ap2[_i] = value; \ 378 goto b_noinsert; \ 379 } \ 380 } \ 381 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 382 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 383 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 384 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 385 N = nrow2++ - 1; b->nz++; high2++; \ 386 /* shift up all the later entries in this row */ \ 387 PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\ 388 PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\ 389 rp2[_i] = col; \ 390 ap2[_i] = value; \ 391 B->nonzerostate++; \ 392 b_noinsert: ; \ 393 bilen[row] = nrow2; \ 394 } 395 396 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 397 { 398 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 399 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 400 PetscInt l,*garray = mat->garray,diag; 401 PetscScalar *aa,*ba; 402 403 PetscFunctionBegin; 404 /* code only works for square matrices A */ 405 406 /* find size of row to the left of the diagonal part */ 407 PetscCall(MatGetOwnershipRange(A,&diag,NULL)); 408 row = row - diag; 409 for (l=0; 
l<b->i[row+1]-b->i[row]; l++) { 410 if (garray[b->j[b->i[row]+l]] > diag) break; 411 } 412 if (l) { 413 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 414 PetscCall(PetscArraycpy(ba+b->i[row],v,l)); 415 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 416 } 417 418 /* diagonal part */ 419 if (a->i[row+1]-a->i[row]) { 420 PetscCall(MatSeqAIJGetArray(mat->A,&aa)); 421 PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]))); 422 PetscCall(MatSeqAIJRestoreArray(mat->A,&aa)); 423 } 424 425 /* right of diagonal part */ 426 if (b->i[row+1]-b->i[row]-l) { 427 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 428 PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l)); 429 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 430 } 431 PetscFunctionReturn(0); 432 } 433 434 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 435 { 436 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 437 PetscScalar value = 0.0; 438 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 439 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 440 PetscBool roworiented = aij->roworiented; 441 442 /* Some Variables required in the macro */ 443 Mat A = aij->A; 444 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 445 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 446 PetscBool ignorezeroentries = a->ignorezeroentries; 447 Mat B = aij->B; 448 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 449 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 450 MatScalar *aa,*ba; 451 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 452 PetscInt nonew; 453 MatScalar *ap1,*ap2; 454 455 PetscFunctionBegin; 456 PetscCall(MatSeqAIJGetArray(A,&aa)); 457 PetscCall(MatSeqAIJGetArray(B,&ba)); 458 for (i=0; i<m; i++) { 459 if (im[i] < 0) continue; 460 PetscCheck(im[i] < 
mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 461 if (im[i] >= rstart && im[i] < rend) { 462 row = im[i] - rstart; 463 lastcol1 = -1; 464 rp1 = aj + ai[row]; 465 ap1 = aa + ai[row]; 466 rmax1 = aimax[row]; 467 nrow1 = ailen[row]; 468 low1 = 0; 469 high1 = nrow1; 470 lastcol2 = -1; 471 rp2 = bj + bi[row]; 472 ap2 = ba + bi[row]; 473 rmax2 = bimax[row]; 474 nrow2 = bilen[row]; 475 low2 = 0; 476 high2 = nrow2; 477 478 for (j=0; j<n; j++) { 479 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 480 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 481 if (in[j] >= cstart && in[j] < cend) { 482 col = in[j] - cstart; 483 nonew = a->nonew; 484 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 485 } else if (in[j] < 0) { 486 continue; 487 } else { 488 PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 489 if (mat->was_assembled) { 490 if (!aij->colmap) { 491 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 492 } 493 #if defined(PETSC_USE_CTABLE) 494 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */ 495 col--; 496 #else 497 col = aij->colmap[in[j]] - 1; 498 #endif 499 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 500 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 501 col = in[j]; 502 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 503 B = aij->B; 504 b = (Mat_SeqAIJ*)B->data; 505 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 506 rp2 = bj + bi[row]; 507 ap2 = ba + bi[row]; 508 rmax2 = bimax[row]; 509 nrow2 = bilen[row]; 510 low2 = 0; 511 high2 = nrow2; 512 bm = aij->B->rmap->n; 513 ba = b->a; 514 } else if (col < 0 && 
!(ignorezeroentries && value == 0.0)) { 515 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 516 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j])); 517 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 518 } 519 } else col = in[j]; 520 nonew = b->nonew; 521 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 522 } 523 } 524 } else { 525 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 526 if (!aij->donotstash) { 527 mat->assembled = PETSC_FALSE; 528 if (roworiented) { 529 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 530 } else { 531 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 532 } 533 } 534 } 535 } 536 PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 537 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 538 PetscFunctionReturn(0); 539 } 540 541 /* 542 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 543 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 544 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
545 */ 546 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 547 { 548 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 549 Mat A = aij->A; /* diagonal part of the matrix */ 550 Mat B = aij->B; /* offdiagonal part of the matrix */ 551 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 552 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 553 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 554 PetscInt *ailen = a->ilen,*aj = a->j; 555 PetscInt *bilen = b->ilen,*bj = b->j; 556 PetscInt am = aij->A->rmap->n,j; 557 PetscInt diag_so_far = 0,dnz; 558 PetscInt offd_so_far = 0,onz; 559 560 PetscFunctionBegin; 561 /* Iterate over all rows of the matrix */ 562 for (j=0; j<am; j++) { 563 dnz = onz = 0; 564 /* Iterate over all non-zero columns of the current row */ 565 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 566 /* If column is in the diagonal */ 567 if (mat_j[col] >= cstart && mat_j[col] < cend) { 568 aj[diag_so_far++] = mat_j[col] - cstart; 569 dnz++; 570 } else { /* off-diagonal entries */ 571 bj[offd_so_far++] = mat_j[col]; 572 onz++; 573 } 574 } 575 ailen[j] = dnz; 576 bilen[j] = onz; 577 } 578 PetscFunctionReturn(0); 579 } 580 581 /* 582 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 583 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 584 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 585 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 586 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
587 */ 588 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 589 { 590 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 591 Mat A = aij->A; /* diagonal part of the matrix */ 592 Mat B = aij->B; /* offdiagonal part of the matrix */ 593 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 594 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 595 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 596 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 597 PetscInt *ailen = a->ilen,*aj = a->j; 598 PetscInt *bilen = b->ilen,*bj = b->j; 599 PetscInt am = aij->A->rmap->n,j; 600 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 601 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 602 PetscScalar *aa = a->a,*ba = b->a; 603 604 PetscFunctionBegin; 605 /* Iterate over all rows of the matrix */ 606 for (j=0; j<am; j++) { 607 dnz_row = onz_row = 0; 608 rowstart_offd = full_offd_i[j]; 609 rowstart_diag = full_diag_i[j]; 610 /* Iterate over all non-zero columns of the current row */ 611 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 612 /* If column is in the diagonal */ 613 if (mat_j[col] >= cstart && mat_j[col] < cend) { 614 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 615 aa[rowstart_diag+dnz_row] = mat_a[col]; 616 dnz_row++; 617 } else { /* off-diagonal entries */ 618 bj[rowstart_offd+onz_row] = mat_j[col]; 619 ba[rowstart_offd+onz_row] = mat_a[col]; 620 onz_row++; 621 } 622 } 623 ailen[j] = dnz_row; 624 bilen[j] = onz_row; 625 } 626 PetscFunctionReturn(0); 627 } 628 629 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 630 { 631 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 632 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 633 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 634 
635 PetscFunctionBegin; 636 for (i=0; i<m; i++) { 637 if (idxm[i] < 0) continue; /* negative row */ 638 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 639 if (idxm[i] >= rstart && idxm[i] < rend) { 640 row = idxm[i] - rstart; 641 for (j=0; j<n; j++) { 642 if (idxn[j] < 0) continue; /* negative column */ 643 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 644 if (idxn[j] >= cstart && idxn[j] < cend) { 645 col = idxn[j] - cstart; 646 PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j)); 647 } else { 648 if (!aij->colmap) { 649 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 650 } 651 #if defined(PETSC_USE_CTABLE) 652 PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col)); 653 col--; 654 #else 655 col = aij->colmap[idxn[j]] - 1; 656 #endif 657 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 658 else { 659 PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j)); 660 } 661 } 662 } 663 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 664 } 665 PetscFunctionReturn(0); 666 } 667 668 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 669 { 670 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 671 PetscInt nstash,reallocs; 672 673 PetscFunctionBegin; 674 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 675 676 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 677 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 678 PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 679 PetscFunctionReturn(0); 680 } 681 682 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 683 { 684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 685 PetscMPIInt n; 686 PetscInt 
i,j,rstart,ncols,flg; 687 PetscInt *row,*col; 688 PetscBool other_disassembled; 689 PetscScalar *val; 690 691 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 692 693 PetscFunctionBegin; 694 if (!aij->donotstash && !mat->nooffprocentries) { 695 while (1) { 696 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 697 if (!flg) break; 698 699 for (i=0; i<n;) { 700 /* Now identify the consecutive vals belonging to the same row */ 701 for (j=i,rstart=row[j]; j<n; j++) { 702 if (row[j] != rstart) break; 703 } 704 if (j < n) ncols = j-i; 705 else ncols = n-i; 706 /* Now assemble all these values with a single function call */ 707 PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 708 i = j; 709 } 710 } 711 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 712 } 713 #if defined(PETSC_HAVE_DEVICE) 714 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 715 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 716 if (mat->boundtocpu) { 717 PetscCall(MatBindToCPU(aij->A,PETSC_TRUE)); 718 PetscCall(MatBindToCPU(aij->B,PETSC_TRUE)); 719 } 720 #endif 721 PetscCall(MatAssemblyBegin(aij->A,mode)); 722 PetscCall(MatAssemblyEnd(aij->A,mode)); 723 724 /* determine if any processor has disassembled, if so we must 725 also disassemble ourself, in order that we may reassemble. 
*/ 726 /* 727 if nonzero structure of submatrix B cannot change then we know that 728 no processor disassembled thus we can skip this stuff 729 */ 730 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 731 PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat))); 732 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */ 733 PetscCall(MatDisAssemble_MPIAIJ(mat)); 734 } 735 } 736 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 737 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 738 } 739 PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE)); 740 #if defined(PETSC_HAVE_DEVICE) 741 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 742 #endif 743 PetscCall(MatAssemblyBegin(aij->B,mode)); 744 PetscCall(MatAssemblyEnd(aij->B,mode)); 745 746 PetscCall(PetscFree2(aij->rowvalues,aij->rowindices)); 747 748 aij->rowvalues = NULL; 749 750 PetscCall(VecDestroy(&aij->diag)); 751 752 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 753 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 754 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 755 PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat))); 756 } 757 #if defined(PETSC_HAVE_DEVICE) 758 mat->offloadmask = PETSC_OFFLOAD_BOTH; 759 #endif 760 PetscFunctionReturn(0); 761 } 762 763 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 764 { 765 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 766 767 PetscFunctionBegin; 768 PetscCall(MatZeroEntries(l->A)); 769 PetscCall(MatZeroEntries(l->B)); 770 PetscFunctionReturn(0); 771 } 772 773 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 774 { 
775 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 776 PetscObjectState sA, sB; 777 PetscInt *lrows; 778 PetscInt r, len; 779 PetscBool cong, lch, gch; 780 781 PetscFunctionBegin; 782 /* get locally owned rows */ 783 PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows)); 784 PetscCall(MatHasCongruentLayouts(A,&cong)); 785 /* fix right hand side if needed */ 786 if (x && b) { 787 const PetscScalar *xx; 788 PetscScalar *bb; 789 790 PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 791 PetscCall(VecGetArrayRead(x, &xx)); 792 PetscCall(VecGetArray(b, &bb)); 793 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 794 PetscCall(VecRestoreArrayRead(x, &xx)); 795 PetscCall(VecRestoreArray(b, &bb)); 796 } 797 798 sA = mat->A->nonzerostate; 799 sB = mat->B->nonzerostate; 800 801 if (diag != 0.0 && cong) { 802 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 803 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 804 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 805 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 806 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 807 PetscInt nnwA, nnwB; 808 PetscBool nnzA, nnzB; 809 810 nnwA = aijA->nonew; 811 nnwB = aijB->nonew; 812 nnzA = aijA->keepnonzeropattern; 813 nnzB = aijB->keepnonzeropattern; 814 if (!nnzA) { 815 PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 816 aijA->nonew = 0; 817 } 818 if (!nnzB) { 819 PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 820 aijB->nonew = 0; 821 } 822 /* Must zero here before the next loop */ 823 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 824 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 825 for (r = 0; r < len; ++r) { 
826 const PetscInt row = lrows[r] + A->rmap->rstart; 827 if (row >= A->cmap->N) continue; 828 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 829 } 830 aijA->nonew = nnwA; 831 aijB->nonew = nnwB; 832 } else { 833 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 834 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 835 } 836 PetscCall(PetscFree(lrows)); 837 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 838 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 839 840 /* reduce nonzerostate */ 841 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 842 PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A))); 843 if (gch) A->nonzerostate++; 844 PetscFunctionReturn(0); 845 } 846 847 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 848 { 849 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 850 PetscMPIInt n = A->rmap->n; 851 PetscInt i,j,r,m,len = 0; 852 PetscInt *lrows,*owners = A->rmap->range; 853 PetscMPIInt p = 0; 854 PetscSFNode *rrows; 855 PetscSF sf; 856 const PetscScalar *xx; 857 PetscScalar *bb,*mask,*aij_a; 858 Vec xmask,lmask; 859 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 860 const PetscInt *aj, *ii,*ridx; 861 PetscScalar *aa; 862 863 PetscFunctionBegin; 864 /* Create SF where leaves are input rows and roots are owned rows */ 865 PetscCall(PetscMalloc1(n, &lrows)); 866 for (r = 0; r < n; ++r) lrows[r] = -1; 867 PetscCall(PetscMalloc1(N, &rrows)); 868 for (r = 0; r < N; ++r) { 869 const PetscInt idx = rows[r]; 870 PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 871 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 872 PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p)); 873 } 874 rrows[r].rank = p; 875 rrows[r].index = rows[r] - owners[p]; 876 } 877 
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf)); 878 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 879 /* Collect flags for rows to be zeroed */ 880 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 881 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 882 PetscCall(PetscSFDestroy(&sf)); 883 /* Compress and put in row numbers */ 884 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 885 /* zero diagonal part of matrix */ 886 PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b)); 887 /* handle off diagonal part of matrix */ 888 PetscCall(MatCreateVecs(A,&xmask,NULL)); 889 PetscCall(VecDuplicate(l->lvec,&lmask)); 890 PetscCall(VecGetArray(xmask,&bb)); 891 for (i=0; i<len; i++) bb[lrows[i]] = 1; 892 PetscCall(VecRestoreArray(xmask,&bb)); 893 PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 894 PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 895 PetscCall(VecDestroy(&xmask)); 896 if (x && b) { /* this code is buggy when the row and column layout don't match */ 897 PetscBool cong; 898 899 PetscCall(MatHasCongruentLayouts(A,&cong)); 900 PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 901 PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 902 PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 903 PetscCall(VecGetArrayRead(l->lvec,&xx)); 904 PetscCall(VecGetArray(b,&bb)); 905 } 906 PetscCall(VecGetArray(lmask,&mask)); 907 /* remove zeroed rows of off diagonal matrix */ 908 PetscCall(MatSeqAIJGetArray(l->B,&aij_a)); 909 ii = aij->i; 910 for (i=0; i<len; i++) { 911 PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]])); 912 } 913 /* loop over all elements of off process part of matrix zeroing removed columns*/ 914 if (aij->compressedrow.use) { 915 m = 
aij->compressedrow.nrows; 916 ii = aij->compressedrow.i; 917 ridx = aij->compressedrow.rindex; 918 for (i=0; i<m; i++) { 919 n = ii[i+1] - ii[i]; 920 aj = aij->j + ii[i]; 921 aa = aij_a + ii[i]; 922 923 for (j=0; j<n; j++) { 924 if (PetscAbsScalar(mask[*aj])) { 925 if (b) bb[*ridx] -= *aa*xx[*aj]; 926 *aa = 0.0; 927 } 928 aa++; 929 aj++; 930 } 931 ridx++; 932 } 933 } else { /* do not use compressed row format */ 934 m = l->B->rmap->n; 935 for (i=0; i<m; i++) { 936 n = ii[i+1] - ii[i]; 937 aj = aij->j + ii[i]; 938 aa = aij_a + ii[i]; 939 for (j=0; j<n; j++) { 940 if (PetscAbsScalar(mask[*aj])) { 941 if (b) bb[i] -= *aa*xx[*aj]; 942 *aa = 0.0; 943 } 944 aa++; 945 aj++; 946 } 947 } 948 } 949 if (x && b) { 950 PetscCall(VecRestoreArray(b,&bb)); 951 PetscCall(VecRestoreArrayRead(l->lvec,&xx)); 952 } 953 PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a)); 954 PetscCall(VecRestoreArray(lmask,&mask)); 955 PetscCall(VecDestroy(&lmask)); 956 PetscCall(PetscFree(lrows)); 957 958 /* only change matrix nonzero state if pattern was allowed to be changed */ 959 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 960 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 961 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 962 } 963 PetscFunctionReturn(0); 964 } 965 966 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 967 { 968 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 969 PetscInt nt; 970 VecScatter Mvctx = a->Mvctx; 971 972 PetscFunctionBegin; 973 PetscCall(VecGetLocalSize(xx,&nt)); 974 PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt); 975 PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 976 PetscCall((*a->A->ops->mult)(a->A,xx,yy)); 977 PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 978 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy)); 979 
PetscFunctionReturn(0); 980 } 981 982 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 983 { 984 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 985 986 PetscFunctionBegin; 987 PetscCall(MatMultDiagonalBlock(a->A,bb,xx)); 988 PetscFunctionReturn(0); 989 } 990 991 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 992 { 993 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 994 VecScatter Mvctx = a->Mvctx; 995 996 PetscFunctionBegin; 997 PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 998 PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz)); 999 PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1000 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz)); 1001 PetscFunctionReturn(0); 1002 } 1003 1004 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1005 { 1006 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1007 1008 PetscFunctionBegin; 1009 /* do nondiagonal part */ 1010 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1011 /* do local part */ 1012 PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy)); 1013 /* add partial results together */ 1014 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1015 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1016 PetscFunctionReturn(0); 1017 } 1018 1019 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1020 { 1021 MPI_Comm comm; 1022 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1023 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1024 IS Me,Notme; 1025 PetscInt M,N,first,last,*notme,i; 1026 PetscBool lf; 1027 PetscMPIInt size; 1028 1029 PetscFunctionBegin; 1030 /* Easy test: symmetric diagonal block */ 1031 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1032 PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf)); 1033 PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat))); 1034 if (!*f) PetscFunctionReturn(0); 1035 
PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm)); 1036 PetscCallMPI(MPI_Comm_size(comm,&size)); 1037 if (size == 1) PetscFunctionReturn(0); 1038 1039 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1040 PetscCall(MatGetSize(Amat,&M,&N)); 1041 PetscCall(MatGetOwnershipRange(Amat,&first,&last)); 1042 PetscCall(PetscMalloc1(N-last+first,¬me)); 1043 for (i=0; i<first; i++) notme[i] = i; 1044 for (i=last; i<M; i++) notme[i-last+first] = i; 1045 PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme)); 1046 PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me)); 1047 PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs)); 1048 Aoff = Aoffs[0]; 1049 PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs)); 1050 Boff = Boffs[0]; 1051 PetscCall(MatIsTranspose(Aoff,Boff,tol,f)); 1052 PetscCall(MatDestroyMatrices(1,&Aoffs)); 1053 PetscCall(MatDestroyMatrices(1,&Boffs)); 1054 PetscCall(ISDestroy(&Me)); 1055 PetscCall(ISDestroy(&Notme)); 1056 PetscCall(PetscFree(notme)); 1057 PetscFunctionReturn(0); 1058 } 1059 1060 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1061 { 1062 PetscFunctionBegin; 1063 PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f)); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1068 { 1069 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1070 1071 PetscFunctionBegin; 1072 /* do nondiagonal part */ 1073 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1074 /* do local part */ 1075 PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz)); 1076 /* add partial results together */ 1077 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1078 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1079 PetscFunctionReturn(0); 1080 } 1081 1082 /* 1083 This only works correctly for square matrices where the subblock A->A is the 1084 
diagonal block 1085 */ 1086 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1087 { 1088 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1089 1090 PetscFunctionBegin; 1091 PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1092 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1093 PetscCall(MatGetDiagonal(a->A,v)); 1094 PetscFunctionReturn(0); 1095 } 1096 1097 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1098 { 1099 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1100 1101 PetscFunctionBegin; 1102 PetscCall(MatScale(a->A,aa)); 1103 PetscCall(MatScale(a->B,aa)); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */ 1108 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) 1109 { 1110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1111 1112 PetscFunctionBegin; 1113 PetscCall(PetscSFDestroy(&aij->coo_sf)); 1114 PetscCall(PetscFree(aij->Aperm1)); 1115 PetscCall(PetscFree(aij->Bperm1)); 1116 PetscCall(PetscFree(aij->Ajmap1)); 1117 PetscCall(PetscFree(aij->Bjmap1)); 1118 1119 PetscCall(PetscFree(aij->Aimap2)); 1120 PetscCall(PetscFree(aij->Bimap2)); 1121 PetscCall(PetscFree(aij->Aperm2)); 1122 PetscCall(PetscFree(aij->Bperm2)); 1123 PetscCall(PetscFree(aij->Ajmap2)); 1124 PetscCall(PetscFree(aij->Bjmap2)); 1125 1126 PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf)); 1127 PetscCall(PetscFree(aij->Cperm1)); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1132 { 1133 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1134 1135 PetscFunctionBegin; 1136 #if defined(PETSC_USE_LOG) 1137 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 1138 #endif 1139 PetscCall(MatStashDestroy_Private(&mat->stash)); 1140 
PetscCall(VecDestroy(&aij->diag)); 1141 PetscCall(MatDestroy(&aij->A)); 1142 PetscCall(MatDestroy(&aij->B)); 1143 #if defined(PETSC_USE_CTABLE) 1144 PetscCall(PetscTableDestroy(&aij->colmap)); 1145 #else 1146 PetscCall(PetscFree(aij->colmap)); 1147 #endif 1148 PetscCall(PetscFree(aij->garray)); 1149 PetscCall(VecDestroy(&aij->lvec)); 1150 PetscCall(VecScatterDestroy(&aij->Mvctx)); 1151 PetscCall(PetscFree2(aij->rowvalues,aij->rowindices)); 1152 PetscCall(PetscFree(aij->ld)); 1153 1154 /* Free COO */ 1155 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 1156 1157 PetscCall(PetscFree(mat->data)); 1158 1159 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1160 PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL)); 1161 1162 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 1163 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL)); 1164 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL)); 1165 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL)); 1166 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL)); 1167 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL)); 1168 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL)); 1169 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL)); 1170 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL)); 1171 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL)); 1172 #if defined(PETSC_HAVE_CUDA) 1173 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL)); 1174 #endif 1175 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1176 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL)); 1177 #endif 1178 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL)); 1179 #if defined(PETSC_HAVE_ELEMENTAL) 1180 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL)); 1181 #endif 1182 #if defined(PETSC_HAVE_SCALAPACK) 1183 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL)); 1184 #endif 1185 #if defined(PETSC_HAVE_HYPRE) 1186 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL)); 1187 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL)); 1188 #endif 1189 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 1190 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL)); 1191 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL)); 1192 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL)); 1193 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL)); 1194 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL)); 1195 #if defined(PETSC_HAVE_MKL_SPARSE) 1196 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL)); 1197 #endif 1198 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL)); 1199 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 1200 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL)); 1201 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL)); 1202 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL)); 1203 PetscFunctionReturn(0); 1204 } 1205 1206 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer 
viewer) 1207 { 1208 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1209 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1210 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1211 const PetscInt *garray = aij->garray; 1212 const PetscScalar *aa,*ba; 1213 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1214 PetscInt *rowlens; 1215 PetscInt *colidxs; 1216 PetscScalar *matvals; 1217 1218 PetscFunctionBegin; 1219 PetscCall(PetscViewerSetUp(viewer)); 1220 1221 M = mat->rmap->N; 1222 N = mat->cmap->N; 1223 m = mat->rmap->n; 1224 rs = mat->rmap->rstart; 1225 cs = mat->cmap->rstart; 1226 nz = A->nz + B->nz; 1227 1228 /* write matrix header */ 1229 header[0] = MAT_FILE_CLASSID; 1230 header[1] = M; header[2] = N; header[3] = nz; 1231 PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat))); 1232 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1233 1234 /* fill in and store row lengths */ 1235 PetscCall(PetscMalloc1(m,&rowlens)); 1236 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1237 PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT)); 1238 PetscCall(PetscFree(rowlens)); 1239 1240 /* fill in and store column indices */ 1241 PetscCall(PetscMalloc1(nz,&colidxs)); 1242 for (cnt=0, i=0; i<m; i++) { 1243 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1244 if (garray[B->j[jb]] > cs) break; 1245 colidxs[cnt++] = garray[B->j[jb]]; 1246 } 1247 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1248 colidxs[cnt++] = A->j[ja] + cs; 1249 for (; jb<B->i[i+1]; jb++) 1250 colidxs[cnt++] = garray[B->j[jb]]; 1251 } 1252 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1253 PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 1254 PetscCall(PetscFree(colidxs)); 1255 1256 /* fill in and store nonzero values */ 1257 PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa)); 1258 PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba)); 
1259 PetscCall(PetscMalloc1(nz,&matvals)); 1260 for (cnt=0, i=0; i<m; i++) { 1261 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1262 if (garray[B->j[jb]] > cs) break; 1263 matvals[cnt++] = ba[jb]; 1264 } 1265 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1266 matvals[cnt++] = aa[ja]; 1267 for (; jb<B->i[i+1]; jb++) 1268 matvals[cnt++] = ba[jb]; 1269 } 1270 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa)); 1271 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba)); 1272 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1273 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 1274 PetscCall(PetscFree(matvals)); 1275 1276 /* write block size option to the viewer's .info file */ 1277 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1278 PetscFunctionReturn(0); 1279 } 1280 1281 #include <petscdraw.h> 1282 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1283 { 1284 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1285 PetscMPIInt rank = aij->rank,size = aij->size; 1286 PetscBool isdraw,iascii,isbinary; 1287 PetscViewer sviewer; 1288 PetscViewerFormat format; 1289 1290 PetscFunctionBegin; 1291 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1292 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1293 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1294 if (iascii) { 1295 PetscCall(PetscViewerGetFormat(viewer,&format)); 1296 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1297 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1298 PetscCall(PetscMalloc1(size,&nz)); 1299 PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 1300 for (i=0; i<(PetscInt)size; i++) { 1301 nmax = PetscMax(nmax,nz[i]); 1302 nmin = 
PetscMin(nmin,nz[i]); 1303 navg += nz[i]; 1304 } 1305 PetscCall(PetscFree(nz)); 1306 navg = navg/size; 1307 PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax)); 1308 PetscFunctionReturn(0); 1309 } 1310 PetscCall(PetscViewerGetFormat(viewer,&format)); 1311 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1312 MatInfo info; 1313 PetscInt *inodes=NULL; 1314 1315 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 1316 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 1317 PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL)); 1318 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1319 if (!inodes) { 1320 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", 1321 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1322 } else { 1323 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", 1324 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1325 } 1326 PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info)); 1327 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1328 PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info)); 1329 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1330 PetscCall(PetscViewerFlush(viewer)); 1331 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1332 PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 1333 PetscCall(VecScatterView(aij->Mvctx,viewer)); 1334 PetscFunctionReturn(0); 1335 } else if (format == 
PETSC_VIEWER_ASCII_INFO) { 1336 PetscInt inodecount,inodelimit,*inodes; 1337 PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit)); 1338 if (inodes) { 1339 PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit)); 1340 } else { 1341 PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n")); 1342 } 1343 PetscFunctionReturn(0); 1344 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1345 PetscFunctionReturn(0); 1346 } 1347 } else if (isbinary) { 1348 if (size == 1) { 1349 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1350 PetscCall(MatView(aij->A,viewer)); 1351 } else { 1352 PetscCall(MatView_MPIAIJ_Binary(mat,viewer)); 1353 } 1354 PetscFunctionReturn(0); 1355 } else if (iascii && size == 1) { 1356 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1357 PetscCall(MatView(aij->A,viewer)); 1358 PetscFunctionReturn(0); 1359 } else if (isdraw) { 1360 PetscDraw draw; 1361 PetscBool isnull; 1362 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1363 PetscCall(PetscDrawIsNull(draw,&isnull)); 1364 if (isnull) PetscFunctionReturn(0); 1365 } 1366 1367 { /* assemble the entire matrix onto first processor */ 1368 Mat A = NULL, Av; 1369 IS isrow,iscol; 1370 1371 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1372 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1373 PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A)); 1374 PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL)); 1375 /* The commented code uses MatCreateSubMatrices instead */ 1376 /* 1377 Mat *AA, A = NULL, Av; 1378 IS isrow,iscol; 1379 1380 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->rmap->N : 0,0,1,&isrow)); 1381 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1382 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1383 if (rank == 0) { 1384 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1385 A = AA[0]; 1386 Av = AA[0]; 1387 } 1388 PetscCall(MatDestroySubMatrices(1,&AA)); 1389 */ 1390 PetscCall(ISDestroy(&iscol)); 1391 PetscCall(ISDestroy(&isrow)); 1392 /* 1393 Everyone has to call to draw the matrix since the graphics waits are 1394 synchronized across all processors that share the PetscDraw object 1395 */ 1396 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1397 if (rank == 0) { 1398 if (((PetscObject)mat)->name) { 1399 PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name)); 1400 } 1401 PetscCall(MatView_SeqAIJ(Av,sviewer)); 1402 } 1403 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1404 PetscCall(PetscViewerFlush(viewer)); 1405 PetscCall(MatDestroy(&A)); 1406 } 1407 PetscFunctionReturn(0); 1408 } 1409 1410 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1411 { 1412 PetscBool iascii,isdraw,issocket,isbinary; 1413 1414 PetscFunctionBegin; 1415 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1416 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1417 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1418 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 1419 if (iascii || isdraw || isbinary || issocket) { 1420 PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer)); 1421 } 1422 PetscFunctionReturn(0); 1423 } 1424 1425 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1426 { 1427 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1428 Vec bb1 = NULL; 1429 PetscBool hasop; 1430 
1431 PetscFunctionBegin; 1432 if (flag == SOR_APPLY_UPPER) { 1433 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1438 PetscCall(VecDuplicate(bb,&bb1)); 1439 } 1440 1441 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1442 if (flag & SOR_ZERO_INITIAL_GUESS) { 1443 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1444 its--; 1445 } 1446 1447 while (its--) { 1448 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1449 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1450 1451 /* update rhs: bb1 = bb - B*x */ 1452 PetscCall(VecScale(mat->lvec,-1.0)); 1453 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1454 1455 /* local sweep */ 1456 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 1457 } 1458 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1459 if (flag & SOR_ZERO_INITIAL_GUESS) { 1460 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1461 its--; 1462 } 1463 while (its--) { 1464 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1465 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1466 1467 /* update rhs: bb1 = bb - B*x */ 1468 PetscCall(VecScale(mat->lvec,-1.0)); 1469 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1470 1471 /* local sweep */ 1472 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 1473 } 1474 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1475 if (flag & SOR_ZERO_INITIAL_GUESS) { 1476 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1477 its--; 1478 } 1479 while (its--) { 1480 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1481 
PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1482 1483 /* update rhs: bb1 = bb - B*x */ 1484 PetscCall(VecScale(mat->lvec,-1.0)); 1485 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1486 1487 /* local sweep */ 1488 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 1489 } 1490 } else if (flag & SOR_EISENSTAT) { 1491 Vec xx1; 1492 1493 PetscCall(VecDuplicate(bb,&xx1)); 1494 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx)); 1495 1496 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1497 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1498 if (!mat->diag) { 1499 PetscCall(MatCreateVecs(matin,&mat->diag,NULL)); 1500 PetscCall(MatGetDiagonal(matin,mat->diag)); 1501 } 1502 PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop)); 1503 if (hasop) { 1504 PetscCall(MatMultDiagonalBlock(matin,xx,bb1)); 1505 } else { 1506 PetscCall(VecPointwiseMult(bb1,mat->diag,xx)); 1507 } 1508 PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb)); 1509 1510 PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1)); 1511 1512 /* local sweep */ 1513 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1)); 1514 PetscCall(VecAXPY(xx,1.0,xx1)); 1515 PetscCall(VecDestroy(&xx1)); 1516 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1517 1518 PetscCall(VecDestroy(&bb1)); 1519 1520 matin->factorerrortype = mat->A->factorerrortype; 1521 PetscFunctionReturn(0); 1522 } 1523 1524 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1525 { 1526 Mat aA,aB,Aperm; 1527 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1528 PetscScalar *aa,*ba; 1529 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1530 PetscSF rowsf,sf; 
1531 IS parcolp = NULL; 1532 PetscBool done; 1533 1534 PetscFunctionBegin; 1535 PetscCall(MatGetLocalSize(A,&m,&n)); 1536 PetscCall(ISGetIndices(rowp,&rwant)); 1537 PetscCall(ISGetIndices(colp,&cwant)); 1538 PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest)); 1539 1540 /* Invert row permutation to find out where my rows should go */ 1541 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf)); 1542 PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant)); 1543 PetscCall(PetscSFSetFromOptions(rowsf)); 1544 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1545 PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1546 PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1547 1548 /* Invert column permutation to find out where my columns should go */ 1549 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1550 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant)); 1551 PetscCall(PetscSFSetFromOptions(sf)); 1552 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1553 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1554 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1555 PetscCall(PetscSFDestroy(&sf)); 1556 1557 PetscCall(ISRestoreIndices(rowp,&rwant)); 1558 PetscCall(ISRestoreIndices(colp,&cwant)); 1559 PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols)); 1560 1561 /* Find out where my gcols should go */ 1562 PetscCall(MatGetSize(aB,NULL,&ng)); 1563 PetscCall(PetscMalloc1(ng,&gcdest)); 1564 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1565 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols)); 1566 PetscCall(PetscSFSetFromOptions(sf)); 1567 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1568 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1569 PetscCall(PetscSFDestroy(&sf)); 1570 1571 
PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz)); 1572 PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1573 PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1574 for (i=0; i<m; i++) { 1575 PetscInt row = rdest[i]; 1576 PetscMPIInt rowner; 1577 PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner)); 1578 for (j=ai[i]; j<ai[i+1]; j++) { 1579 PetscInt col = cdest[aj[j]]; 1580 PetscMPIInt cowner; 1581 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */ 1582 if (rowner == cowner) dnnz[i]++; 1583 else onnz[i]++; 1584 } 1585 for (j=bi[i]; j<bi[i+1]; j++) { 1586 PetscInt col = gcdest[bj[j]]; 1587 PetscMPIInt cowner; 1588 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); 1589 if (rowner == cowner) dnnz[i]++; 1590 else onnz[i]++; 1591 } 1592 } 1593 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1594 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1595 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1596 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1597 PetscCall(PetscSFDestroy(&rowsf)); 1598 1599 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm)); 1600 PetscCall(MatSeqAIJGetArray(aA,&aa)); 1601 PetscCall(MatSeqAIJGetArray(aB,&ba)); 1602 for (i=0; i<m; i++) { 1603 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1604 PetscInt j0,rowlen; 1605 rowlen = ai[i+1] - ai[i]; 1606 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1607 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1608 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES)); 1609 } 1610 rowlen = bi[i+1] - bi[i]; 1611 for (j0=j=0; j<rowlen; j0=j) { 1612 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = 
gcdest[bj[bi[i]+j]]; 1613 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES)); 1614 } 1615 } 1616 PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY)); 1617 PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY)); 1618 PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1619 PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1620 PetscCall(MatSeqAIJRestoreArray(aA,&aa)); 1621 PetscCall(MatSeqAIJRestoreArray(aB,&ba)); 1622 PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz)); 1623 PetscCall(PetscFree3(work,rdest,cdest)); 1624 PetscCall(PetscFree(gcdest)); 1625 if (parcolp) PetscCall(ISDestroy(&colp)); 1626 *B = Aperm; 1627 PetscFunctionReturn(0); 1628 } 1629 1630 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1631 { 1632 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1633 1634 PetscFunctionBegin; 1635 PetscCall(MatGetSize(aij->B,NULL,nghosts)); 1636 if (ghosts) *ghosts = aij->garray; 1637 PetscFunctionReturn(0); 1638 } 1639 1640 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1641 { 1642 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1643 Mat A = mat->A,B = mat->B; 1644 PetscLogDouble isend[5],irecv[5]; 1645 1646 PetscFunctionBegin; 1647 info->block_size = 1.0; 1648 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1649 1650 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1651 isend[3] = info->memory; isend[4] = info->mallocs; 1652 1653 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1654 1655 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1656 isend[3] += info->memory; isend[4] += info->mallocs; 1657 if (flag == MAT_LOCAL) { 1658 info->nz_used = isend[0]; 1659 info->nz_allocated = isend[1]; 1660 info->nz_unneeded = isend[2]; 1661 info->memory = isend[3]; 1662 info->mallocs = isend[4]; 1663 } else if (flag == MAT_GLOBAL_MAX) { 1664 
PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 1665 1666 info->nz_used = irecv[0]; 1667 info->nz_allocated = irecv[1]; 1668 info->nz_unneeded = irecv[2]; 1669 info->memory = irecv[3]; 1670 info->mallocs = irecv[4]; 1671 } else if (flag == MAT_GLOBAL_SUM) { 1672 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 1673 1674 info->nz_used = irecv[0]; 1675 info->nz_allocated = irecv[1]; 1676 info->nz_unneeded = irecv[2]; 1677 info->memory = irecv[3]; 1678 info->mallocs = irecv[4]; 1679 } 1680 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1681 info->fill_ratio_needed = 0; 1682 info->factor_mallocs = 0; 1683 PetscFunctionReturn(0); 1684 } 1685 1686 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1687 { 1688 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1689 1690 PetscFunctionBegin; 1691 switch (op) { 1692 case MAT_NEW_NONZERO_LOCATIONS: 1693 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1694 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1695 case MAT_KEEP_NONZERO_PATTERN: 1696 case MAT_NEW_NONZERO_LOCATION_ERR: 1697 case MAT_USE_INODES: 1698 case MAT_IGNORE_ZERO_ENTRIES: 1699 case MAT_FORM_EXPLICIT_TRANSPOSE: 1700 MatCheckPreallocated(A,1); 1701 PetscCall(MatSetOption(a->A,op,flg)); 1702 PetscCall(MatSetOption(a->B,op,flg)); 1703 break; 1704 case MAT_ROW_ORIENTED: 1705 MatCheckPreallocated(A,1); 1706 a->roworiented = flg; 1707 1708 PetscCall(MatSetOption(a->A,op,flg)); 1709 PetscCall(MatSetOption(a->B,op,flg)); 1710 break; 1711 case MAT_FORCE_DIAGONAL_ENTRIES: 1712 case MAT_SORTED_FULL: 1713 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1714 break; 1715 case MAT_IGNORE_OFF_PROC_ENTRIES: 1716 a->donotstash = flg; 1717 break; 1718 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1719 case MAT_SPD: 1720 case MAT_SYMMETRIC: 1721 case MAT_STRUCTURALLY_SYMMETRIC: 1722 case MAT_HERMITIAN: 1723 
case MAT_SYMMETRY_ETERNAL: 1724 break; 1725 case MAT_SUBMAT_SINGLEIS: 1726 A->submat_singleis = flg; 1727 break; 1728 case MAT_STRUCTURE_ONLY: 1729 /* The option is handled directly by MatSetOption() */ 1730 break; 1731 default: 1732 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1733 } 1734 PetscFunctionReturn(0); 1735 } 1736 1737 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1738 { 1739 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1740 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1741 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1742 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1743 PetscInt *cmap,*idx_p; 1744 1745 PetscFunctionBegin; 1746 PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1747 mat->getrowactive = PETSC_TRUE; 1748 1749 if (!mat->rowvalues && (idx || v)) { 1750 /* 1751 allocate enough space to hold information from the longest row. 
1752 */ 1753 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1754 PetscInt max = 1,tmp; 1755 for (i=0; i<matin->rmap->n; i++) { 1756 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1757 if (max < tmp) max = tmp; 1758 } 1759 PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices)); 1760 } 1761 1762 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1763 lrow = row - rstart; 1764 1765 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1766 if (!v) {pvA = NULL; pvB = NULL;} 1767 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1768 PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 1769 PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1770 nztot = nzA + nzB; 1771 1772 cmap = mat->garray; 1773 if (v || idx) { 1774 if (nztot) { 1775 /* Sort by increasing column numbers, assuming A and B already sorted */ 1776 PetscInt imark = -1; 1777 if (v) { 1778 *v = v_p = mat->rowvalues; 1779 for (i=0; i<nzB; i++) { 1780 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1781 else break; 1782 } 1783 imark = i; 1784 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1785 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1786 } 1787 if (idx) { 1788 *idx = idx_p = mat->rowindices; 1789 if (imark > -1) { 1790 for (i=0; i<imark; i++) { 1791 idx_p[i] = cmap[cworkB[i]]; 1792 } 1793 } else { 1794 for (i=0; i<nzB; i++) { 1795 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1796 else break; 1797 } 1798 imark = i; 1799 } 1800 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1801 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1802 } 1803 } else { 1804 if (idx) *idx = NULL; 1805 if (v) *v = NULL; 1806 } 1807 } 1808 *nz = nztot; 1809 PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 1810 PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 1811 PetscFunctionReturn(0); 1812 } 1813 1814 PetscErrorCode MatRestoreRow_MPIAIJ(Mat 
mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1815 { 1816 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1817 1818 PetscFunctionBegin; 1819 PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1820 aij->getrowactive = PETSC_FALSE; 1821 PetscFunctionReturn(0); 1822 } 1823 1824 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1825 { 1826 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1827 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1828 PetscInt i,j,cstart = mat->cmap->rstart; 1829 PetscReal sum = 0.0; 1830 const MatScalar *v,*amata,*bmata; 1831 1832 PetscFunctionBegin; 1833 if (aij->size == 1) { 1834 PetscCall(MatNorm(aij->A,type,norm)); 1835 } else { 1836 PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata)); 1837 PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata)); 1838 if (type == NORM_FROBENIUS) { 1839 v = amata; 1840 for (i=0; i<amat->nz; i++) { 1841 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1842 } 1843 v = bmata; 1844 for (i=0; i<bmat->nz; i++) { 1845 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1846 } 1847 PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1848 *norm = PetscSqrtReal(*norm); 1849 PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz)); 1850 } else if (type == NORM_1) { /* max column norm */ 1851 PetscReal *tmp,*tmp2; 1852 PetscInt *jj,*garray = aij->garray; 1853 PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp)); 1854 PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2)); 1855 *norm = 0.0; 1856 v = amata; jj = amat->j; 1857 for (j=0; j<amat->nz; j++) { 1858 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1859 } 1860 v = bmata; jj = bmat->j; 1861 for (j=0; j<bmat->nz; j++) { 1862 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1863 } 1864 PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1865 for (j=0; j<mat->cmap->N; j++) { 1866 if (tmp2[j] > *norm) *norm = 
tmp2[j]; 1867 } 1868 PetscCall(PetscFree(tmp)); 1869 PetscCall(PetscFree(tmp2)); 1870 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1871 } else if (type == NORM_INFINITY) { /* max row norm */ 1872 PetscReal ntemp = 0.0; 1873 for (j=0; j<aij->A->rmap->n; j++) { 1874 v = amata + amat->i[j]; 1875 sum = 0.0; 1876 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1877 sum += PetscAbsScalar(*v); v++; 1878 } 1879 v = bmata + bmat->i[j]; 1880 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1881 sum += PetscAbsScalar(*v); v++; 1882 } 1883 if (sum > ntemp) ntemp = sum; 1884 } 1885 PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 1886 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1887 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1888 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata)); 1889 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata)); 1890 } 1891 PetscFunctionReturn(0); 1892 } 1893 1894 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1895 { 1896 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1897 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1898 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1899 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1900 Mat B,A_diag,*B_diag; 1901 const MatScalar *pbv,*bv; 1902 1903 PetscFunctionBegin; 1904 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1905 ai = Aloc->i; aj = Aloc->j; 1906 bi = Bloc->i; bj = Bloc->j; 1907 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1908 PetscInt *d_nnz,*g_nnz,*o_nnz; 1909 PetscSFNode *oloc; 1910 PETSC_UNUSED PetscSF sf; 1911 1912 PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc)); 1913 /* compute d_nnz for preallocation */ 1914 PetscCall(PetscArrayzero(d_nnz,na)); 1915 for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++; 1916 /* compute local off-diagonal 
contributions */ 1917 PetscCall(PetscArrayzero(g_nnz,nb)); 1918 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1919 /* map those to global */ 1920 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1921 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray)); 1922 PetscCall(PetscSFSetFromOptions(sf)); 1923 PetscCall(PetscArrayzero(o_nnz,na)); 1924 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1925 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1926 PetscCall(PetscSFDestroy(&sf)); 1927 1928 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 1929 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 1930 PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs))); 1931 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 1932 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 1933 PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc)); 1934 } else { 1935 B = *matout; 1936 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 1937 } 1938 1939 b = (Mat_MPIAIJ*)B->data; 1940 A_diag = a->A; 1941 B_diag = &b->A; 1942 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1943 A_diag_ncol = A_diag->cmap->N; 1944 B_diag_ilen = sub_B_diag->ilen; 1945 B_diag_i = sub_B_diag->i; 1946 1947 /* Set ilen for diagonal of B */ 1948 for (i=0; i<A_diag_ncol; i++) { 1949 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1950 } 1951 1952 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1953 very quickly (=without using MatSetValues), because all writes are local. 
*/ 1954 PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag)); 1955 1956 /* copy over the B part */ 1957 PetscCall(PetscMalloc1(bi[mb],&cols)); 1958 PetscCall(MatSeqAIJGetArrayRead(a->B,&bv)); 1959 pbv = bv; 1960 row = A->rmap->rstart; 1961 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1962 cols_tmp = cols; 1963 for (i=0; i<mb; i++) { 1964 ncol = bi[i+1]-bi[i]; 1965 PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES)); 1966 row++; 1967 pbv += ncol; cols_tmp += ncol; 1968 } 1969 PetscCall(PetscFree(cols)); 1970 PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv)); 1971 1972 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1973 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1974 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1975 *matout = B; 1976 } else { 1977 PetscCall(MatHeaderMerge(A,&B)); 1978 } 1979 PetscFunctionReturn(0); 1980 } 1981 1982 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1983 { 1984 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1985 Mat a = aij->A,b = aij->B; 1986 PetscInt s1,s2,s3; 1987 1988 PetscFunctionBegin; 1989 PetscCall(MatGetLocalSize(mat,&s2,&s3)); 1990 if (rr) { 1991 PetscCall(VecGetLocalSize(rr,&s1)); 1992 PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1993 /* Overlap communication with computation. 
*/ 1994 PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1995 } 1996 if (ll) { 1997 PetscCall(VecGetLocalSize(ll,&s1)); 1998 PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1999 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 2000 } 2001 /* scale the diagonal block */ 2002 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 2003 2004 if (rr) { 2005 /* Do a scatter end and then right scale the off-diagonal block */ 2006 PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2007 PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec)); 2008 } 2009 PetscFunctionReturn(0); 2010 } 2011 2012 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2013 { 2014 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2015 2016 PetscFunctionBegin; 2017 PetscCall(MatSetUnfactored(a->A)); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2022 { 2023 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2024 Mat a,b,c,d; 2025 PetscBool flg; 2026 2027 PetscFunctionBegin; 2028 a = matA->A; b = matA->B; 2029 c = matB->A; d = matB->B; 2030 2031 PetscCall(MatEqual(a,c,&flg)); 2032 if (flg) { 2033 PetscCall(MatEqual(b,d,&flg)); 2034 } 2035 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 2036 PetscFunctionReturn(0); 2037 } 2038 2039 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2040 { 2041 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2042 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2043 2044 PetscFunctionBegin; 2045 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2046 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2047 /* because of the column compression in the off-processor part of the matrix a->B, 2048 the number of columns in a->B and b->B may be different, hence we cannot call 2049 the MatCopy() directly on the two parts. If need be, we can provide a more 2050 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2051 then copying the submatrices */ 2052 PetscCall(MatCopy_Basic(A,B,str)); 2053 } else { 2054 PetscCall(MatCopy(a->A,b->A,str)); 2055 PetscCall(MatCopy(a->B,b->B,str)); 2056 } 2057 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2058 PetscFunctionReturn(0); 2059 } 2060 2061 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2062 { 2063 PetscFunctionBegin; 2064 PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 /* 2069 Computes the number of nonzeros per row needed for preallocation when X and Y 2070 have different nonzero structure. 
2071 */ 2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2073 { 2074 PetscInt i,j,k,nzx,nzy; 2075 2076 PetscFunctionBegin; 2077 /* Set the number of nonzeros in the new matrix */ 2078 for (i=0; i<m; i++) { 2079 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2080 nzx = xi[i+1] - xi[i]; 2081 nzy = yi[i+1] - yi[i]; 2082 nnz[i] = 0; 2083 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2084 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2085 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2086 nnz[i]++; 2087 } 2088 for (; k<nzy; k++) nnz[i]++; 2089 } 2090 PetscFunctionReturn(0); 2091 } 2092 2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2095 { 2096 PetscInt m = Y->rmap->N; 2097 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2098 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2099 2100 PetscFunctionBegin; 2101 PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 2102 PetscFunctionReturn(0); 2103 } 2104 2105 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2106 { 2107 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2108 2109 PetscFunctionBegin; 2110 if (str == SAME_NONZERO_PATTERN) { 2111 PetscCall(MatAXPY(yy->A,a,xx->A,str)); 2112 PetscCall(MatAXPY(yy->B,a,xx->B,str)); 2113 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2114 PetscCall(MatAXPY_Basic(Y,a,X,str)); 2115 } else { 2116 Mat B; 2117 PetscInt *nnz_d,*nnz_o; 2118 2119 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 2120 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 2121 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 
2122 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2123 PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap)); 2124 PetscCall(MatSetType(B,((PetscObject)Y)->type_name)); 2125 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d)); 2126 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 2127 PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o)); 2128 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2129 PetscCall(MatHeaderMerge(Y,&B)); 2130 PetscCall(PetscFree(nnz_d)); 2131 PetscCall(PetscFree(nnz_o)); 2132 } 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2137 2138 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2139 { 2140 PetscFunctionBegin; 2141 if (PetscDefined(USE_COMPLEX)) { 2142 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2143 2144 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2145 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2146 } 2147 PetscFunctionReturn(0); 2148 } 2149 2150 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2151 { 2152 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2153 2154 PetscFunctionBegin; 2155 PetscCall(MatRealPart(a->A)); 2156 PetscCall(MatRealPart(a->B)); 2157 PetscFunctionReturn(0); 2158 } 2159 2160 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2161 { 2162 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2163 2164 PetscFunctionBegin; 2165 PetscCall(MatImaginaryPart(a->A)); 2166 PetscCall(MatImaginaryPart(a->B)); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2171 { 2172 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2173 PetscInt i,*idxb = NULL,m = A->rmap->n; 2174 PetscScalar *va,*vv; 2175 Vec vB,vA; 2176 const PetscScalar *vb; 2177 2178 PetscFunctionBegin; 2179 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 2180 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 2181 2182 PetscCall(VecGetArrayWrite(vA,&va)); 2183 if (idx) { 2184 for (i=0; i<m; i++) { 2185 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2186 } 
2187 } 2188 2189 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 2190 PetscCall(PetscMalloc1(m,&idxb)); 2191 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 2192 2193 PetscCall(VecGetArrayWrite(v,&vv)); 2194 PetscCall(VecGetArrayRead(vB,&vb)); 2195 for (i=0; i<m; i++) { 2196 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2197 vv[i] = vb[i]; 2198 if (idx) idx[i] = a->garray[idxb[i]]; 2199 } else { 2200 vv[i] = va[i]; 2201 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2202 idx[i] = a->garray[idxb[i]]; 2203 } 2204 } 2205 PetscCall(VecRestoreArrayWrite(vA,&vv)); 2206 PetscCall(VecRestoreArrayWrite(vA,&va)); 2207 PetscCall(VecRestoreArrayRead(vB,&vb)); 2208 PetscCall(PetscFree(idxb)); 2209 PetscCall(VecDestroy(&vA)); 2210 PetscCall(VecDestroy(&vB)); 2211 PetscFunctionReturn(0); 2212 } 2213 2214 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2215 { 2216 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2217 PetscInt m = A->rmap->n,n = A->cmap->n; 2218 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2219 PetscInt *cmap = mat->garray; 2220 PetscInt *diagIdx, *offdiagIdx; 2221 Vec diagV, offdiagV; 2222 PetscScalar *a, *diagA, *offdiagA; 2223 const PetscScalar *ba,*bav; 2224 PetscInt r,j,col,ncols,*bi,*bj; 2225 Mat B = mat->B; 2226 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2227 2228 PetscFunctionBegin; 2229 /* When a process holds entire A and other processes have no entry */ 2230 if (A->cmap->N == n) { 2231 PetscCall(VecGetArrayWrite(v,&diagA)); 2232 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2233 PetscCall(MatGetRowMinAbs(mat->A,diagV,idx)); 2234 PetscCall(VecDestroy(&diagV)); 2235 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2236 PetscFunctionReturn(0); 2237 } else if (n == 0) { 2238 if (m) { 2239 PetscCall(VecGetArrayWrite(v,&a)); 2240 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2241 PetscCall(VecRestoreArrayWrite(v,&a)); 2242 } 2243 PetscFunctionReturn(0); 
2244 } 2245 2246 PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2247 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2248 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2249 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2250 2251 /* Get offdiagIdx[] for implicit 0.0 */ 2252 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2253 ba = bav; 2254 bi = b->i; 2255 bj = b->j; 2256 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2257 for (r = 0; r < m; r++) { 2258 ncols = bi[r+1] - bi[r]; 2259 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2260 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2261 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2262 offdiagA[r] = 0.0; 2263 2264 /* Find first hole in the cmap */ 2265 for (j=0; j<ncols; j++) { 2266 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2267 if (col > j && j < cstart) { 2268 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2269 break; 2270 } else if (col > j + n && j >= cstart) { 2271 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2272 break; 2273 } 2274 } 2275 if (j == ncols && ncols < A->cmap->N - n) { 2276 /* a hole is outside compressed Bcols */ 2277 if (ncols == 0) { 2278 if (cstart) { 2279 offdiagIdx[r] = 0; 2280 } else offdiagIdx[r] = cend; 2281 } else { /* ncols > 0 */ 2282 offdiagIdx[r] = cmap[ncols-1] + 1; 2283 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2284 } 2285 } 2286 } 2287 2288 for (j=0; j<ncols; j++) { 2289 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2290 ba++; bj++; 2291 } 2292 } 2293 2294 PetscCall(VecGetArrayWrite(v, &a)); 2295 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2296 for (r = 0; r < m; ++r) { 2297 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2298 a[r] = diagA[r]; 2299 if (idx) idx[r] = cstart + diagIdx[r]; 2300 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 
2301 a[r] = diagA[r]; 2302 if (idx) { 2303 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2304 idx[r] = cstart + diagIdx[r]; 2305 } else idx[r] = offdiagIdx[r]; 2306 } 2307 } else { 2308 a[r] = offdiagA[r]; 2309 if (idx) idx[r] = offdiagIdx[r]; 2310 } 2311 } 2312 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2313 PetscCall(VecRestoreArrayWrite(v, &a)); 2314 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2315 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2316 PetscCall(VecDestroy(&diagV)); 2317 PetscCall(VecDestroy(&offdiagV)); 2318 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2319 PetscFunctionReturn(0); 2320 } 2321 2322 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2323 { 2324 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2325 PetscInt m = A->rmap->n,n = A->cmap->n; 2326 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2327 PetscInt *cmap = mat->garray; 2328 PetscInt *diagIdx, *offdiagIdx; 2329 Vec diagV, offdiagV; 2330 PetscScalar *a, *diagA, *offdiagA; 2331 const PetscScalar *ba,*bav; 2332 PetscInt r,j,col,ncols,*bi,*bj; 2333 Mat B = mat->B; 2334 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2335 2336 PetscFunctionBegin; 2337 /* When a process holds entire A and other processes have no entry */ 2338 if (A->cmap->N == n) { 2339 PetscCall(VecGetArrayWrite(v,&diagA)); 2340 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2341 PetscCall(MatGetRowMin(mat->A,diagV,idx)); 2342 PetscCall(VecDestroy(&diagV)); 2343 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2344 PetscFunctionReturn(0); 2345 } else if (n == 0) { 2346 if (m) { 2347 PetscCall(VecGetArrayWrite(v,&a)); 2348 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2349 PetscCall(VecRestoreArrayWrite(v,&a)); 2350 } 2351 PetscFunctionReturn(0); 2352 } 2353 2354 PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx)); 2355 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2356 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2357 
PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2358 2359 /* Get offdiagIdx[] for implicit 0.0 */ 2360 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2361 ba = bav; 2362 bi = b->i; 2363 bj = b->j; 2364 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2365 for (r = 0; r < m; r++) { 2366 ncols = bi[r+1] - bi[r]; 2367 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2368 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2369 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2370 offdiagA[r] = 0.0; 2371 2372 /* Find first hole in the cmap */ 2373 for (j=0; j<ncols; j++) { 2374 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2375 if (col > j && j < cstart) { 2376 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2377 break; 2378 } else if (col > j + n && j >= cstart) { 2379 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2380 break; 2381 } 2382 } 2383 if (j == ncols && ncols < A->cmap->N - n) { 2384 /* a hole is outside compressed Bcols */ 2385 if (ncols == 0) { 2386 if (cstart) { 2387 offdiagIdx[r] = 0; 2388 } else offdiagIdx[r] = cend; 2389 } else { /* ncols > 0 */ 2390 offdiagIdx[r] = cmap[ncols-1] + 1; 2391 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2392 } 2393 } 2394 } 2395 2396 for (j=0; j<ncols; j++) { 2397 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2398 ba++; bj++; 2399 } 2400 } 2401 2402 PetscCall(VecGetArrayWrite(v, &a)); 2403 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2404 for (r = 0; r < m; ++r) { 2405 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2406 a[r] = diagA[r]; 2407 if (idx) idx[r] = cstart + diagIdx[r]; 2408 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2409 a[r] = diagA[r]; 2410 if (idx) { 2411 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2412 idx[r] = cstart + diagIdx[r]; 2413 } else idx[r] = offdiagIdx[r]; 2414 } 2415 } else { 2416 a[r] = offdiagA[r]; 
2417 if (idx) idx[r] = offdiagIdx[r]; 2418 } 2419 } 2420 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2421 PetscCall(VecRestoreArrayWrite(v, &a)); 2422 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2423 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2424 PetscCall(VecDestroy(&diagV)); 2425 PetscCall(VecDestroy(&offdiagV)); 2426 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2431 { 2432 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2433 PetscInt m = A->rmap->n,n = A->cmap->n; 2434 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2435 PetscInt *cmap = mat->garray; 2436 PetscInt *diagIdx, *offdiagIdx; 2437 Vec diagV, offdiagV; 2438 PetscScalar *a, *diagA, *offdiagA; 2439 const PetscScalar *ba,*bav; 2440 PetscInt r,j,col,ncols,*bi,*bj; 2441 Mat B = mat->B; 2442 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2443 2444 PetscFunctionBegin; 2445 /* When a process holds entire A and other processes have no entry */ 2446 if (A->cmap->N == n) { 2447 PetscCall(VecGetArrayWrite(v,&diagA)); 2448 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2449 PetscCall(MatGetRowMax(mat->A,diagV,idx)); 2450 PetscCall(VecDestroy(&diagV)); 2451 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2452 PetscFunctionReturn(0); 2453 } else if (n == 0) { 2454 if (m) { 2455 PetscCall(VecGetArrayWrite(v,&a)); 2456 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2457 PetscCall(VecRestoreArrayWrite(v,&a)); 2458 } 2459 PetscFunctionReturn(0); 2460 } 2461 2462 PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2463 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2464 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2465 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2466 2467 /* Get offdiagIdx[] for implicit 0.0 */ 2468 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2469 ba = bav; 2470 bi = b->i; 2471 bj = b->j; 2472 
PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2473 for (r = 0; r < m; r++) { 2474 ncols = bi[r+1] - bi[r]; 2475 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2476 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2477 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2478 offdiagA[r] = 0.0; 2479 2480 /* Find first hole in the cmap */ 2481 for (j=0; j<ncols; j++) { 2482 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2483 if (col > j && j < cstart) { 2484 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2485 break; 2486 } else if (col > j + n && j >= cstart) { 2487 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2488 break; 2489 } 2490 } 2491 if (j == ncols && ncols < A->cmap->N - n) { 2492 /* a hole is outside compressed Bcols */ 2493 if (ncols == 0) { 2494 if (cstart) { 2495 offdiagIdx[r] = 0; 2496 } else offdiagIdx[r] = cend; 2497 } else { /* ncols > 0 */ 2498 offdiagIdx[r] = cmap[ncols-1] + 1; 2499 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2500 } 2501 } 2502 } 2503 2504 for (j=0; j<ncols; j++) { 2505 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2506 ba++; bj++; 2507 } 2508 } 2509 2510 PetscCall(VecGetArrayWrite(v, &a)); 2511 PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA)); 2512 for (r = 0; r < m; ++r) { 2513 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2514 a[r] = diagA[r]; 2515 if (idx) idx[r] = cstart + diagIdx[r]; 2516 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2517 a[r] = diagA[r]; 2518 if (idx) { 2519 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2520 idx[r] = cstart + diagIdx[r]; 2521 } else idx[r] = offdiagIdx[r]; 2522 } 2523 } else { 2524 a[r] = offdiagA[r]; 2525 if (idx) idx[r] = offdiagIdx[r]; 2526 } 2527 } 2528 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2529 PetscCall(VecRestoreArrayWrite(v, &a)); 2530 PetscCall(VecRestoreArrayRead(diagV, (const 
PetscScalar**)&diagA)); 2531 PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA)); 2532 PetscCall(VecDestroy(&diagV)); 2533 PetscCall(VecDestroy(&offdiagV)); 2534 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2535 PetscFunctionReturn(0); 2536 } 2537 2538 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2539 { 2540 Mat *dummy; 2541 2542 PetscFunctionBegin; 2543 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy)); 2544 *newmat = *dummy; 2545 PetscCall(PetscFree(dummy)); 2546 PetscFunctionReturn(0); 2547 } 2548 2549 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2550 { 2551 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2552 2553 PetscFunctionBegin; 2554 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2555 A->factorerrortype = a->A->factorerrortype; 2556 PetscFunctionReturn(0); 2557 } 2558 2559 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2560 { 2561 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2562 2563 PetscFunctionBegin; 2564 PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2565 PetscCall(MatSetRandom(aij->A,rctx)); 2566 if (x->assembled) { 2567 PetscCall(MatSetRandom(aij->B,rctx)); 2568 } else { 2569 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx)); 2570 } 2571 PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY)); 2572 PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY)); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2577 { 2578 PetscFunctionBegin; 2579 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2580 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2581 PetscFunctionReturn(0); 2582 } 2583 2584 /*@ 2585 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI 
rank 2586 2587 Not collective 2588 2589 Input Parameter: 2590 . A - the matrix 2591 2592 Output Parameter: 2593 . nz - the number of nonzeros 2594 2595 Level: advanced 2596 2597 @*/ 2598 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A,PetscCount *nz) 2599 { 2600 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)A->data; 2601 Mat_SeqAIJ *aaij = (Mat_SeqAIJ*)maij->A->data, *baij = (Mat_SeqAIJ*)maij->B->data; 2602 2603 PetscFunctionBegin; 2604 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2605 PetscFunctionReturn(0); 2606 } 2607 2608 /*@ 2609 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2610 2611 Collective on Mat 2612 2613 Input Parameters: 2614 + A - the matrix 2615 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2616 2617 Level: advanced 2618 2619 @*/ 2620 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2621 { 2622 PetscFunctionBegin; 2623 PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc)); 2624 PetscFunctionReturn(0); 2625 } 2626 2627 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2628 { 2629 PetscBool sc = PETSC_FALSE,flg; 2630 2631 PetscFunctionBegin; 2632 PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options"); 2633 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2634 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg)); 2635 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc)); 2636 PetscOptionsHeadEnd(); 2637 PetscFunctionReturn(0); 2638 } 2639 2640 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2641 { 2642 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2643 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2644 2645 PetscFunctionBegin; 2646 if (!Y->preallocated) { 2647 PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL)); 
2648 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2649 PetscInt nonew = aij->nonew; 2650 PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL)); 2651 aij->nonew = nonew; 2652 } 2653 PetscCall(MatShift_Basic(Y,a)); 2654 PetscFunctionReturn(0); 2655 } 2656 2657 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2658 { 2659 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2660 2661 PetscFunctionBegin; 2662 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2663 PetscCall(MatMissingDiagonal(a->A,missing,d)); 2664 if (d) { 2665 PetscInt rstart; 2666 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 2667 *d += rstart; 2668 2669 } 2670 PetscFunctionReturn(0); 2671 } 2672 2673 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2674 { 2675 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2676 2677 PetscFunctionBegin; 2678 PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag)); 2679 PetscFunctionReturn(0); 2680 } 2681 2682 /* -------------------------------------------------------------------*/ 2683 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2684 MatGetRow_MPIAIJ, 2685 MatRestoreRow_MPIAIJ, 2686 MatMult_MPIAIJ, 2687 /* 4*/ MatMultAdd_MPIAIJ, 2688 MatMultTranspose_MPIAIJ, 2689 MatMultTransposeAdd_MPIAIJ, 2690 NULL, 2691 NULL, 2692 NULL, 2693 /*10*/ NULL, 2694 NULL, 2695 NULL, 2696 MatSOR_MPIAIJ, 2697 MatTranspose_MPIAIJ, 2698 /*15*/ MatGetInfo_MPIAIJ, 2699 MatEqual_MPIAIJ, 2700 MatGetDiagonal_MPIAIJ, 2701 MatDiagonalScale_MPIAIJ, 2702 MatNorm_MPIAIJ, 2703 /*20*/ MatAssemblyBegin_MPIAIJ, 2704 MatAssemblyEnd_MPIAIJ, 2705 MatSetOption_MPIAIJ, 2706 MatZeroEntries_MPIAIJ, 2707 /*24*/ MatZeroRows_MPIAIJ, 2708 NULL, 2709 NULL, 2710 NULL, 2711 NULL, 2712 /*29*/ MatSetUp_MPIAIJ, 2713 NULL, 2714 NULL, 2715 MatGetDiagonalBlock_MPIAIJ, 2716 NULL, 
2717 /*34*/ MatDuplicate_MPIAIJ, 2718 NULL, 2719 NULL, 2720 NULL, 2721 NULL, 2722 /*39*/ MatAXPY_MPIAIJ, 2723 MatCreateSubMatrices_MPIAIJ, 2724 MatIncreaseOverlap_MPIAIJ, 2725 MatGetValues_MPIAIJ, 2726 MatCopy_MPIAIJ, 2727 /*44*/ MatGetRowMax_MPIAIJ, 2728 MatScale_MPIAIJ, 2729 MatShift_MPIAIJ, 2730 MatDiagonalSet_MPIAIJ, 2731 MatZeroRowsColumns_MPIAIJ, 2732 /*49*/ MatSetRandom_MPIAIJ, 2733 MatGetRowIJ_MPIAIJ, 2734 MatRestoreRowIJ_MPIAIJ, 2735 NULL, 2736 NULL, 2737 /*54*/ MatFDColoringCreate_MPIXAIJ, 2738 NULL, 2739 MatSetUnfactored_MPIAIJ, 2740 MatPermute_MPIAIJ, 2741 NULL, 2742 /*59*/ MatCreateSubMatrix_MPIAIJ, 2743 MatDestroy_MPIAIJ, 2744 MatView_MPIAIJ, 2745 NULL, 2746 NULL, 2747 /*64*/ NULL, 2748 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2749 NULL, 2750 NULL, 2751 NULL, 2752 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2753 MatGetRowMinAbs_MPIAIJ, 2754 NULL, 2755 NULL, 2756 NULL, 2757 NULL, 2758 /*75*/ MatFDColoringApply_AIJ, 2759 MatSetFromOptions_MPIAIJ, 2760 NULL, 2761 NULL, 2762 MatFindZeroDiagonals_MPIAIJ, 2763 /*80*/ NULL, 2764 NULL, 2765 NULL, 2766 /*83*/ MatLoad_MPIAIJ, 2767 MatIsSymmetric_MPIAIJ, 2768 NULL, 2769 NULL, 2770 NULL, 2771 NULL, 2772 /*89*/ NULL, 2773 NULL, 2774 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2775 NULL, 2776 NULL, 2777 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2778 NULL, 2779 NULL, 2780 NULL, 2781 MatBindToCPU_MPIAIJ, 2782 /*99*/ MatProductSetFromOptions_MPIAIJ, 2783 NULL, 2784 NULL, 2785 MatConjugate_MPIAIJ, 2786 NULL, 2787 /*104*/MatSetValuesRow_MPIAIJ, 2788 MatRealPart_MPIAIJ, 2789 MatImaginaryPart_MPIAIJ, 2790 NULL, 2791 NULL, 2792 /*109*/NULL, 2793 NULL, 2794 MatGetRowMin_MPIAIJ, 2795 NULL, 2796 MatMissingDiagonal_MPIAIJ, 2797 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2798 NULL, 2799 MatGetGhosts_MPIAIJ, 2800 NULL, 2801 NULL, 2802 /*119*/MatMultDiagonalBlock_MPIAIJ, 2803 NULL, 2804 NULL, 2805 NULL, 2806 MatGetMultiProcBlock_MPIAIJ, 2807 /*124*/MatFindNonzeroRows_MPIAIJ, 2808 MatGetColumnReductions_MPIAIJ, 2809 MatInvertBlockDiagonal_MPIAIJ, 2810 
MatInvertVariableBlockDiagonal_MPIAIJ, 2811 MatCreateSubMatricesMPI_MPIAIJ, 2812 /*129*/NULL, 2813 NULL, 2814 NULL, 2815 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2816 NULL, 2817 /*134*/NULL, 2818 NULL, 2819 NULL, 2820 NULL, 2821 NULL, 2822 /*139*/MatSetBlockSizes_MPIAIJ, 2823 NULL, 2824 NULL, 2825 MatFDColoringSetUp_MPIXAIJ, 2826 MatFindOffBlockDiagonalEntries_MPIAIJ, 2827 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2828 /*145*/NULL, 2829 NULL, 2830 NULL, 2831 MatCreateGraph_Simple_AIJ, 2832 MatFilter_AIJ 2833 }; 2834 2835 /* ----------------------------------------------------------------------------------------*/ 2836 2837 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2838 { 2839 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2840 2841 PetscFunctionBegin; 2842 PetscCall(MatStoreValues(aij->A)); 2843 PetscCall(MatStoreValues(aij->B)); 2844 PetscFunctionReturn(0); 2845 } 2846 2847 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2848 { 2849 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2850 2851 PetscFunctionBegin; 2852 PetscCall(MatRetrieveValues(aij->A)); 2853 PetscCall(MatRetrieveValues(aij->B)); 2854 PetscFunctionReturn(0); 2855 } 2856 2857 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2858 { 2859 Mat_MPIAIJ *b; 2860 PetscMPIInt size; 2861 2862 PetscFunctionBegin; 2863 PetscCall(PetscLayoutSetUp(B->rmap)); 2864 PetscCall(PetscLayoutSetUp(B->cmap)); 2865 b = (Mat_MPIAIJ*)B->data; 2866 2867 #if defined(PETSC_USE_CTABLE) 2868 PetscCall(PetscTableDestroy(&b->colmap)); 2869 #else 2870 PetscCall(PetscFree(b->colmap)); 2871 #endif 2872 PetscCall(PetscFree(b->garray)); 2873 PetscCall(VecDestroy(&b->lvec)); 2874 PetscCall(VecScatterDestroy(&b->Mvctx)); 2875 2876 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2877 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 2878 PetscCall(MatDestroy(&b->B)); 2879 
PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 2880 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0)); 2881 PetscCall(MatSetBlockSizesFromMats(b->B,B,B)); 2882 PetscCall(MatSetType(b->B,MATSEQAIJ)); 2883 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 2884 2885 if (!B->preallocated) { 2886 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 2887 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 2888 PetscCall(MatSetBlockSizesFromMats(b->A,B,B)); 2889 PetscCall(MatSetType(b->A,MATSEQAIJ)); 2890 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 2891 } 2892 2893 PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz)); 2894 PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz)); 2895 B->preallocated = PETSC_TRUE; 2896 B->was_assembled = PETSC_FALSE; 2897 B->assembled = PETSC_FALSE; 2898 PetscFunctionReturn(0); 2899 } 2900 2901 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2902 { 2903 Mat_MPIAIJ *b; 2904 2905 PetscFunctionBegin; 2906 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2907 PetscCall(PetscLayoutSetUp(B->rmap)); 2908 PetscCall(PetscLayoutSetUp(B->cmap)); 2909 b = (Mat_MPIAIJ*)B->data; 2910 2911 #if defined(PETSC_USE_CTABLE) 2912 PetscCall(PetscTableDestroy(&b->colmap)); 2913 #else 2914 PetscCall(PetscFree(b->colmap)); 2915 #endif 2916 PetscCall(PetscFree(b->garray)); 2917 PetscCall(VecDestroy(&b->lvec)); 2918 PetscCall(VecScatterDestroy(&b->Mvctx)); 2919 2920 PetscCall(MatResetPreallocation(b->A)); 2921 PetscCall(MatResetPreallocation(b->B)); 2922 B->preallocated = PETSC_TRUE; 2923 B->was_assembled = PETSC_FALSE; 2924 B->assembled = PETSC_FALSE; 2925 PetscFunctionReturn(0); 2926 } 2927 2928 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2929 { 2930 Mat mat; 2931 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2932 2933 PetscFunctionBegin; 2934 *newmat = NULL; 2935 
PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 2936 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 2937 PetscCall(MatSetBlockSizesFromMats(mat,matin,matin)); 2938 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 2939 a = (Mat_MPIAIJ*)mat->data; 2940 2941 mat->factortype = matin->factortype; 2942 mat->assembled = matin->assembled; 2943 mat->insertmode = NOT_SET_VALUES; 2944 mat->preallocated = matin->preallocated; 2945 2946 a->size = oldmat->size; 2947 a->rank = oldmat->rank; 2948 a->donotstash = oldmat->donotstash; 2949 a->roworiented = oldmat->roworiented; 2950 a->rowindices = NULL; 2951 a->rowvalues = NULL; 2952 a->getrowactive = PETSC_FALSE; 2953 2954 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 2955 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 2956 2957 if (oldmat->colmap) { 2958 #if defined(PETSC_USE_CTABLE) 2959 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 2960 #else 2961 PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap)); 2962 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt))); 2963 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N)); 2964 #endif 2965 } else a->colmap = NULL; 2966 if (oldmat->garray) { 2967 PetscInt len; 2968 len = oldmat->B->cmap->n; 2969 PetscCall(PetscMalloc1(len+1,&a->garray)); 2970 PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 2971 if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 2972 } else a->garray = NULL; 2973 2974 /* It may happen MatDuplicate is called with a non-assembled matrix 2975 In fact, MatDuplicate only requires the matrix to be preallocated 2976 This may happen inside a DMCreateMatrix_Shell */ 2977 if (oldmat->lvec) { 2978 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 2979 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 2980 } 2981 if (oldmat->Mvctx) { 2982 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 
2983 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 2984 } 2985 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2986 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2987 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 2988 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 2989 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 2990 *newmat = mat; 2991 PetscFunctionReturn(0); 2992 } 2993 2994 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2995 { 2996 PetscBool isbinary, ishdf5; 2997 2998 PetscFunctionBegin; 2999 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 3000 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 3001 /* force binary viewer to load .info file if it has not yet done so */ 3002 PetscCall(PetscViewerSetUp(viewer)); 3003 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 3004 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5)); 3005 if (isbinary) { 3006 PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer)); 3007 } else if (ishdf5) { 3008 #if defined(PETSC_HAVE_HDF5) 3009 PetscCall(MatLoad_AIJ_HDF5(newMat,viewer)); 3010 #else 3011 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3012 #endif 3013 } else { 3014 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 3015 } 3016 PetscFunctionReturn(0); 3017 } 3018 3019 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3020 { 3021 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 3022 PetscInt *rowidxs,*colidxs; 3023 PetscScalar *matvals; 3024 3025 PetscFunctionBegin; 3026 PetscCall(PetscViewerSetUp(viewer)); 3027 3028 /* read in matrix header */ 3029 
PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3030 PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3031 M = header[1]; N = header[2]; nz = header[3]; 3032 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3033 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3034 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3035 3036 /* set block sizes from the viewer's .info file */ 3037 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3038 /* set global sizes if not set already */ 3039 if (mat->rmap->N < 0) mat->rmap->N = M; 3040 if (mat->cmap->N < 0) mat->cmap->N = N; 3041 PetscCall(PetscLayoutSetUp(mat->rmap)); 3042 PetscCall(PetscLayoutSetUp(mat->cmap)); 3043 3044 /* check if the matrix sizes are correct */ 3045 PetscCall(MatGetSize(mat,&rows,&cols)); 3046 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3047 3048 /* read in row lengths and build row indices */ 3049 PetscCall(MatGetLocalSize(mat,&m,NULL)); 3050 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3051 PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT)); 3052 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3053 PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer))); 3054 PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3055 /* read in column 
indices and matrix values */ 3056 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals)); 3057 PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 3058 PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 3059 /* store matrix indices and values */ 3060 PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals)); 3061 PetscCall(PetscFree(rowidxs)); 3062 PetscCall(PetscFree2(colidxs,matvals)); 3063 PetscFunctionReturn(0); 3064 } 3065 3066 /* Not scalable because of ISAllGather() unless getting all columns. */ 3067 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3068 { 3069 IS iscol_local; 3070 PetscBool isstride; 3071 PetscMPIInt lisstride=0,gisstride; 3072 3073 PetscFunctionBegin; 3074 /* check if we are grabbing all columns*/ 3075 PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride)); 3076 3077 if (isstride) { 3078 PetscInt start,len,mstart,mlen; 3079 PetscCall(ISStrideGetInfo(iscol,&start,NULL)); 3080 PetscCall(ISGetLocalSize(iscol,&len)); 3081 PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen)); 3082 if (mstart == start && mlen-mstart == len) lisstride = 1; 3083 } 3084 3085 PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat))); 3086 if (gisstride) { 3087 PetscInt N; 3088 PetscCall(MatGetSize(mat,NULL,&N)); 3089 PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local)); 3090 PetscCall(ISSetIdentity(iscol_local)); 3091 PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3092 } else { 3093 PetscInt cbs; 3094 PetscCall(ISGetBlockSize(iscol,&cbs)); 3095 PetscCall(ISAllGather(iscol,&iscol_local)); 3096 PetscCall(ISSetBlockSize(iscol_local,cbs)); 3097 } 3098 3099 *isseq = iscol_local; 3100 PetscFunctionReturn(0); 3101 } 3102 3103 /* 3104 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid 
ISAllGather() and global size of iscol_local 3105 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3106 3107 Input Parameters: 3108 mat - matrix 3109 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3110 i.e., mat->rstart <= isrow[i] < mat->rend 3111 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3112 i.e., mat->cstart <= iscol[i] < mat->cend 3113 Output Parameter: 3114 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3115 iscol_o - sequential column index set for retrieving mat->B 3116 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3117 */ 3118 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3119 { 3120 Vec x,cmap; 3121 const PetscInt *is_idx; 3122 PetscScalar *xarray,*cmaparray; 3123 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3124 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3125 Mat B=a->B; 3126 Vec lvec=a->lvec,lcmap; 3127 PetscInt i,cstart,cend,Bn=B->cmap->N; 3128 MPI_Comm comm; 3129 VecScatter Mvctx=a->Mvctx; 3130 3131 PetscFunctionBegin; 3132 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3133 PetscCall(ISGetLocalSize(iscol,&ncols)); 3134 3135 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3136 PetscCall(MatCreateVecs(mat,&x,NULL)); 3137 PetscCall(VecSet(x,-1.0)); 3138 PetscCall(VecDuplicate(x,&cmap)); 3139 PetscCall(VecSet(cmap,-1.0)); 3140 3141 /* Get start indices */ 3142 PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm)); 3143 isstart -= ncols; 3144 PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend)); 3145 3146 PetscCall(ISGetIndices(iscol,&is_idx)); 3147 PetscCall(VecGetArray(x,&xarray)); 3148 PetscCall(VecGetArray(cmap,&cmaparray)); 3149 PetscCall(PetscMalloc1(ncols,&idx)); 3150 for (i=0; i<ncols; i++) { 3151 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3152 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3153 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3154 } 3155 PetscCall(VecRestoreArray(x,&xarray)); 3156 PetscCall(VecRestoreArray(cmap,&cmaparray)); 3157 PetscCall(ISRestoreIndices(iscol,&is_idx)); 3158 3159 /* Get iscol_d */ 3160 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d)); 3161 PetscCall(ISGetBlockSize(iscol,&i)); 3162 PetscCall(ISSetBlockSize(*iscol_d,i)); 3163 3164 /* Get isrow_d */ 3165 PetscCall(ISGetLocalSize(isrow,&m)); 3166 rstart = mat->rmap->rstart; 3167 PetscCall(PetscMalloc1(m,&idx)); 3168 PetscCall(ISGetIndices(isrow,&is_idx)); 3169 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3170 PetscCall(ISRestoreIndices(isrow,&is_idx)); 3171 3172 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d)); 3173 PetscCall(ISGetBlockSize(isrow,&i)); 3174 PetscCall(ISSetBlockSize(*isrow_d,i)); 3175 3176 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3177 PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 3178 PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 3179 3180 PetscCall(VecDuplicate(lvec,&lcmap)); 3181 3182 PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 3183 
PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 3184 3185 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3186 /* off-process column indices */ 3187 count = 0; 3188 PetscCall(PetscMalloc1(Bn,&idx)); 3189 PetscCall(PetscMalloc1(Bn,&cmap1)); 3190 3191 PetscCall(VecGetArray(lvec,&xarray)); 3192 PetscCall(VecGetArray(lcmap,&cmaparray)); 3193 for (i=0; i<Bn; i++) { 3194 if (PetscRealPart(xarray[i]) > -1.0) { 3195 idx[count] = i; /* local column index in off-diagonal part B */ 3196 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3197 count++; 3198 } 3199 } 3200 PetscCall(VecRestoreArray(lvec,&xarray)); 3201 PetscCall(VecRestoreArray(lcmap,&cmaparray)); 3202 3203 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o)); 3204 /* cannot ensure iscol_o has same blocksize as iscol! */ 3205 3206 PetscCall(PetscFree(idx)); 3207 *garray = cmap1; 3208 3209 PetscCall(VecDestroy(&x)); 3210 PetscCall(VecDestroy(&cmap)); 3211 PetscCall(VecDestroy(&lcmap)); 3212 PetscFunctionReturn(0); 3213 } 3214 3215 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3216 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3217 { 3218 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3219 Mat M = NULL; 3220 MPI_Comm comm; 3221 IS iscol_d,isrow_d,iscol_o; 3222 Mat Asub = NULL,Bsub = NULL; 3223 PetscInt n; 3224 3225 PetscFunctionBegin; 3226 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3227 3228 if (call == MAT_REUSE_MATRIX) { 3229 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3230 PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d)); 3231 PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3232 3233 
PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d)); 3234 PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3235 3236 PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o)); 3237 PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3238 3239 /* Update diagonal and off-diagonal portions of submat */ 3240 asub = (Mat_MPIAIJ*)(*submat)->data; 3241 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A)); 3242 PetscCall(ISGetLocalSize(iscol_o,&n)); 3243 if (n) { 3244 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B)); 3245 } 3246 PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY)); 3247 PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY)); 3248 3249 } else { /* call == MAT_INITIAL_MATRIX) */ 3250 const PetscInt *garray; 3251 PetscInt BsubN; 3252 3253 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3254 PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray)); 3255 3256 /* Create local submatrices Asub and Bsub */ 3257 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub)); 3258 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub)); 3259 3260 /* Create submatrix M */ 3261 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M)); 3262 3263 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3264 asub = (Mat_MPIAIJ*)M->data; 3265 3266 PetscCall(ISGetLocalSize(iscol_o,&BsubN)); 3267 n = asub->B->cmap->N; 3268 if (BsubN > n) { 3269 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3270 const PetscInt *idx; 3271 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3272 PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN)); 3273 3274 PetscCall(PetscMalloc1(n,&idx_new)); 3275 j = 0; 3276 PetscCall(ISGetIndices(iscol_o,&idx)); 3277 for (i=0; i<n; i++) { 3278 if (j >= BsubN) break; 3279 while (subgarray[i] > garray[j]) j++; 3280 3281 if (subgarray[i] == garray[j]) { 3282 idx_new[i] = idx[j++]; 3283 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]); 3284 } 3285 PetscCall(ISRestoreIndices(iscol_o,&idx)); 3286 3287 PetscCall(ISDestroy(&iscol_o)); 3288 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o)); 3289 3290 } else if (BsubN < n) { 3291 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N); 3292 } 3293 3294 PetscCall(PetscFree(garray)); 3295 *submat = M; 3296 3297 /* Save isrow_d, iscol_d and iscol_o used in processor for next request 
*/ 3298 PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d)); 3299 PetscCall(ISDestroy(&isrow_d)); 3300 3301 PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d)); 3302 PetscCall(ISDestroy(&iscol_d)); 3303 3304 PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o)); 3305 PetscCall(ISDestroy(&iscol_o)); 3306 } 3307 PetscFunctionReturn(0); 3308 } 3309 3310 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3311 { 3312 IS iscol_local=NULL,isrow_d; 3313 PetscInt csize; 3314 PetscInt n,i,j,start,end; 3315 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3316 MPI_Comm comm; 3317 3318 PetscFunctionBegin; 3319 /* If isrow has same processor distribution as mat, 3320 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3321 if (call == MAT_REUSE_MATRIX) { 3322 PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d)); 3323 if (isrow_d) { 3324 sameRowDist = PETSC_TRUE; 3325 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3326 } else { 3327 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local)); 3328 if (iscol_local) { 3329 sameRowDist = PETSC_TRUE; 3330 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3331 } 3332 } 3333 } else { 3334 /* Check if isrow has same processor distribution as mat */ 3335 sameDist[0] = PETSC_FALSE; 3336 PetscCall(ISGetLocalSize(isrow,&n)); 3337 if (!n) { 3338 sameDist[0] = PETSC_TRUE; 3339 } else { 3340 PetscCall(ISGetMinMax(isrow,&i,&j)); 3341 PetscCall(MatGetOwnershipRange(mat,&start,&end)); 3342 if (i >= start && j < end) { 3343 sameDist[0] = PETSC_TRUE; 3344 } 3345 } 3346 3347 /* Check if iscol has same processor distribution as mat */ 3348 sameDist[1] = PETSC_FALSE; 3349 PetscCall(ISGetLocalSize(iscol,&n)); 3350 if (!n) { 3351 sameDist[1] = PETSC_TRUE; 3352 } else { 3353 PetscCall(ISGetMinMax(iscol,&i,&j)); 3354 
PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end)); 3355 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3356 } 3357 3358 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3359 PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm)); 3360 sameRowDist = tsameDist[0]; 3361 } 3362 3363 if (sameRowDist) { 3364 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3365 /* isrow and iscol have same processor distribution as mat */ 3366 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat)); 3367 PetscFunctionReturn(0); 3368 } else { /* sameRowDist */ 3369 /* isrow has same processor distribution as mat */ 3370 if (call == MAT_INITIAL_MATRIX) { 3371 PetscBool sorted; 3372 PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 3373 PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */ 3374 PetscCall(ISGetSize(iscol,&i)); 3375 PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i); 3376 3377 PetscCall(ISSorted(iscol_local,&sorted)); 3378 if (sorted) { 3379 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3380 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat)); 3381 PetscFunctionReturn(0); 3382 } 3383 } else { /* call == MAT_REUSE_MATRIX */ 3384 IS iscol_sub; 3385 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3386 if (iscol_sub) { 3387 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat)); 3388 PetscFunctionReturn(0); 3389 } 3390 } 3391 } 3392 } 3393 3394 /* General case: iscol -> iscol_local which has global size of iscol */ 3395 if (call == MAT_REUSE_MATRIX) { 3396 PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local)); 3397 PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix 
passed in was not used before, cannot reuse"); 3398 } else { 3399 if (!iscol_local) { 3400 PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 3401 } 3402 } 3403 3404 PetscCall(ISGetLocalSize(iscol,&csize)); 3405 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat)); 3406 3407 if (call == MAT_INITIAL_MATRIX) { 3408 PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local)); 3409 PetscCall(ISDestroy(&iscol_local)); 3410 } 3411 PetscFunctionReturn(0); 3412 } 3413 3414 /*@C 3415 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3416 and "off-diagonal" part of the matrix in CSR format. 3417 3418 Collective 3419 3420 Input Parameters: 3421 + comm - MPI communicator 3422 . A - "diagonal" portion of matrix 3423 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3424 - garray - global index of B columns 3425 3426 Output Parameter: 3427 . mat - the matrix, with input A as its local diagonal matrix 3428 Level: advanced 3429 3430 Notes: 3431 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3432 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 
3433 3434 .seealso: `MatCreateMPIAIJWithSplitArrays()` 3435 @*/ 3436 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3437 { 3438 Mat_MPIAIJ *maij; 3439 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3440 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3441 const PetscScalar *oa; 3442 Mat Bnew; 3443 PetscInt m,n,N; 3444 MatType mpi_mat_type; 3445 3446 PetscFunctionBegin; 3447 PetscCall(MatCreate(comm,mat)); 3448 PetscCall(MatGetSize(A,&m,&n)); 3449 PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N); 3450 PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs); 3451 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3452 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3453 3454 /* Get global columns of mat */ 3455 PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm)); 3456 3457 PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N)); 3458 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3459 PetscCall(MatGetMPIMatType_Private(A,&mpi_mat_type)); 3460 PetscCall(MatSetType(*mat,mpi_mat_type)); 3461 3462 PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs)); 3463 maij = (Mat_MPIAIJ*)(*mat)->data; 3464 3465 (*mat)->preallocated = PETSC_TRUE; 3466 3467 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3468 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3469 3470 /* Set A as diagonal portion of *mat */ 3471 maij->A = A; 3472 3473 nz = oi[m]; 3474 for (i=0; i<nz; i++) { 3475 col = oj[i]; 3476 oj[i] = garray[col]; 3477 } 3478 3479 /* Set Bnew as off-diagonal portion of *mat */ 3480 PetscCall(MatSeqAIJGetArrayRead(B,&oa)); 3481 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew)); 3482 PetscCall(MatSeqAIJRestoreArrayRead(B,&oa)); 3483 bnew = (Mat_SeqAIJ*)Bnew->data; 3484 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3485 maij->B = Bnew; 3486 3487 PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N); 3488 3489 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3490 b->free_a = PETSC_FALSE; 3491 b->free_ij = PETSC_FALSE; 3492 PetscCall(MatDestroy(&B)); 3493 3494 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3495 bnew->free_a = PETSC_TRUE; 3496 bnew->free_ij = PETSC_TRUE; 3497 3498 /* condense columns of maij->B */ 3499 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 3500 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 3501 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 3502 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 3503 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3504 PetscFunctionReturn(0); 3505 } 3506 3507 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3508 3509 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS 
isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3510 { 3511 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3512 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3513 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3514 Mat M,Msub,B=a->B; 3515 MatScalar *aa; 3516 Mat_SeqAIJ *aij; 3517 PetscInt *garray = a->garray,*colsub,Ncols; 3518 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3519 IS iscol_sub,iscmap; 3520 const PetscInt *is_idx,*cmap; 3521 PetscBool allcolumns=PETSC_FALSE; 3522 MPI_Comm comm; 3523 3524 PetscFunctionBegin; 3525 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3526 if (call == MAT_REUSE_MATRIX) { 3527 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3528 PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3529 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3530 3531 PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap)); 3532 PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3533 3534 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub)); 3535 PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3536 3537 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub)); 3538 3539 } else { /* call == MAT_INITIAL_MATRIX) */ 3540 PetscBool flg; 3541 3542 PetscCall(ISGetLocalSize(iscol,&n)); 3543 PetscCall(ISGetSize(iscol,&Ncols)); 3544 3545 /* (1) iscol -> nonscalable iscol_local */ 3546 /* Check for special case: each processor gets entire matrix columns */ 3547 PetscCall(ISIdentity(iscol_local,&flg)); 3548 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3549 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3550 
if (allcolumns) { 3551 iscol_sub = iscol_local; 3552 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3553 PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap)); 3554 3555 } else { 3556 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3557 PetscInt *idx,*cmap1,k; 3558 PetscCall(PetscMalloc1(Ncols,&idx)); 3559 PetscCall(PetscMalloc1(Ncols,&cmap1)); 3560 PetscCall(ISGetIndices(iscol_local,&is_idx)); 3561 count = 0; 3562 k = 0; 3563 for (i=0; i<Ncols; i++) { 3564 j = is_idx[i]; 3565 if (j >= cstart && j < cend) { 3566 /* diagonal part of mat */ 3567 idx[count] = j; 3568 cmap1[count++] = i; /* column index in submat */ 3569 } else if (Bn) { 3570 /* off-diagonal part of mat */ 3571 if (j == garray[k]) { 3572 idx[count] = j; 3573 cmap1[count++] = i; /* column index in submat */ 3574 } else if (j > garray[k]) { 3575 while (j > garray[k] && k < Bn-1) k++; 3576 if (j == garray[k]) { 3577 idx[count] = j; 3578 cmap1[count++] = i; /* column index in submat */ 3579 } 3580 } 3581 } 3582 } 3583 PetscCall(ISRestoreIndices(iscol_local,&is_idx)); 3584 3585 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub)); 3586 PetscCall(ISGetBlockSize(iscol,&cbs)); 3587 PetscCall(ISSetBlockSize(iscol_sub,cbs)); 3588 3589 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap)); 3590 } 3591 3592 /* (3) Create sequential Msub */ 3593 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub)); 3594 } 3595 3596 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3597 aij = (Mat_SeqAIJ*)(Msub)->data; 3598 ii = aij->i; 3599 PetscCall(ISGetIndices(iscmap,&cmap)); 3600 3601 /* 3602 m - number of local rows 3603 Ncols - number of columns (same on all processors) 3604 rstart - first row in new global matrix generated 3605 */ 3606 PetscCall(MatGetSize(Msub,&m,NULL)); 3607 3608 if (call 
== MAT_INITIAL_MATRIX) { 3609 /* (4) Create parallel newmat */ 3610 PetscMPIInt rank,size; 3611 PetscInt csize; 3612 3613 PetscCallMPI(MPI_Comm_size(comm,&size)); 3614 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3615 3616 /* 3617 Determine the number of non-zeros in the diagonal and off-diagonal 3618 portions of the matrix in order to do correct preallocation 3619 */ 3620 3621 /* first get start and end of "diagonal" columns */ 3622 PetscCall(ISGetLocalSize(iscol,&csize)); 3623 if (csize == PETSC_DECIDE) { 3624 PetscCall(ISGetSize(isrow,&mglobal)); 3625 if (mglobal == Ncols) { /* square matrix */ 3626 nlocal = m; 3627 } else { 3628 nlocal = Ncols/size + ((Ncols % size) > rank); 3629 } 3630 } else { 3631 nlocal = csize; 3632 } 3633 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3634 rstart = rend - nlocal; 3635 PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols); 3636 3637 /* next, compute all the lengths */ 3638 jj = aij->j; 3639 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3640 olens = dlens + m; 3641 for (i=0; i<m; i++) { 3642 jend = ii[i+1] - ii[i]; 3643 olen = 0; 3644 dlen = 0; 3645 for (j=0; j<jend; j++) { 3646 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3647 else dlen++; 3648 jj++; 3649 } 3650 olens[i] = olen; 3651 dlens[i] = dlen; 3652 } 3653 3654 PetscCall(ISGetBlockSize(isrow,&bs)); 3655 PetscCall(ISGetBlockSize(iscol,&cbs)); 3656 3657 PetscCall(MatCreate(comm,&M)); 3658 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols)); 3659 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3660 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3661 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3662 PetscCall(PetscFree(dlens)); 3663 3664 } else { /* call == MAT_REUSE_MATRIX */ 3665 M = *newmat; 3666 PetscCall(MatGetLocalSize(M,&i,NULL)); 3667 PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must 
be same size/layout as request"); 3668 PetscCall(MatZeroEntries(M)); 3669 /* 3670 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3671 rather than the slower MatSetValues(). 3672 */ 3673 M->was_assembled = PETSC_TRUE; 3674 M->assembled = PETSC_FALSE; 3675 } 3676 3677 /* (5) Set values of Msub to *newmat */ 3678 PetscCall(PetscMalloc1(count,&colsub)); 3679 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 3680 3681 jj = aij->j; 3682 PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa)); 3683 for (i=0; i<m; i++) { 3684 row = rstart + i; 3685 nz = ii[i+1] - ii[i]; 3686 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3687 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES)); 3688 jj += nz; aa += nz; 3689 } 3690 PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa)); 3691 PetscCall(ISRestoreIndices(iscmap,&cmap)); 3692 3693 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3694 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3695 3696 PetscCall(PetscFree(colsub)); 3697 3698 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3699 if (call == MAT_INITIAL_MATRIX) { 3700 *newmat = M; 3701 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub)); 3702 PetscCall(MatDestroy(&Msub)); 3703 3704 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub)); 3705 PetscCall(ISDestroy(&iscol_sub)); 3706 3707 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap)); 3708 PetscCall(ISDestroy(&iscmap)); 3709 3710 if (iscol_local) { 3711 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local)); 3712 PetscCall(ISDestroy(&iscol_local)); 3713 } 3714 } 3715 PetscFunctionReturn(0); 3716 } 3717 3718 /* 3719 Not great since it makes two copies of the submatrix, first an SeqAIJ 3720 in local and then by concatenating the local matrices the end result. 
3721 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3722 3723 Note: This requires a sequential iscol with all indices. 3724 */ 3725 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3726 { 3727 PetscMPIInt rank,size; 3728 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3729 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3730 Mat M,Mreuse; 3731 MatScalar *aa,*vwork; 3732 MPI_Comm comm; 3733 Mat_SeqAIJ *aij; 3734 PetscBool colflag,allcolumns=PETSC_FALSE; 3735 3736 PetscFunctionBegin; 3737 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3738 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3739 PetscCallMPI(MPI_Comm_size(comm,&size)); 3740 3741 /* Check for special case: each processor gets entire matrix columns */ 3742 PetscCall(ISIdentity(iscol,&colflag)); 3743 PetscCall(ISGetLocalSize(iscol,&n)); 3744 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3745 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3746 3747 if (call == MAT_REUSE_MATRIX) { 3748 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 3749 PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3750 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse)); 3751 } else { 3752 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse)); 3753 } 3754 3755 /* 3756 m - number of local rows 3757 n - number of columns (same on all processors) 3758 rstart - first row in new global matrix generated 3759 */ 3760 PetscCall(MatGetSize(Mreuse,&m,&n)); 3761 PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs)); 3762 if (call == MAT_INITIAL_MATRIX) { 3763 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3764 ii = aij->i; 3765 jj = aij->j; 3766 3767 /* 3768 Determine 
the number of non-zeros in the diagonal and off-diagonal 3769 portions of the matrix in order to do correct preallocation 3770 */ 3771 3772 /* first get start and end of "diagonal" columns */ 3773 if (csize == PETSC_DECIDE) { 3774 PetscCall(ISGetSize(isrow,&mglobal)); 3775 if (mglobal == n) { /* square matrix */ 3776 nlocal = m; 3777 } else { 3778 nlocal = n/size + ((n % size) > rank); 3779 } 3780 } else { 3781 nlocal = csize; 3782 } 3783 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3784 rstart = rend - nlocal; 3785 PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 3786 3787 /* next, compute all the lengths */ 3788 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3789 olens = dlens + m; 3790 for (i=0; i<m; i++) { 3791 jend = ii[i+1] - ii[i]; 3792 olen = 0; 3793 dlen = 0; 3794 for (j=0; j<jend; j++) { 3795 if (*jj < rstart || *jj >= rend) olen++; 3796 else dlen++; 3797 jj++; 3798 } 3799 olens[i] = olen; 3800 dlens[i] = dlen; 3801 } 3802 PetscCall(MatCreate(comm,&M)); 3803 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n)); 3804 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3805 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3806 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3807 PetscCall(PetscFree(dlens)); 3808 } else { 3809 PetscInt ml,nl; 3810 3811 M = *newmat; 3812 PetscCall(MatGetLocalSize(M,&ml,&nl)); 3813 PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3814 PetscCall(MatZeroEntries(M)); 3815 /* 3816 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3817 rather than the slower MatSetValues(). 
3818 */ 3819 M->was_assembled = PETSC_TRUE; 3820 M->assembled = PETSC_FALSE; 3821 } 3822 PetscCall(MatGetOwnershipRange(M,&rstart,&rend)); 3823 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3824 ii = aij->i; 3825 jj = aij->j; 3826 3827 /* trigger copy to CPU if needed */ 3828 PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa)); 3829 for (i=0; i<m; i++) { 3830 row = rstart + i; 3831 nz = ii[i+1] - ii[i]; 3832 cwork = jj; jj += nz; 3833 vwork = aa; aa += nz; 3834 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES)); 3835 } 3836 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa)); 3837 3838 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3839 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3840 *newmat = M; 3841 3842 /* save submatrix used in processor for next request */ 3843 if (call == MAT_INITIAL_MATRIX) { 3844 PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse)); 3845 PetscCall(MatDestroy(&Mreuse)); 3846 } 3847 PetscFunctionReturn(0); 3848 } 3849 3850 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3851 { 3852 PetscInt m,cstart, cend,j,nnz,i,d,*ld; 3853 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3854 const PetscInt *JJ; 3855 PetscBool nooffprocentries; 3856 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)B->data; 3857 3858 PetscFunctionBegin; 3859 PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]); 3860 3861 PetscCall(PetscLayoutSetUp(B->rmap)); 3862 PetscCall(PetscLayoutSetUp(B->cmap)); 3863 m = B->rmap->n; 3864 cstart = B->cmap->rstart; 3865 cend = B->cmap->rend; 3866 rstart = B->rmap->rstart; 3867 3868 PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz)); 3869 3870 if (PetscDefined(USE_DEBUG)) { 3871 for (i=0; i<m; i++) { 3872 nnz = Ii[i+1]- Ii[i]; 3873 JJ = J + Ii[i]; 3874 PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" 
PetscInt_FMT " number of columns",i,nnz); 3875 PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]); 3876 PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N); 3877 } 3878 } 3879 3880 for (i=0; i<m; i++) { 3881 nnz = Ii[i+1]- Ii[i]; 3882 JJ = J + Ii[i]; 3883 nnz_max = PetscMax(nnz_max,nnz); 3884 d = 0; 3885 for (j=0; j<nnz; j++) { 3886 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3887 } 3888 d_nnz[i] = d; 3889 o_nnz[i] = nnz - d; 3890 } 3891 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 3892 PetscCall(PetscFree2(d_nnz,o_nnz)); 3893 3894 for (i=0; i<m; i++) { 3895 ii = i + rstart; 3896 PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES)); 3897 } 3898 nooffprocentries = B->nooffprocentries; 3899 B->nooffprocentries = PETSC_TRUE; 3900 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 3901 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3902 B->nooffprocentries = nooffprocentries; 3903 3904 /* count number of entries below block diagonal */ 3905 PetscCall(PetscFree(Aij->ld)); 3906 PetscCall(PetscCalloc1(m,&ld)); 3907 Aij->ld = ld; 3908 for (i=0; i<m; i++) { 3909 nnz = Ii[i+1] - Ii[i]; 3910 j = 0; 3911 while (j < nnz && J[j] < cstart) {j++;} 3912 ld[i] = j; 3913 J += nnz; 3914 } 3915 3916 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3917 PetscFunctionReturn(0); 3918 } 3919 3920 /*@ 3921 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3922 (the default parallel PETSc format). 3923 3924 Collective 3925 3926 Input Parameters: 3927 + B - the matrix 3928 . i - the indices into j for the start of each local row (starts with zero) 3929 . 
j - the column indices for each local row (starts with zero) 3930 - v - optional values in the matrix 3931 3932 Level: developer 3933 3934 Notes: 3935 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3936 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3937 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3938 3939 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3940 3941 The format which is used for the sparse matrix input, is equivalent to a 3942 row-major ordering.. i.e for the following matrix, the input data expected is 3943 as shown 3944 3945 $ 1 0 0 3946 $ 2 0 3 P0 3947 $ ------- 3948 $ 4 5 6 P1 3949 $ 3950 $ Process0 [P0]: rows_owned=[0,1] 3951 $ i = {0,1,3} [size = nrow+1 = 2+1] 3952 $ j = {0,0,2} [size = 3] 3953 $ v = {1,2,3} [size = 3] 3954 $ 3955 $ Process1 [P1]: rows_owned=[2] 3956 $ i = {0,3} [size = nrow+1 = 1+1] 3957 $ j = {0,1,2} [size = 3] 3958 $ v = {4,5,6} [size = 3] 3959 3960 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 3961 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 3962 @*/ 3963 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3964 { 3965 PetscFunctionBegin; 3966 PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v)); 3967 PetscFunctionReturn(0); 3968 } 3969 3970 /*@C 3971 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3972 (the default parallel PETSc format). For good matrix assembly performance 3973 the user should preallocate the matrix storage by setting the parameters 3974 d_nz (or d_nnz) and o_nz (or o_nnz). 
By setting these parameters accurately, 3975 performance can be increased by more than a factor of 50. 3976 3977 Collective 3978 3979 Input Parameters: 3980 + B - the matrix 3981 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3982 (same value is used for all local rows) 3983 . d_nnz - array containing the number of nonzeros in the various rows of the 3984 DIAGONAL portion of the local submatrix (possibly different for each row) 3985 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3986 The size of this array is equal to the number of local rows, i.e 'm'. 3987 For matrices that will be factored, you must leave room for (and set) 3988 the diagonal entry even if it is zero. 3989 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3990 submatrix (same value is used for all local rows). 3991 - o_nnz - array containing the number of nonzeros in the various rows of the 3992 OFF-DIAGONAL portion of the local submatrix (possibly different for 3993 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3994 structure. The size of this array is equal to the number 3995 of local rows, i.e 'm'. 3996 3997 If the *_nnz parameter is given then the *_nz parameter is ignored 3998 3999 The AIJ format (also called the Yale sparse matrix format or 4000 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4001 storage. The stored row and column indices begin with zero. 4002 See Users-Manual: ch_mat for details. 4003 4004 The parallel matrix is partitioned such that the first m0 rows belong to 4005 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4006 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* PetscTryMethod: the call is made only if B has "MatMPIAIJSetPreallocation_C" composed on it */
  PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}

/*@
     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
         CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.   j - column indices
-   a - optional matrix values

   Output Parameter:
.   mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       Once you have created the matrix you can update it with new numerical values using
     MatUpdateMPIAIJWithArray() (or the older MatUpdateMPIAIJWithArrays())

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  /* reject bad arguments before any object is created */
  PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  /* preallocates from the CSR arrays and inserts the entries */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
  PetscFunctionReturn(0);
}

/*@
     MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard
         CSR format. 
Only the numerical values are updated the other arrays must be identical to what was passed from MatCreateMPIAIJWithArrays() 4178 4179 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4180 4181 Collective 4182 4183 Input Parameters: 4184 + mat - the matrix 4185 . m - number of local rows (Cannot be PETSC_DECIDE) 4186 . n - This value should be the same as the local size used in creating the 4187 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4188 calculated if N is given) For square matrices n is almost always m. 4189 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4190 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4191 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4192 . J - column indices 4193 - v - matrix values 4194 4195 Level: intermediate 4196 4197 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4198 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()` 4199 @*/ 4200 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4201 { 4202 PetscInt nnz,i; 4203 PetscBool nooffprocentries; 4204 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4205 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4206 PetscScalar *ad,*ao; 4207 PetscInt ldi,Iii,md; 4208 const PetscInt *Adi = Ad->i; 4209 PetscInt *ld = Aij->ld; 4210 4211 PetscFunctionBegin; 4212 PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4213 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4214 PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from 
call to MatUpdateMPIAIJWithArrays()"); 4215 PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4216 4217 PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4218 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4219 4220 for (i=0; i<m; i++) { 4221 nnz = Ii[i+1]- Ii[i]; 4222 Iii = Ii[i]; 4223 ldi = ld[i]; 4224 md = Adi[i+1]-Adi[i]; 4225 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4226 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4227 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4228 ad += md; 4229 ao += nnz - md; 4230 } 4231 nooffprocentries = mat->nooffprocentries; 4232 mat->nooffprocentries = PETSC_TRUE; 4233 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4234 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4235 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4236 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4237 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4238 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4239 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4240 mat->nooffprocentries = nooffprocentries; 4241 PetscFunctionReturn(0); 4242 } 4243 4244 /*@ 4245 MatUpdateMPIAIJWithArray - updates an MPI AIJ matrix using an array that contains the nonzero values 4246 4247 Collective 4248 4249 Input Parameters: 4250 + mat - the matrix 4251 - v - matrix values, stored by row 4252 4253 Level: intermediate 4254 4255 Notes: 4256 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4257 4258 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4259 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArrays()` 4260 @*/ 4261 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat,const PetscScalar v[]) 4262 { 4263 PetscInt 
nnz,i,m; 4264 PetscBool nooffprocentries; 4265 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4266 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4267 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)Aij->B->data; 4268 PetscScalar *ad,*ao; 4269 const PetscInt *Adi = Ad->i,*Adj = Ao->i; 4270 PetscInt ldi,Iii,md; 4271 PetscInt *ld = Aij->ld; 4272 4273 PetscFunctionBegin; 4274 m = mat->rmap->n; 4275 4276 PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4277 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4278 Iii = 0; 4279 for (i=0; i<m; i++) { 4280 nnz = Adi[i+1]-Adi[i] + Adj[i+1]-Adj[i]; 4281 ldi = ld[i]; 4282 md = Adi[i+1]-Adi[i]; 4283 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4284 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4285 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4286 ad += md; 4287 ao += nnz - md; 4288 Iii += nnz; 4289 } 4290 nooffprocentries = mat->nooffprocentries; 4291 mat->nooffprocentries = PETSC_TRUE; 4292 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4293 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4294 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4295 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4296 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4297 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4298 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4299 mat->nooffprocentries = nooffprocentries; 4300 PetscFunctionReturn(0); 4301 } 4302 4303 /*@C 4304 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4305 (the default parallel PETSc format). For good matrix assembly performance 4306 the user should preallocate the matrix storage by setting the parameters 4307 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4308 performance can be increased by more than a factor of 50. 4309 4310 Collective 4311 4312 Input Parameters: 4313 + comm - MPI communicator 4314 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e. each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e. diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitute the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
.ve

$     MatCreate(...,&A);
$     MatSetType(A,MATMPIAIJ);
$     MatSetSizes(A, m,n,M,N);
$     MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
        See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
        Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store 4461 34 values. 4462 4463 When d_nnz, o_nnz parameters are specified, the storage is specified 4464 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4465 In the above case the values for d_nnz,o_nnz are 4466 .vb 4467 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4468 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4469 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4470 .ve 4471 Here the space allocated is sum of all the above values i.e 34, and 4472 hence pre-allocation is perfect. 4473 4474 Level: intermediate 4475 4476 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4477 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4478 @*/ 4479 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4480 { 4481 PetscMPIInt size; 4482 4483 PetscFunctionBegin; 4484 PetscCall(MatCreate(comm,A)); 4485 PetscCall(MatSetSizes(*A,m,n,M,N)); 4486 PetscCallMPI(MPI_Comm_size(comm,&size)); 4487 if (size > 1) { 4488 PetscCall(MatSetType(*A,MATMPIAIJ)); 4489 PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz)); 4490 } else { 4491 PetscCall(MatSetType(*A,MATSEQAIJ)); 4492 PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz)); 4493 } 4494 PetscFunctionReturn(0); 4495 } 4496 4497 /*@C 4498 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4499 4500 Not collective 4501 4502 Input Parameter: 4503 . A - The MPIAIJ matrix 4504 4505 Output Parameters: 4506 + Ad - The local diagonal block as a SeqAIJ matrix 4507 . Ao - The local off-diagonal block as a SeqAIJ matrix 4508 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4509 4510 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. 
The columns 4511 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4512 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4513 local column numbers to global column numbers in the original matrix. 4514 4515 Level: intermediate 4516 4517 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4518 @*/ 4519 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4520 { 4521 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4522 PetscBool flg; 4523 4524 PetscFunctionBegin; 4525 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg)); 4526 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4527 if (Ad) *Ad = a->A; 4528 if (Ao) *Ao = a->B; 4529 if (colmap) *colmap = a->garray; 4530 PetscFunctionReturn(0); 4531 } 4532 4533 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4534 { 4535 PetscInt m,N,i,rstart,nnz,Ii; 4536 PetscInt *indx; 4537 PetscScalar *values; 4538 MatType rootType; 4539 4540 PetscFunctionBegin; 4541 PetscCall(MatGetSize(inmat,&m,&N)); 4542 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4543 PetscInt *dnz,*onz,sum,bs,cbs; 4544 4545 if (n == PETSC_DECIDE) { 4546 PetscCall(PetscSplitOwnership(comm,&n,&N)); 4547 } 4548 /* Check sum(n) = N */ 4549 PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm)); 4550 PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 4551 4552 PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm)); 4553 rstart -= m; 4554 4555 MatPreallocateBegin(comm,m,n,dnz,onz); 4556 for (i=0; i<m; i++) { 4557 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4558 
PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz)); 4559 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4560 } 4561 4562 PetscCall(MatCreate(comm,outmat)); 4563 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4564 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 4565 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 4566 PetscCall(MatGetRootType_Private(inmat,&rootType)); 4567 PetscCall(MatSetType(*outmat,rootType)); 4568 PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz)); 4569 PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz)); 4570 MatPreallocateEnd(dnz,onz); 4571 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 4572 } 4573 4574 /* numeric phase */ 4575 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 4576 for (i=0; i<m; i++) { 4577 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4578 Ii = i + rstart; 4579 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 4580 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4581 } 4582 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 4583 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 4584 PetscFunctionReturn(0); 4585 } 4586 4587 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4588 { 4589 PetscMPIInt rank; 4590 PetscInt m,N,i,rstart,nnz; 4591 size_t len; 4592 const PetscInt *indx; 4593 PetscViewer out; 4594 char *name; 4595 Mat B; 4596 const PetscScalar *values; 4597 4598 PetscFunctionBegin; 4599 PetscCall(MatGetLocalSize(A,&m,NULL)); 4600 PetscCall(MatGetSize(A,NULL,&N)); 4601 /* Should this be the type of the diagonal block of A? 
*/ 4602 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 4603 PetscCall(MatSetSizes(B,m,N,m,N)); 4604 PetscCall(MatSetBlockSizesFromMats(B,A,A)); 4605 PetscCall(MatSetType(B,MATSEQAIJ)); 4606 PetscCall(MatSeqAIJSetPreallocation(B,0,NULL)); 4607 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 4608 for (i=0; i<m; i++) { 4609 PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values)); 4610 PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES)); 4611 PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values)); 4612 } 4613 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 4614 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 4615 4616 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 4617 PetscCall(PetscStrlen(outfile,&len)); 4618 PetscCall(PetscMalloc1(len+6,&name)); 4619 PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank)); 4620 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out)); 4621 PetscCall(PetscFree(name)); 4622 PetscCall(MatView(B,out)); 4623 PetscCall(PetscViewerDestroy(&out)); 4624 PetscCall(MatDestroy(&B)); 4625 PetscFunctionReturn(0); 4626 } 4627 4628 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4629 { 4630 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4631 4632 PetscFunctionBegin; 4633 if (!merge) PetscFunctionReturn(0); 4634 PetscCall(PetscFree(merge->id_r)); 4635 PetscCall(PetscFree(merge->len_s)); 4636 PetscCall(PetscFree(merge->len_r)); 4637 PetscCall(PetscFree(merge->bi)); 4638 PetscCall(PetscFree(merge->bj)); 4639 PetscCall(PetscFree(merge->buf_ri[0])); 4640 PetscCall(PetscFree(merge->buf_ri)); 4641 PetscCall(PetscFree(merge->buf_rj[0])); 4642 PetscCall(PetscFree(merge->buf_rj)); 4643 PetscCall(PetscFree(merge->coi)); 4644 PetscCall(PetscFree(merge->coj)); 4645 PetscCall(PetscFree(merge->owners_co)); 4646 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4647 PetscCall(PetscFree(merge)); 4648 PetscFunctionReturn(0); 4649 } 4650 4651 #include <../src/mat/utils/freespace.h> 4652 #include 
<petscbt.h> 4653 4654 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4655 { 4656 MPI_Comm comm; 4657 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4658 PetscMPIInt size,rank,taga,*len_s; 4659 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4660 PetscInt proc,m; 4661 PetscInt **buf_ri,**buf_rj; 4662 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4663 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4664 MPI_Request *s_waits,*r_waits; 4665 MPI_Status *status; 4666 const MatScalar *aa,*a_a; 4667 MatScalar **abuf_r,*ba_i; 4668 Mat_Merge_SeqsToMPI *merge; 4669 PetscContainer container; 4670 4671 PetscFunctionBegin; 4672 PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm)); 4673 PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0)); 4674 4675 PetscCallMPI(MPI_Comm_size(comm,&size)); 4676 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4677 4678 PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container)); 4679 PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4680 PetscCall(PetscContainerGetPointer(container,(void**)&merge)); 4681 PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a)); 4682 aa = a_a; 4683 4684 bi = merge->bi; 4685 bj = merge->bj; 4686 buf_ri = merge->buf_ri; 4687 buf_rj = merge->buf_rj; 4688 4689 PetscCall(PetscMalloc1(size,&status)); 4690 owners = merge->rowmap->range; 4691 len_s = merge->len_s; 4692 4693 /* send and recv matrix values */ 4694 /*-----------------------------*/ 4695 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga)); 4696 PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits)); 4697 4698 PetscCall(PetscMalloc1(merge->nsend+1,&s_waits)); 4699 for (proc=0,k=0; proc<size; proc++) { 4700 if (!len_s[proc]) continue; 4701 i = owners[proc]; 4702 PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k)); 4703 k++; 4704 } 4705 4706 if 
(merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status)); 4707 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status)); 4708 PetscCall(PetscFree(status)); 4709 4710 PetscCall(PetscFree(s_waits)); 4711 PetscCall(PetscFree(r_waits)); 4712 4713 /* insert mat values of mpimat */ 4714 /*----------------------------*/ 4715 PetscCall(PetscMalloc1(N,&ba_i)); 4716 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4717 4718 for (k=0; k<merge->nrecv; k++) { 4719 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4720 nrows = *(buf_ri_k[k]); 4721 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4722 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4723 } 4724 4725 /* set values of ba */ 4726 m = merge->rowmap->n; 4727 for (i=0; i<m; i++) { 4728 arow = owners[rank] + i; 4729 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4730 bnzi = bi[i+1] - bi[i]; 4731 PetscCall(PetscArrayzero(ba_i,bnzi)); 4732 4733 /* add local non-zero vals of this proc's seqmat into ba */ 4734 anzi = ai[arow+1] - ai[arow]; 4735 aj = a->j + ai[arow]; 4736 aa = a_a + ai[arow]; 4737 nextaj = 0; 4738 for (j=0; nextaj<anzi; j++) { 4739 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4740 ba_i[j] += aa[nextaj++]; 4741 } 4742 } 4743 4744 /* add received vals into ba */ 4745 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4746 /* i-th row */ 4747 if (i == *nextrow[k]) { 4748 anzi = *(nextai[k]+1) - *nextai[k]; 4749 aj = buf_rj[k] + *(nextai[k]); 4750 aa = abuf_r[k] + *(nextai[k]); 4751 nextaj = 0; 4752 for (j=0; nextaj<anzi; j++) { 4753 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4754 ba_i[j] += aa[nextaj++]; 4755 } 4756 } 4757 nextrow[k]++; nextai[k]++; 4758 } 4759 } 4760 PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES)); 4761 } 4762 PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a)); 4763 
PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY)); 4764 PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY)); 4765 4766 PetscCall(PetscFree(abuf_r[0])); 4767 PetscCall(PetscFree(abuf_r)); 4768 PetscCall(PetscFree(ba_i)); 4769 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4770 PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0)); 4771 PetscFunctionReturn(0); 4772 } 4773 4774 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4775 { 4776 Mat B_mpi; 4777 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4778 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4779 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4780 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4781 PetscInt len,proc,*dnz,*onz,bs,cbs; 4782 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi; 4783 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4784 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4785 MPI_Status *status; 4786 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4787 PetscBT lnkbt; 4788 Mat_Merge_SeqsToMPI *merge; 4789 PetscContainer container; 4790 4791 PetscFunctionBegin; 4792 PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0)); 4793 4794 /* make sure it is a PETSc comm */ 4795 PetscCall(PetscCommDuplicate(comm,&comm,NULL)); 4796 PetscCallMPI(MPI_Comm_size(comm,&size)); 4797 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4798 4799 PetscCall(PetscNew(&merge)); 4800 PetscCall(PetscMalloc1(size,&status)); 4801 4802 /* determine row ownership */ 4803 /*---------------------------------------------------------*/ 4804 PetscCall(PetscLayoutCreate(comm,&merge->rowmap)); 4805 PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m)); 4806 PetscCall(PetscLayoutSetSize(merge->rowmap,M)); 4807 PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1)); 4808 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4809 PetscCall(PetscMalloc1(size,&len_si)); 4810 PetscCall(PetscMalloc1(size,&merge->len_s)); 4811 4812 m = 
merge->rowmap->n; 4813 owners = merge->rowmap->range; 4814 4815 /* determine the number of messages to send, their lengths */ 4816 /*---------------------------------------------------------*/ 4817 len_s = merge->len_s; 4818 4819 len = 0; /* length of buf_si[] */ 4820 merge->nsend = 0; 4821 for (proc=0; proc<size; proc++) { 4822 len_si[proc] = 0; 4823 if (proc == rank) { 4824 len_s[proc] = 0; 4825 } else { 4826 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4827 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4828 } 4829 if (len_s[proc]) { 4830 merge->nsend++; 4831 nrows = 0; 4832 for (i=owners[proc]; i<owners[proc+1]; i++) { 4833 if (ai[i+1] > ai[i]) nrows++; 4834 } 4835 len_si[proc] = 2*(nrows+1); 4836 len += len_si[proc]; 4837 } 4838 } 4839 4840 /* determine the number and length of messages to receive for ij-structure */ 4841 /*-------------------------------------------------------------------------*/ 4842 PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv)); 4843 PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri)); 4844 4845 /* post the Irecv of j-structure */ 4846 /*-------------------------------*/ 4847 PetscCall(PetscCommGetNewTag(comm,&tagj)); 4848 PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits)); 4849 4850 /* post the Isend of j-structure */ 4851 /*--------------------------------*/ 4852 PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits)); 4853 4854 for (proc=0, k=0; proc<size; proc++) { 4855 if (!len_s[proc]) continue; 4856 i = owners[proc]; 4857 PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k)); 4858 k++; 4859 } 4860 4861 /* receives and sends of j-structure are complete */ 4862 /*------------------------------------------------*/ 4863 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status)); 4864 if 
(merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status)); 4865 4866 /* send and recv i-structure */ 4867 /*---------------------------*/ 4868 PetscCall(PetscCommGetNewTag(comm,&tagi)); 4869 PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits)); 4870 4871 PetscCall(PetscMalloc1(len+1,&buf_s)); 4872 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4873 for (proc=0,k=0; proc<size; proc++) { 4874 if (!len_s[proc]) continue; 4875 /* form outgoing message for i-structure: 4876 buf_si[0]: nrows to be sent 4877 [1:nrows]: row index (global) 4878 [nrows+1:2*nrows+1]: i-structure index 4879 */ 4880 /*-------------------------------------------*/ 4881 nrows = len_si[proc]/2 - 1; 4882 buf_si_i = buf_si + nrows+1; 4883 buf_si[0] = nrows; 4884 buf_si_i[0] = 0; 4885 nrows = 0; 4886 for (i=owners[proc]; i<owners[proc+1]; i++) { 4887 anzi = ai[i+1] - ai[i]; 4888 if (anzi) { 4889 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4890 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4891 nrows++; 4892 } 4893 } 4894 PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k)); 4895 k++; 4896 buf_si += len_si[proc]; 4897 } 4898 4899 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status)); 4900 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status)); 4901 4902 PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv)); 4903 for (i=0; i<merge->nrecv; i++) { 4904 PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i])); 4905 } 4906 4907 PetscCall(PetscFree(len_si)); 4908 PetscCall(PetscFree(len_ri)); 4909 PetscCall(PetscFree(rj_waits)); 4910 PetscCall(PetscFree2(si_waits,sj_waits)); 4911 PetscCall(PetscFree(ri_waits)); 4912 PetscCall(PetscFree(buf_s)); 4913 PetscCall(PetscFree(status)); 4914 4915 /* compute a local seq matrix in each processor */ 4916 
/*----------------------------------------------*/ 4917 /* allocate bi array and free space for accumulating nonzero column info */ 4918 PetscCall(PetscMalloc1(m+1,&bi)); 4919 bi[0] = 0; 4920 4921 /* create and initialize a linked list */ 4922 nlnk = N+1; 4923 PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt)); 4924 4925 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4926 len = ai[owners[rank+1]] - ai[owners[rank]]; 4927 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space)); 4928 4929 current_space = free_space; 4930 4931 /* determine symbolic info for each local row */ 4932 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4933 4934 for (k=0; k<merge->nrecv; k++) { 4935 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4936 nrows = *buf_ri_k[k]; 4937 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4938 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4939 } 4940 4941 MatPreallocateBegin(comm,m,n,dnz,onz); 4942 len = 0; 4943 for (i=0; i<m; i++) { 4944 bnzi = 0; 4945 /* add local non-zero cols of this proc's seqmat into lnk */ 4946 arow = owners[rank] + i; 4947 anzi = ai[arow+1] - ai[arow]; 4948 aj = a->j + ai[arow]; 4949 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4950 bnzi += nlnk; 4951 /* add received col data into lnk */ 4952 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4953 if (i == *nextrow[k]) { /* i-th row */ 4954 anzi = *(nextai[k]+1) - *nextai[k]; 4955 aj = buf_rj[k] + *nextai[k]; 4956 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4957 bnzi += nlnk; 4958 nextrow[k]++; nextai[k]++; 4959 } 4960 } 4961 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4962 4963 /* if free space is not available, make more free space */ 4964 if (current_space->local_remaining<bnzi) { 4965 
PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space)); 4966 } 4967 /* copy data into free space, then initialize lnk */ 4968 PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt)); 4969 PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz)); 4970 4971 current_space->array += bnzi; 4972 current_space->local_used += bnzi; 4973 current_space->local_remaining -= bnzi; 4974 4975 bi[i+1] = bi[i] + bnzi; 4976 } 4977 4978 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4979 4980 PetscCall(PetscMalloc1(bi[m]+1,&bj)); 4981 PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); 4982 PetscCall(PetscLLDestroy(lnk,lnkbt)); 4983 4984 /* create symbolic parallel matrix B_mpi */ 4985 /*---------------------------------------*/ 4986 PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs)); 4987 PetscCall(MatCreate(comm,&B_mpi)); 4988 if (n==PETSC_DECIDE) { 4989 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N)); 4990 } else { 4991 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4992 } 4993 PetscCall(MatSetBlockSizes(B_mpi,bs,cbs)); 4994 PetscCall(MatSetType(B_mpi,MATMPIAIJ)); 4995 PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz)); 4996 MatPreallocateEnd(dnz,onz); 4997 PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 4998 4999 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5000 B_mpi->assembled = PETSC_FALSE; 5001 merge->bi = bi; 5002 merge->bj = bj; 5003 merge->buf_ri = buf_ri; 5004 merge->buf_rj = buf_rj; 5005 merge->coi = NULL; 5006 merge->coj = NULL; 5007 merge->owners_co = NULL; 5008 5009 PetscCall(PetscCommDestroy(&comm)); 5010 5011 /* attach the supporting struct to B_mpi for reuse */ 5012 PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container)); 5013 PetscCall(PetscContainerSetPointer(container,merge)); 5014 PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI)); 5015 
PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container)); 5016 PetscCall(PetscContainerDestroy(&container)); 5017 *mpimat = B_mpi; 5018 5019 PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0)); 5020 PetscFunctionReturn(0); 5021 } 5022 5023 /*@C 5024 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 5025 matrices from each processor 5026 5027 Collective 5028 5029 Input Parameters: 5030 + comm - the communicators the parallel matrix will live on 5031 . seqmat - the input sequential matrices 5032 . m - number of local rows (or PETSC_DECIDE) 5033 . n - number of local columns (or PETSC_DECIDE) 5034 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5035 5036 Output Parameter: 5037 . mpimat - the parallel matrix generated 5038 5039 Level: advanced 5040 5041 Notes: 5042 The dimensions of the sequential matrix in each processor MUST be the same. 5043 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5044 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
5045 @*/ 5046 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5047 { 5048 PetscMPIInt size; 5049 5050 PetscFunctionBegin; 5051 PetscCallMPI(MPI_Comm_size(comm,&size)); 5052 if (size == 1) { 5053 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 5054 if (scall == MAT_INITIAL_MATRIX) { 5055 PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat)); 5056 } else { 5057 PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN)); 5058 } 5059 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 5060 PetscFunctionReturn(0); 5061 } 5062 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 5063 if (scall == MAT_INITIAL_MATRIX) { 5064 PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat)); 5065 } 5066 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat)); 5067 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 5068 PetscFunctionReturn(0); 5069 } 5070 5071 /*@ 5072 MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5073 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5074 with MatGetSize() 5075 5076 Not Collective 5077 5078 Input Parameters: 5079 + A - the matrix 5080 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5081 5082 Output Parameter: 5083 . A_loc - the local sequential matrix generated 5084 5085 Level: developer 5086 5087 Notes: 5088 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 
5089 5090 Destroy the matrix with MatDestroy() 5091 5092 .seealso: MatMPIAIJGetLocalMat() 5093 5094 @*/ 5095 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc) 5096 { 5097 PetscBool mpi; 5098 5099 PetscFunctionBegin; 5100 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi)); 5101 if (mpi) { 5102 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc)); 5103 } else { 5104 *A_loc = A; 5105 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5106 } 5107 PetscFunctionReturn(0); 5108 } 5109 5110 /*@ 5111 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5112 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5113 with MatGetSize() 5114 5115 Not Collective 5116 5117 Input Parameters: 5118 + A - the matrix 5119 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5120 5121 Output Parameter: 5122 . A_loc - the local sequential matrix generated 5123 5124 Level: developer 5125 5126 Notes: 5127 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 5128 5129 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5130 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5131 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5132 modify the values of the returned A_loc. 
5133 5134 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5135 @*/ 5136 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5137 { 5138 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5139 Mat_SeqAIJ *mat,*a,*b; 5140 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5141 const PetscScalar *aa,*ba,*aav,*bav; 5142 PetscScalar *ca,*cam; 5143 PetscMPIInt size; 5144 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5145 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5146 PetscBool match; 5147 5148 PetscFunctionBegin; 5149 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match)); 5150 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5151 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5152 if (size == 1) { 5153 if (scall == MAT_INITIAL_MATRIX) { 5154 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5155 *A_loc = mpimat->A; 5156 } else if (scall == MAT_REUSE_MATRIX) { 5157 PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN)); 5158 } 5159 PetscFunctionReturn(0); 5160 } 5161 5162 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5163 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5164 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5165 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5166 PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav)); 5167 PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav)); 5168 aa = aav; 5169 ba = bav; 5170 if (scall == MAT_INITIAL_MATRIX) { 5171 PetscCall(PetscMalloc1(1+am,&ci)); 5172 ci[0] = 0; 5173 for (i=0; i<am; i++) { 5174 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5175 } 5176 PetscCall(PetscMalloc1(1+ci[am],&cj)); 5177 PetscCall(PetscMalloc1(1+ci[am],&ca)); 5178 k = 0; 5179 for (i=0; i<am; i++) { 5180 ncols_o = bi[i+1] - bi[i]; 5181 ncols_d = ai[i+1] - ai[i]; 5182 /* off-diagonal portion of A */ 5183 for (jo=0; jo<ncols_o; jo++) { 5184 col = cmap[*bj]; 5185 if (col >= cstart) break; 5186 cj[k] = 
col; bj++; 5187 ca[k++] = *ba++; 5188 } 5189 /* diagonal portion of A */ 5190 for (j=0; j<ncols_d; j++) { 5191 cj[k] = cstart + *aj++; 5192 ca[k++] = *aa++; 5193 } 5194 /* off-diagonal portion of A */ 5195 for (j=jo; j<ncols_o; j++) { 5196 cj[k] = cmap[*bj++]; 5197 ca[k++] = *ba++; 5198 } 5199 } 5200 /* put together the new matrix */ 5201 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc)); 5202 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5203 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5204 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5205 mat->free_a = PETSC_TRUE; 5206 mat->free_ij = PETSC_TRUE; 5207 mat->nonew = 0; 5208 } else if (scall == MAT_REUSE_MATRIX) { 5209 mat =(Mat_SeqAIJ*)(*A_loc)->data; 5210 ci = mat->i; 5211 cj = mat->j; 5212 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam)); 5213 for (i=0; i<am; i++) { 5214 /* off-diagonal portion of A */ 5215 ncols_o = bi[i+1] - bi[i]; 5216 for (jo=0; jo<ncols_o; jo++) { 5217 col = cmap[*bj]; 5218 if (col >= cstart) break; 5219 *cam++ = *ba++; bj++; 5220 } 5221 /* diagonal portion of A */ 5222 ncols_d = ai[i+1] - ai[i]; 5223 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5224 /* off-diagonal portion of A */ 5225 for (j=jo; j<ncols_o; j++) { 5226 *cam++ = *ba++; bj++; 5227 } 5228 } 5229 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam)); 5230 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5231 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav)); 5232 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav)); 5233 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5234 PetscFunctionReturn(0); 5235 } 5236 5237 /*@ 5238 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5239 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5240 5241 Not Collective 5242 5243 Input Parameters: 5244 + A - the matrix 5245 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5246 5247 Output Parameters: 5248 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5249 - A_loc - the local sequential matrix generated 5250 5251 Level: developer 5252 5253 Notes: 5254 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5255 5256 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5257 5258 @*/ 5259 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5260 { 5261 Mat Ao,Ad; 5262 const PetscInt *cmap; 5263 PetscMPIInt size; 5264 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5265 5266 PetscFunctionBegin; 5267 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap)); 5268 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5269 if (size == 1) { 5270 if (scall == MAT_INITIAL_MATRIX) { 5271 PetscCall(PetscObjectReference((PetscObject)Ad)); 5272 *A_loc = Ad; 5273 } else if (scall == MAT_REUSE_MATRIX) { 5274 PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN)); 5275 } 5276 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob)); 5277 PetscFunctionReturn(0); 5278 } 5279 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f)); 5280 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5281 if (f) { 5282 PetscCall((*f)(A,scall,glob,A_loc)); 5283 } else { 5284 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5285 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5286 Mat_SeqAIJ *c; 5287 PetscInt *ai = a->i, *aj = a->j; 5288 PetscInt *bi = b->i, *bj = b->j; 5289 PetscInt *ci,*cj; 
5290 const PetscScalar *aa,*ba; 5291 PetscScalar *ca; 5292 PetscInt i,j,am,dn,on; 5293 5294 PetscCall(MatGetLocalSize(Ad,&am,&dn)); 5295 PetscCall(MatGetLocalSize(Ao,NULL,&on)); 5296 PetscCall(MatSeqAIJGetArrayRead(Ad,&aa)); 5297 PetscCall(MatSeqAIJGetArrayRead(Ao,&ba)); 5298 if (scall == MAT_INITIAL_MATRIX) { 5299 PetscInt k; 5300 PetscCall(PetscMalloc1(1+am,&ci)); 5301 PetscCall(PetscMalloc1(ai[am]+bi[am],&cj)); 5302 PetscCall(PetscMalloc1(ai[am]+bi[am],&ca)); 5303 ci[0] = 0; 5304 for (i=0,k=0; i<am; i++) { 5305 const PetscInt ncols_o = bi[i+1] - bi[i]; 5306 const PetscInt ncols_d = ai[i+1] - ai[i]; 5307 ci[i+1] = ci[i] + ncols_o + ncols_d; 5308 /* diagonal portion of A */ 5309 for (j=0; j<ncols_d; j++,k++) { 5310 cj[k] = *aj++; 5311 ca[k] = *aa++; 5312 } 5313 /* off-diagonal portion of A */ 5314 for (j=0; j<ncols_o; j++,k++) { 5315 cj[k] = dn + *bj++; 5316 ca[k] = *ba++; 5317 } 5318 } 5319 /* put together the new matrix */ 5320 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc)); 5321 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5322 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5323 c = (Mat_SeqAIJ*)(*A_loc)->data; 5324 c->free_a = PETSC_TRUE; 5325 c->free_ij = PETSC_TRUE; 5326 c->nonew = 0; 5327 PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name)); 5328 } else if (scall == MAT_REUSE_MATRIX) { 5329 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca)); 5330 for (i=0; i<am; i++) { 5331 const PetscInt ncols_d = ai[i+1] - ai[i]; 5332 const PetscInt ncols_o = bi[i+1] - bi[i]; 5333 /* diagonal portion of A */ 5334 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5335 /* off-diagonal portion of A */ 5336 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5337 } 5338 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca)); 5339 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5340 PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa)); 5341 PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa)); 5342 if (glob) { 5343 PetscInt cst, *gidx; 5344 5345 PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL)); 5346 PetscCall(PetscMalloc1(dn+on,&gidx)); 5347 for (i=0; i<dn; i++) gidx[i] = cst + i; 5348 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5349 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob)); 5350 } 5351 } 5352 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5353 PetscFunctionReturn(0); 5354 } 5355 5356 /*@C 5357 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5358 5359 Not Collective 5360 5361 Input Parameters: 5362 + A - the matrix 5363 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5364 - row, col - index sets of rows and columns to extract (or NULL) 5365 5366 Output Parameter: 5367 . 
A_loc - the local sequential matrix generated 5368 5369 Level: developer 5370 5371 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5372 5373 @*/ 5374 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5375 { 5376 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5377 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5378 IS isrowa,iscola; 5379 Mat *aloc; 5380 PetscBool match; 5381 5382 PetscFunctionBegin; 5383 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match)); 5384 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5385 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0)); 5386 if (!row) { 5387 start = A->rmap->rstart; end = A->rmap->rend; 5388 PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa)); 5389 } else { 5390 isrowa = *row; 5391 } 5392 if (!col) { 5393 start = A->cmap->rstart; 5394 cmap = a->garray; 5395 nzA = a->A->cmap->n; 5396 nzB = a->B->cmap->n; 5397 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5398 ncols = 0; 5399 for (i=0; i<nzB; i++) { 5400 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5401 else break; 5402 } 5403 imark = i; 5404 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5405 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5406 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola)); 5407 } else { 5408 iscola = *col; 5409 } 5410 if (scall != MAT_INITIAL_MATRIX) { 5411 PetscCall(PetscMalloc1(1,&aloc)); 5412 aloc[0] = *A_loc; 5413 } 5414 PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc)); 5415 if (!col) { /* attach global id of condensed columns */ 5416 PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola)); 5417 } 5418 *A_loc = aloc[0]; 5419 PetscCall(PetscFree(aloc)); 5420 if (!row) { 5421 PetscCall(ISDestroy(&isrowa)); 5422 } 5423 if (!col) { 5424 PetscCall(ISDestroy(&iscola)); 5425 } 5426 
PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0)); 5427 PetscFunctionReturn(0); 5428 } 5429 5430 /* 5431 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5432 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5433 * on a global size. 5434 * */ 5435 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5436 { 5437 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5438 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5439 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5440 PetscMPIInt owner; 5441 PetscSFNode *iremote,*oiremote; 5442 const PetscInt *lrowindices; 5443 PetscSF sf,osf; 5444 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5445 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5446 MPI_Comm comm; 5447 ISLocalToGlobalMapping mapping; 5448 const PetscScalar *pd_a,*po_a; 5449 5450 PetscFunctionBegin; 5451 PetscCall(PetscObjectGetComm((PetscObject)P,&comm)); 5452 /* plocalsize is the number of roots 5453 * nrows is the number of leaves 5454 * */ 5455 PetscCall(MatGetLocalSize(P,&plocalsize,NULL)); 5456 PetscCall(ISGetLocalSize(rows,&nrows)); 5457 PetscCall(PetscCalloc1(nrows,&iremote)); 5458 PetscCall(ISGetIndices(rows,&lrowindices)); 5459 for (i=0;i<nrows;i++) { 5460 /* Find a remote index and an owner for a row 5461 * The row could be local or remote 5462 * */ 5463 owner = 0; 5464 lidx = 0; 5465 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx)); 5466 iremote[i].index = lidx; 5467 iremote[i].rank = owner; 5468 } 5469 /* Create SF to communicate how many nonzero columns for each row */ 5470 PetscCall(PetscSFCreate(comm,&sf)); 5471 /* SF will figure out the number of nonzero colunms for each row, and their 5472 * offsets 5473 * */ 5474 PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5475 
PetscCall(PetscSFSetFromOptions(sf)); 5476 PetscCall(PetscSFSetUp(sf)); 5477 5478 PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets)); 5479 PetscCall(PetscCalloc1(2*plocalsize,&nrcols)); 5480 PetscCall(PetscCalloc1(nrows,&pnnz)); 5481 roffsets[0] = 0; 5482 roffsets[1] = 0; 5483 for (i=0;i<plocalsize;i++) { 5484 /* diag */ 5485 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5486 /* off diag */ 5487 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5488 /* compute offsets so that we relative location for each row */ 5489 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5490 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5491 } 5492 PetscCall(PetscCalloc1(2*nrows,&nlcols)); 5493 PetscCall(PetscCalloc1(2*nrows,&loffsets)); 5494 /* 'r' means root, and 'l' means leaf */ 5495 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5496 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5497 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5498 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5499 PetscCall(PetscSFDestroy(&sf)); 5500 PetscCall(PetscFree(roffsets)); 5501 PetscCall(PetscFree(nrcols)); 5502 dntotalcols = 0; 5503 ontotalcols = 0; 5504 ncol = 0; 5505 for (i=0;i<nrows;i++) { 5506 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5507 ncol = PetscMax(pnnz[i],ncol); 5508 /* diag */ 5509 dntotalcols += nlcols[i*2+0]; 5510 /* off diag */ 5511 ontotalcols += nlcols[i*2+1]; 5512 } 5513 /* We do not need to figure the right number of columns 5514 * since all the calculations will be done by going through the raw data 5515 * */ 5516 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth)); 5517 PetscCall(MatSetUp(*P_oth)); 5518 PetscCall(PetscFree(pnnz)); 5519 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5520 /* diag */ 5521 PetscCall(PetscCalloc1(dntotalcols,&iremote)); 5522 /* off diag */ 5523 PetscCall(PetscCalloc1(ontotalcols,&oiremote)); 5524 /* diag */ 5525 
PetscCall(PetscCalloc1(dntotalcols,&ilocal)); 5526 /* off diag */ 5527 PetscCall(PetscCalloc1(ontotalcols,&oilocal)); 5528 dntotalcols = 0; 5529 ontotalcols = 0; 5530 ntotalcols = 0; 5531 for (i=0;i<nrows;i++) { 5532 owner = 0; 5533 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL)); 5534 /* Set iremote for diag matrix */ 5535 for (j=0;j<nlcols[i*2+0];j++) { 5536 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5537 iremote[dntotalcols].rank = owner; 5538 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5539 ilocal[dntotalcols++] = ntotalcols++; 5540 } 5541 /* off diag */ 5542 for (j=0;j<nlcols[i*2+1];j++) { 5543 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5544 oiremote[ontotalcols].rank = owner; 5545 oilocal[ontotalcols++] = ntotalcols++; 5546 } 5547 } 5548 PetscCall(ISRestoreIndices(rows,&lrowindices)); 5549 PetscCall(PetscFree(loffsets)); 5550 PetscCall(PetscFree(nlcols)); 5551 PetscCall(PetscSFCreate(comm,&sf)); 5552 /* P serves as roots and P_oth is leaves 5553 * Diag matrix 5554 * */ 5555 PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5556 PetscCall(PetscSFSetFromOptions(sf)); 5557 PetscCall(PetscSFSetUp(sf)); 5558 5559 PetscCall(PetscSFCreate(comm,&osf)); 5560 /* Off diag */ 5561 PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER)); 5562 PetscCall(PetscSFSetFromOptions(osf)); 5563 PetscCall(PetscSFSetUp(osf)); 5564 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5565 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5566 /* We operate on the matrix internal data for saving memory */ 5567 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5568 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5569 PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL)); 5570 /* Convert to global indices for diag matrix */ 5571 for 
(i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5572 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5573 /* We want P_oth store global indices */ 5574 PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping)); 5575 /* Use memory scalable approach */ 5576 PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH)); 5577 PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j)); 5578 PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5579 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5580 /* Convert back to local indices */ 5581 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5582 PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5583 nout = 0; 5584 PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j)); 5585 PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout); 5586 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5587 /* Exchange values */ 5588 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5589 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5590 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5591 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5592 /* Stop PETSc from shrinking memory */ 5593 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5594 PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY)); 5595 PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY)); 5596 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5597 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf)); 5598 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf)); 5599 PetscCall(PetscSFDestroy(&sf)); 5600 PetscCall(PetscSFDestroy(&osf)); 5601 PetscFunctionReturn(0); 
5602 } 5603 5604 /* 5605 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5606 * This supports MPIAIJ and MAIJ 5607 * */ 5608 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5609 { 5610 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5611 Mat_SeqAIJ *p_oth; 5612 IS rows,map; 5613 PetscHMapI hamp; 5614 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5615 MPI_Comm comm; 5616 PetscSF sf,osf; 5617 PetscBool has; 5618 5619 PetscFunctionBegin; 5620 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5621 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0)); 5622 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5623 * and then create a submatrix (that often is an overlapping matrix) 5624 * */ 5625 if (reuse == MAT_INITIAL_MATRIX) { 5626 /* Use a hash table to figure out unique keys */ 5627 PetscCall(PetscHMapICreate(&hamp)); 5628 PetscCall(PetscHMapIResize(hamp,a->B->cmap->n)); 5629 PetscCall(PetscCalloc1(a->B->cmap->n,&mapping)); 5630 count = 0; 5631 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5632 for (i=0;i<a->B->cmap->n;i++) { 5633 key = a->garray[i]/dof; 5634 PetscCall(PetscHMapIHas(hamp,key,&has)); 5635 if (!has) { 5636 mapping[i] = count; 5637 PetscCall(PetscHMapISet(hamp,key,count++)); 5638 } else { 5639 /* Current 'i' has the same value the previous step */ 5640 mapping[i] = count-1; 5641 } 5642 } 5643 PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map)); 5644 PetscCall(PetscHMapIGetSize(hamp,&htsize)); 5645 PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count); 5646 PetscCall(PetscCalloc1(htsize,&rowindices)); 5647 off = 0; 5648 PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices)); 5649 PetscCall(PetscHMapIDestroy(&hamp)); 5650 PetscCall(PetscSortInt(htsize,rowindices)); 
5651 PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows)); 5652 /* In case, the matrix was already created but users want to recreate the matrix */ 5653 PetscCall(MatDestroy(P_oth)); 5654 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth)); 5655 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map)); 5656 PetscCall(ISDestroy(&map)); 5657 PetscCall(ISDestroy(&rows)); 5658 } else if (reuse == MAT_REUSE_MATRIX) { 5659 /* If matrix was already created, we simply update values using SF objects 5660 * that as attached to the matrix ealier. 5661 */ 5662 const PetscScalar *pd_a,*po_a; 5663 5664 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf)); 5665 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf)); 5666 PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5667 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5668 /* Update values in place */ 5669 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5670 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5671 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5672 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5673 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5674 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5675 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5676 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5677 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5678 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0)); 5679 PetscFunctionReturn(0); 5680 } 5681 5682 /*@C 5683 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5684 5685 Collective on Mat 5686 5687 Input Parameters: 5688 + A - the first matrix in mpiaij format 5689 . 
B - the second matrix in mpiaij format
- scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

  Output Parameters:
+ rowb - On input index sets of rows of B to extract (or NULL), modified on output
. colb - On input index sets of columns of B to extract (or NULL), modified on output
- B_seq - the sequential matrix generated

  Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)A->data;
  IS         rowis,colis;
  Mat        *submats = NULL;

  PetscFunctionBegin;
  /* The column layout of A must coincide with the row layout of B on this process */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));

  if (scall != MAT_INITIAL_MATRIX) {
    /* Reuse: the index sets from the earlier call are required, and the existing matrix is passed along */
    PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    rowis = *rowb;
    colis = *colb;
    PetscCall(PetscMalloc1(1,&submats));
    submats[0] = *B_seq;
  } else {
    const PetscInt cstart  = A->cmap->rstart;
    const PetscInt ndiag   = mpiaij->A->cmap->n;
    const PetscInt noffd   = mpiaij->B->cmap->n;
    const PetscInt *garray = mpiaij->garray;
    PetscInt       *browidx,n = 0,split,k;

    /* Rows of B to fetch = columns of local A: the garray entries below the owned range,
       the owned range itself, then the remaining garray entries */
    PetscCall(PetscMalloc1(ndiag+noffd,&browidx));
    for (split=0; split<noffd && garray[split] < cstart; split++) browidx[n++] = garray[split];
    for (k=0; k<ndiag; k++) browidx[n++] = cstart + k;
    for (k=split; k<noffd; k++) browidx[n++] = garray[k];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,browidx,PETSC_OWN_POINTER,&rowis));
    /* All columns of B */
    PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&colis));
  }

  PetscCall(MatCreateSubMatrices(B,1,&rowis,&colis,scall,&submats));
  *B_seq = submats[0];
  PetscCall(PetscFree(submats));

  /* Hand each index set back if the caller supplied a slot for it, otherwise release it */
  if (rowb) *rowb = rowis;
  else PetscCall(ISDestroy(&rowis));
  if (colb) *colb = colis;
  else PetscCall(ISDestroy(&colis));
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable..
5770 5771 Level: developer 5772 5773 */ 5774 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5775 { 5776 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5777 Mat_SeqAIJ *b_oth; 5778 VecScatter ctx; 5779 MPI_Comm comm; 5780 const PetscMPIInt *rprocs,*sprocs; 5781 const PetscInt *srow,*rstarts,*sstarts; 5782 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5783 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5784 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5785 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5786 PetscMPIInt size,tag,rank,nreqs; 5787 5788 PetscFunctionBegin; 5789 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5790 PetscCallMPI(MPI_Comm_size(comm,&size)); 5791 5792 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5793 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5794 } 5795 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0)); 5796 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 5797 5798 if (size == 1) { 5799 startsj_s = NULL; 5800 bufa_ptr = NULL; 5801 *B_oth = NULL; 5802 PetscFunctionReturn(0); 5803 } 5804 5805 ctx = a->Mvctx; 5806 tag = ((PetscObject)ctx)->tag; 5807 5808 PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5809 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5810 PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs)); 5811 PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs)); 5812 
PetscCall(PetscMalloc1(nreqs,&reqs)); 5813 rwaits = reqs; 5814 swaits = reqs + nrecvs; 5815 5816 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5817 if (scall == MAT_INITIAL_MATRIX) { 5818 /* i-array */ 5819 /*---------*/ 5820 /* post receives */ 5821 if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5822 for (i=0; i<nrecvs; i++) { 5823 rowlen = rvalues + rstarts[i]*rbs; 5824 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5825 PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5826 } 5827 5828 /* pack the outgoing message */ 5829 PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj)); 5830 5831 sstartsj[0] = 0; 5832 rstartsj[0] = 0; 5833 len = 0; /* total length of j or a array to be sent */ 5834 if (nsends) { 5835 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5836 PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues)); 5837 } 5838 for (i=0; i<nsends; i++) { 5839 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5840 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5841 for (j=0; j<nrows; j++) { 5842 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5843 for (l=0; l<sbs; l++) { 5844 PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */ 5845 5846 rowlen[j*sbs+l] = ncols; 5847 5848 len += ncols; 5849 PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); 5850 } 5851 k++; 5852 } 5853 PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5854 5855 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5856 } 5857 /* recvs and sends of i-array are completed */ 5858 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5859 PetscCall(PetscFree(svalues)); 5860 5861 /* allocate buffers for sending j and a arrays */ 5862 PetscCall(PetscMalloc1(len+1,&bufj)); 5863 
PetscCall(PetscMalloc1(len+1,&bufa)); 5864 5865 /* create i-array of B_oth */ 5866 PetscCall(PetscMalloc1(aBn+2,&b_othi)); 5867 5868 b_othi[0] = 0; 5869 len = 0; /* total length of j or a array to be received */ 5870 k = 0; 5871 for (i=0; i<nrecvs; i++) { 5872 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5873 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5874 for (j=0; j<nrows; j++) { 5875 b_othi[k+1] = b_othi[k] + rowlen[j]; 5876 PetscCall(PetscIntSumError(rowlen[j],len,&len)); 5877 k++; 5878 } 5879 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5880 } 5881 PetscCall(PetscFree(rvalues)); 5882 5883 /* allocate space for j and a arrays of B_oth */ 5884 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj)); 5885 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha)); 5886 5887 /* j-array */ 5888 /*---------*/ 5889 /* post receives of j-array */ 5890 for (i=0; i<nrecvs; i++) { 5891 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5892 PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5893 } 5894 5895 /* pack the outgoing message j-array */ 5896 if (nsends) k = sstarts[0]; 5897 for (i=0; i<nsends; i++) { 5898 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5899 bufJ = bufj+sstartsj[i]; 5900 for (j=0; j<nrows; j++) { 5901 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5902 for (ll=0; ll<sbs; ll++) { 5903 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5904 for (l=0; l<ncols; l++) { 5905 *bufJ++ = cols[l]; 5906 } 5907 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5908 } 5909 } 5910 PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5911 } 5912 5913 /* recvs and sends of j-array are completed */ 5914 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5915 } else if (scall == MAT_REUSE_MATRIX) { 5916 sstartsj = *startsj_s; 5917 rstartsj = 
*startsj_r; 5918 bufa = *bufa_ptr; 5919 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5920 PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha)); 5921 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5922 5923 /* a-array */ 5924 /*---------*/ 5925 /* post receives of a-array */ 5926 for (i=0; i<nrecvs; i++) { 5927 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5928 PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i)); 5929 } 5930 5931 /* pack the outgoing message a-array */ 5932 if (nsends) k = sstarts[0]; 5933 for (i=0; i<nsends; i++) { 5934 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5935 bufA = bufa+sstartsj[i]; 5936 for (j=0; j<nrows; j++) { 5937 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5938 for (ll=0; ll<sbs; ll++) { 5939 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5940 for (l=0; l<ncols; l++) { 5941 *bufA++ = vals[l]; 5942 } 5943 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5944 } 5945 } 5946 PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i)); 5947 } 5948 /* recvs and sends of a-array are completed */ 5949 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5950 PetscCall(PetscFree(reqs)); 5951 5952 if (scall == MAT_INITIAL_MATRIX) { 5953 /* put together the new matrix */ 5954 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth)); 5955 5956 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5957 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5958 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5959 b_oth->free_a = PETSC_TRUE; 5960 b_oth->free_ij = PETSC_TRUE; 5961 b_oth->nonew = 0; 5962 5963 PetscCall(PetscFree(bufj)); 5964 if (!startsj_s || !bufa_ptr) { 5965 PetscCall(PetscFree2(sstartsj,rstartsj)); 5966 PetscCall(PetscFree(bufa_ptr)); 5967 } else { 5968 *startsj_s = sstartsj; 5969 *startsj_r = rstartsj; 5970 *bufa_ptr = bufa; 5971 } 5972 } else if (scall == MAT_REUSE_MATRIX) { 5973 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha)); 5974 } 5975 5976 PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5977 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs)); 5978 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0)); 5979 PetscFunctionReturn(0); 5980 } 5981 5982 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5983 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5984 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5985 #if defined(PETSC_HAVE_MKL_SPARSE) 5986 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5987 #endif 5988 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5989 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5990 #if defined(PETSC_HAVE_ELEMENTAL) 5991 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5992 #endif 5993 #if defined(PETSC_HAVE_SCALAPACK) 5994 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5995 #endif 5996 #if defined(PETSC_HAVE_HYPRE) 5997 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5998 #endif 5999 #if defined(PETSC_HAVE_CUDA) 6000 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 6001 #endif 6002 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 
6003 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 6004 #endif 6005 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 6006 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 6007 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6008 6009 /* 6010 Computes (B'*A')' since computing B*A directly is untenable 6011 6012 n p p 6013 [ ] [ ] [ ] 6014 m [ A ] * n [ B ] = m [ C ] 6015 [ ] [ ] [ ] 6016 6017 */ 6018 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 6019 { 6020 Mat At,Bt,Ct; 6021 6022 PetscFunctionBegin; 6023 PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At)); 6024 PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt)); 6025 PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct)); 6026 PetscCall(MatDestroy(&At)); 6027 PetscCall(MatDestroy(&Bt)); 6028 PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C)); 6029 PetscCall(MatDestroy(&Ct)); 6030 PetscFunctionReturn(0); 6031 } 6032 6033 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 6034 { 6035 PetscBool cisdense; 6036 6037 PetscFunctionBegin; 6038 PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n); 6039 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N)); 6040 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 6041 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 6042 if (!cisdense) { 6043 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 6044 } 6045 PetscCall(MatSetUp(C)); 6046 6047 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6048 PetscFunctionReturn(0); 6049 } 6050 6051 /* ----------------------------------------------------------------*/ 6052 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6053 { 6054 Mat_Product *product = 
C->product; 6055 Mat A = product->A,B=product->B; 6056 6057 PetscFunctionBegin; 6058 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6059 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6060 6061 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6062 C->ops->productsymbolic = MatProductSymbolic_AB; 6063 PetscFunctionReturn(0); 6064 } 6065 6066 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6067 { 6068 Mat_Product *product = C->product; 6069 6070 PetscFunctionBegin; 6071 if (product->type == MATPRODUCT_AB) { 6072 PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6073 } 6074 PetscFunctionReturn(0); 6075 } 6076 6077 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6078 6079 Input Parameters: 6080 6081 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6082 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6083 6084 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6085 6086 For Set1, j1[] contains column indices of the nonzeros. 6087 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6088 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6089 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6090 6091 Similar for Set2. 6092 6093 This routine merges the two sets of nonzeros row by row and removes repeats. 6094 6095 Output Parameters: (memory is allocated by the caller) 6096 6097 i[],j[]: the CSR of the merged matrix, which has m rows. 6098 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) 
corresponds to imap1[k]-th unique nonzero in the merged matrix. 6099 imap2[]: similar to imap1[], but for Set2. 6100 Note we order nonzeros row-by-row and from left to right. 6101 */ 6102 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[], 6103 const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[], 6104 PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[]) 6105 { 6106 PetscInt r,m; /* Row index of mat */ 6107 PetscCount t,t1,t2,b1,e1,b2,e2; 6108 6109 PetscFunctionBegin; 6110 PetscCall(MatGetLocalSize(mat,&m,NULL)); 6111 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6112 i[0] = 0; 6113 for (r=0; r<m; r++) { /* Do row by row merging */ 6114 b1 = rowBegin1[r]; 6115 e1 = rowEnd1[r]; 6116 b2 = rowBegin2[r]; 6117 e2 = rowEnd2[r]; 6118 while (b1 < e1 && b2 < e2) { 6119 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6120 j[t] = j1[b1]; 6121 imap1[t1] = t; 6122 imap2[t2] = t; 6123 b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6124 b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6125 t1++; t2++; t++; 6126 } else if (j1[b1] < j2[b2]) { 6127 j[t] = j1[b1]; 6128 imap1[t1] = t; 6129 b1 += jmap1[t1+1] - jmap1[t1]; 6130 t1++; t++; 6131 } else { 6132 j[t] = j2[b2]; 6133 imap2[t2] = t; 6134 b2 += jmap2[t2+1] - jmap2[t2]; 6135 t2++; t++; 6136 } 6137 } 6138 /* Merge the remaining in either j1[] or j2[] */ 6139 while (b1 < e1) { 6140 j[t] = j1[b1]; 6141 imap1[t1] = t; 6142 b1 += jmap1[t1+1] - jmap1[t1]; 6143 t1++; t++; 6144 } 6145 while (b2 < e2) { 6146 j[t] = j2[b2]; 6147 imap2[t2] = t; 6148 b2 += jmap2[t2+1] - jmap2[t2]; 6149 t2++; t++; 6150 } 6151 i[r+1] = t; 6152 } 6153 PetscFunctionReturn(0); 6154 } 6155 6156 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those 
in the off-diagonal block 6157 6158 Input Parameters: 6159 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6160 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6161 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6162 6163 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6164 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6165 6166 Output Parameters: 6167 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6168 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6169 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6170 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6171 6172 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6173 Atot: number of entries belonging to the diagonal block. 6174 Annz: number of unique nonzeros belonging to the diagonal block. 6175 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6176 repeats (i.e., same 'i,j' pair). 6177 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6178 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6179 6180 Atot: number of entries belonging to the diagonal block 6181 Annz: number of unique nonzeros belonging to the diagonal block. 6182 6183 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 
6184 6185 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6186 */ 6187 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[], 6188 PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[], 6189 PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_, 6190 PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_) 6191 { 6192 PetscInt cstart,cend,rstart,rend,row,col; 6193 PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6194 PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6195 PetscCount k,m,p,q,r,s,mid; 6196 PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap; 6197 6198 PetscFunctionBegin; 6199 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6200 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6201 m = rend - rstart; 6202 6203 for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */ 6204 6205 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6206 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6207 */ 6208 while (k<n) { 6209 row = i[k]; 6210 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6211 for (s=k; s<n; s++) if (i[s] != row) break; 6212 for (p=k; p<s; p++) { 6213 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6214 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); 6215 } 6216 PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k)); 6217 PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6218 rowBegin[row-rstart] = k; 6219 rowMid[row-rstart] = mid; 6220 rowEnd[row-rstart] = s; 6221 6222 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6223 Atot += mid - k; 6224 Btot += s - mid; 6225 6226 /* Count unique nonzeros of this diag/offdiag row */ 6227 for (p=k; p<mid;) { 6228 col = j[p]; 6229 do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */ 6230 Annz++; 6231 } 6232 6233 for (p=mid; p<s;) { 6234 col = j[p]; 6235 do {p++;} while (p<s && j[p] == col); 6236 Bnnz++; 6237 } 6238 k = s; 6239 } 6240 6241 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6242 PetscCall(PetscMalloc1(Atot,&Aperm)); 6243 PetscCall(PetscMalloc1(Btot,&Bperm)); 6244 PetscCall(PetscMalloc1(Annz+1,&Ajmap)); 6245 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap)); 6246 6247 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6248 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6249 for (r=0; r<m; r++) { 6250 k = rowBegin[r]; 6251 mid = rowMid[r]; 6252 s = rowEnd[r]; 6253 PetscCall(PetscArraycpy(Aperm+Atot,perm+k, mid-k)); 6254 PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid)); 6255 Atot += mid - k; 6256 Btot += s - mid; 6257 6258 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6259 for 
(p=k; p<mid;) { 6260 col = j[p]; 6261 q = p; 6262 do {p++;} while (p<mid && j[p] == col); 6263 Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6264 Annz++; 6265 } 6266 6267 for (p=mid; p<s;) { 6268 col = j[p]; 6269 q = p; 6270 do {p++;} while (p<s && j[p] == col); 6271 Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6272 Bnnz++; 6273 } 6274 } 6275 /* Output */ 6276 *Aperm_ = Aperm; 6277 *Annz_ = Annz; 6278 *Atot_ = Atot; 6279 *Ajmap_ = Ajmap; 6280 *Bperm_ = Bperm; 6281 *Bnnz_ = Bnnz; 6282 *Btot_ = Btot; 6283 *Bjmap_ = Bjmap; 6284 PetscFunctionReturn(0); 6285 } 6286 6287 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6288 6289 Input Parameters: 6290 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6291 nnz: number of unique nonzeros in the merged matrix 6292 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6293 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6294 6295 Output Parameter: (memory is allocated by the caller) 6296 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6297 6298 Example: 6299 nnz1 = 4 6300 nnz = 6 6301 imap = [1,3,4,5] 6302 jmap = [0,3,5,6,7] 6303 then, 6304 jmap_new = [0,0,3,3,5,6,7] 6305 */ 6306 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[]) 6307 { 6308 PetscCount k,p; 6309 6310 PetscFunctionBegin; 6311 jmap_new[0] = 0; 6312 p = nnz; /* p loops over jmap_new[] backwards */ 6313 for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */ 6314 for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1]; 6315 } 6316 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6317 PetscFunctionReturn(0); 6318 } 6319 6320 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6321 { 6322 MPI_Comm comm; 6323 PetscMPIInt rank,size; 6324 PetscInt 
m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6325 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6326 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6327 6328 PetscFunctionBegin; 6329 PetscCall(PetscFree(mpiaij->garray)); 6330 PetscCall(VecDestroy(&mpiaij->lvec)); 6331 #if defined(PETSC_USE_CTABLE) 6332 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6333 #else 6334 PetscCall(PetscFree(mpiaij->colmap)); 6335 #endif 6336 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6337 mat->assembled = PETSC_FALSE; 6338 mat->was_assembled = PETSC_FALSE; 6339 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6340 6341 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 6342 PetscCallMPI(MPI_Comm_size(comm,&size)); 6343 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 6344 PetscCall(PetscLayoutSetUp(mat->rmap)); 6345 PetscCall(PetscLayoutSetUp(mat->cmap)); 6346 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6347 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6348 PetscCall(MatGetLocalSize(mat,&m,&n)); 6349 PetscCall(MatGetSize(mat,&M,&N)); 6350 6351 /* ---------------------------------------------------------------------------*/ 6352 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6353 /* entries come first, then local rows, then remote rows. 
*/ 6354 /* ---------------------------------------------------------------------------*/ 6355 PetscCount n1 = coo_n,*perm1; 6356 PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 6357 PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1)); 6358 PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */ 6359 PetscCall(PetscArraycpy(j1,coo_j,n1)); 6360 for (k=0; k<n1; k++) perm1[k] = k; 6361 6362 /* Manipulate indices so that entries with negative row or col indices will have smallest 6363 row indices, local entries will have greater but negative row indices, and remote entries 6364 will have positive row indices. 6365 */ 6366 for (k=0; k<n1; k++) { 6367 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6368 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6369 else { 6370 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6371 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6372 } 6373 } 6374 6375 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6376 PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1)); 6377 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */ 6378 PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */ 6379 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6380 6381 /* ---------------------------------------------------------------------------*/ 6382 /* Split local rows into diag/offdiag portions */ 6383 /* ---------------------------------------------------------------------------*/ 6384 PetscCount 
*rowBegin1,*rowMid1,*rowEnd1; 6385 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1; 6386 PetscCount Annz1,Bnnz1,Atot1,Btot1; 6387 6388 PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1)); 6389 PetscCall(PetscMalloc1(n1-rem,&Cperm1)); 6390 PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1)); 6391 6392 /* ---------------------------------------------------------------------------*/ 6393 /* Send remote rows to their owner */ 6394 /* ---------------------------------------------------------------------------*/ 6395 /* Find which rows should be sent to which remote ranks*/ 6396 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6397 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6398 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6399 const PetscInt *ranges; 6400 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6401 6402 PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges)); 6403 PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries)); 6404 for (k=rem; k<n1;) { 6405 PetscMPIInt owner; 6406 PetscInt firstRow,lastRow; 6407 6408 /* Locate a row range */ 6409 firstRow = i1[k]; /* first row of this owner */ 6410 PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner)); 6411 lastRow = ranges[owner+1]-1; /* last row of this owner */ 6412 6413 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6414 PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p)); 6415 6416 /* All entries in [k,p) belong to this remote owner */ 6417 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6418 PetscMPIInt *sendto2; 6419 PetscInt *nentries2; 6420 PetscInt maxNsend2 = (maxNsend <= size/2) ? 
maxNsend*2 : size; 6421 6422 PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2)); 6423 PetscCall(PetscArraycpy(sendto2,sendto,maxNsend)); 6424 PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1)); 6425 PetscCall(PetscFree2(sendto,nentries2)); 6426 sendto = sendto2; 6427 nentries = nentries2; 6428 maxNsend = maxNsend2; 6429 } 6430 sendto[nsend] = owner; 6431 nentries[nsend] = p - k; 6432 PetscCall(PetscCountCast(p-k,&nentries[nsend])); 6433 nsend++; 6434 k = p; 6435 } 6436 6437 /* Build 1st SF to know offsets on remote to send data */ 6438 PetscSF sf1; 6439 PetscInt nroots = 1,nroots2 = 0; 6440 PetscInt nleaves = nsend,nleaves2 = 0; 6441 PetscInt *offsets; 6442 PetscSFNode *iremote; 6443 6444 PetscCall(PetscSFCreate(comm,&sf1)); 6445 PetscCall(PetscMalloc1(nsend,&iremote)); 6446 PetscCall(PetscMalloc1(nsend,&offsets)); 6447 for (k=0; k<nsend; k++) { 6448 iremote[k].rank = sendto[k]; 6449 iremote[k].index = 0; 6450 nleaves2 += nentries[k]; 6451 PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt"); 6452 } 6453 PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6454 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM)); 6455 PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6456 PetscCall(PetscSFDestroy(&sf1)); 6457 PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem); 6458 6459 /* Build 2nd SF to send remote COOs to their owner */ 6460 PetscSF sf2; 6461 nroots = nroots2; 6462 nleaves = nleaves2; 6463 PetscCall(PetscSFCreate(comm,&sf2)); 6464 PetscCall(PetscSFSetFromOptions(sf2)); 6465 PetscCall(PetscMalloc1(nleaves,&iremote)); 
6466 p = 0; 6467 for (k=0; k<nsend; k++) { 6468 PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt"); 6469 for (q=0; q<nentries[k]; q++,p++) { 6470 iremote[p].rank = sendto[k]; 6471 iremote[p].index = offsets[k] + q; 6472 } 6473 } 6474 PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6475 6476 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6477 PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem)); 6478 6479 /* Send the remote COOs to their owner */ 6480 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6481 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6482 PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2)); 6483 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE)); 6484 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE)); 6485 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE)); 6486 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE)); 6487 6488 PetscCall(PetscFree(offsets)); 6489 PetscCall(PetscFree2(sendto,nentries)); 6490 6491 /* ---------------------------------------------------------------*/ 6492 /* Sort received COOs by row along with the permutation array */ 6493 /* ---------------------------------------------------------------*/ 6494 for (k=0; k<n2; k++) perm2[k] = k; 6495 PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2)); 6496 6497 /* ---------------------------------------------------------------*/ 6498 /* Split received COOs into diag/offdiag portions */ 6499 /* ---------------------------------------------------------------*/ 6500 PetscCount 
*rowBegin2,*rowMid2,*rowEnd2; 6501 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6502 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6503 6504 PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2)); 6505 PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2)); 6506 6507 /* --------------------------------------------------------------------------*/ 6508 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6509 /* --------------------------------------------------------------------------*/ 6510 PetscInt *Ai,*Bi; 6511 PetscInt *Aj,*Bj; 6512 6513 PetscCall(PetscMalloc1(m+1,&Ai)); 6514 PetscCall(PetscMalloc1(m+1,&Bi)); 6515 PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6516 PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj)); 6517 6518 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6519 PetscCall(PetscMalloc1(Annz1,&Aimap1)); 6520 PetscCall(PetscMalloc1(Bnnz1,&Bimap1)); 6521 PetscCall(PetscMalloc1(Annz2,&Aimap2)); 6522 PetscCall(PetscMalloc1(Bnnz2,&Bimap2)); 6523 6524 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj)); 6525 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj)); 6526 6527 /* --------------------------------------------------------------------------*/ 6528 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6529 /* expect nonzeros in A/B most likely have local contributing entries */ 6530 /* --------------------------------------------------------------------------*/ 6531 PetscInt Annz = Ai[m]; 6532 PetscInt Bnnz = Bi[m]; 6533 PetscCount *Ajmap1_new,*Bjmap1_new; 6534 6535 PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new)); 6536 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new)); 6537 6538 
PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new)); 6539 PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new)); 6540 6541 PetscCall(PetscFree(Aimap1)); 6542 PetscCall(PetscFree(Ajmap1)); 6543 PetscCall(PetscFree(Bimap1)); 6544 PetscCall(PetscFree(Bjmap1)); 6545 PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1)); 6546 PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2)); 6547 PetscCall(PetscFree3(i1,j1,perm1)); 6548 PetscCall(PetscFree3(i2,j2,perm2)); 6549 6550 Ajmap1 = Ajmap1_new; 6551 Bjmap1 = Bjmap1_new; 6552 6553 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6554 if (Annz < Annz1 + Annz2) { 6555 PetscInt *Aj_new; 6556 PetscCall(PetscMalloc1(Annz,&Aj_new)); 6557 PetscCall(PetscArraycpy(Aj_new,Aj,Annz)); 6558 PetscCall(PetscFree(Aj)); 6559 Aj = Aj_new; 6560 } 6561 6562 if (Bnnz < Bnnz1 + Bnnz2) { 6563 PetscInt *Bj_new; 6564 PetscCall(PetscMalloc1(Bnnz,&Bj_new)); 6565 PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz)); 6566 PetscCall(PetscFree(Bj)); 6567 Bj = Bj_new; 6568 } 6569 6570 /* --------------------------------------------------------------------------------*/ 6571 /* Create new submatrices for on-process and off-process coupling */ 6572 /* --------------------------------------------------------------------------------*/ 6573 PetscScalar *Aa,*Ba; 6574 MatType rtype; 6575 Mat_SeqAIJ *a,*b; 6576 PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */ 6577 PetscCall(PetscCalloc1(Bnnz,&Ba)); 6578 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6579 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6580 PetscCall(MatDestroy(&mpiaij->A)); 6581 PetscCall(MatDestroy(&mpiaij->B)); 6582 PetscCall(MatGetRootType_Private(mat,&rtype)); 6583 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A)); 6584 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B)); 6585 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6586 6587 a = 
(Mat_SeqAIJ*)mpiaij->A->data; 6588 b = (Mat_SeqAIJ*)mpiaij->B->data; 6589 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6590 a->free_a = b->free_a = PETSC_TRUE; 6591 a->free_ij = b->free_ij = PETSC_TRUE; 6592 6593 /* conversion must happen AFTER multiply setup */ 6594 PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A)); 6595 PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B)); 6596 PetscCall(VecDestroy(&mpiaij->lvec)); 6597 PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL)); 6598 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec)); 6599 6600 mpiaij->coo_n = coo_n; 6601 mpiaij->coo_sf = sf2; 6602 mpiaij->sendlen = nleaves; 6603 mpiaij->recvlen = nroots; 6604 6605 mpiaij->Annz = Annz; 6606 mpiaij->Bnnz = Bnnz; 6607 6608 mpiaij->Annz2 = Annz2; 6609 mpiaij->Bnnz2 = Bnnz2; 6610 6611 mpiaij->Atot1 = Atot1; 6612 mpiaij->Atot2 = Atot2; 6613 mpiaij->Btot1 = Btot1; 6614 mpiaij->Btot2 = Btot2; 6615 6616 mpiaij->Ajmap1 = Ajmap1; 6617 mpiaij->Aperm1 = Aperm1; 6618 6619 mpiaij->Bjmap1 = Bjmap1; 6620 mpiaij->Bperm1 = Bperm1; 6621 6622 mpiaij->Aimap2 = Aimap2; 6623 mpiaij->Ajmap2 = Ajmap2; 6624 mpiaij->Aperm2 = Aperm2; 6625 6626 mpiaij->Bimap2 = Bimap2; 6627 mpiaij->Bjmap2 = Bjmap2; 6628 mpiaij->Bperm2 = Bperm2; 6629 6630 mpiaij->Cperm1 = Cperm1; 6631 6632 /* Allocate in preallocation. 
If not used, it has zero cost on host */ 6633 PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf)); 6634 PetscFunctionReturn(0); 6635 } 6636 6637 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode) 6638 { 6639 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6640 Mat A = mpiaij->A,B = mpiaij->B; 6641 PetscCount Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2; 6642 PetscScalar *Aa,*Ba; 6643 PetscScalar *sendbuf = mpiaij->sendbuf; 6644 PetscScalar *recvbuf = mpiaij->recvbuf; 6645 const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2; 6646 const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2; 6647 const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2; 6648 const PetscCount *Cperm1 = mpiaij->Cperm1; 6649 6650 PetscFunctionBegin; 6651 PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */ 6652 PetscCall(MatSeqAIJGetArray(B,&Ba)); 6653 6654 /* Pack entries to be sent to remote */ 6655 for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6656 6657 /* Send remote entries to their owner and overlap the communication with local computation */ 6658 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE)); 6659 /* Add local entries to A and B */ 6660 for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6661 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stablility */ 6662 for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]]; 6663 Aa[i] = (imode == INSERT_VALUES? 
0.0 : Aa[i]) + sum; 6664 } 6665 for (PetscCount i=0; i<Bnnz; i++) { 6666 PetscScalar sum = 0.0; 6667 for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]]; 6668 Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum; 6669 } 6670 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE)); 6671 6672 /* Add received remote entries to A and B */ 6673 for (PetscCount i=0; i<Annz2; i++) { 6674 for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6675 } 6676 for (PetscCount i=0; i<Bnnz2; i++) { 6677 for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6678 } 6679 PetscCall(MatSeqAIJRestoreArray(A,&Aa)); 6680 PetscCall(MatSeqAIJRestoreArray(B,&Ba)); 6681 PetscFunctionReturn(0); 6682 } 6683 6684 /* ----------------------------------------------------------------*/ 6685 6686 /*MC 6687 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6688 6689 Options Database Keys: 6690 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6691 6692 Level: beginner 6693 6694 Notes: 6695 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6696 in this case the values associated with the rows and columns one passes in are set to zero 6697 in the matrix 6698 6699 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6700 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6701 6702 .seealso: `MatCreateAIJ()` 6703 M*/ 6704 6705 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6706 { 6707 Mat_MPIAIJ *b; 6708 PetscMPIInt size; 6709 6710 PetscFunctionBegin; 6711 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 6712 6713 PetscCall(PetscNewLog(B,&b)); 6714 B->data = (void*)b; 6715 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 6716 B->assembled = PETSC_FALSE; 6717 B->insertmode = NOT_SET_VALUES; 6718 b->size = size; 6719 6720 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 6721 6722 /* build cache for off array entries formed */ 6723 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 6724 6725 b->donotstash = PETSC_FALSE; 6726 b->colmap = NULL; 6727 b->garray = NULL; 6728 b->roworiented = PETSC_TRUE; 6729 6730 /* stuff used for matrix vector multiply */ 6731 b->lvec = NULL; 6732 b->Mvctx = NULL; 6733 6734 /* stuff for MatGetRow() */ 6735 b->rowindices = NULL; 6736 b->rowvalues = NULL; 6737 b->getrowactive = PETSC_FALSE; 6738 6739 /* flexible pointer used in CUSPARSE classes */ 6740 b->spptr = NULL; 6741 6742 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6743 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ)); 6744 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ)); 6745 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ)); 6746 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ)); 6747 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ)); 6748 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6749 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ)); 6750 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM)); 6751 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL)); 6752 #if defined(PETSC_HAVE_CUDA) 6753 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6754 #endif 6755 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6756 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos)); 6757 #endif 6758 #if defined(PETSC_HAVE_MKL_SPARSE) 6759 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL)); 6760 #endif 6761 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL)); 6762 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ)); 6763 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ)); 6764 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense)); 6765 #if defined(PETSC_HAVE_ELEMENTAL) 6766 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental)); 6767 #endif 6768 #if defined(PETSC_HAVE_SCALAPACK) 6769 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK)); 6770 #endif 6771 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS)); 6772 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL)); 6773 #if defined(PETSC_HAVE_HYPRE) 6774 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE)); 6775 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6776 #endif 6777 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ)); 6778 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ)); 6779 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ)); 6780 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ)); 6781 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ)); 6782 PetscFunctionReturn(0); 6783 } 6784 6785 /*@C 6786 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6787 and "off-diagonal" part of the matrix in CSR format. 6788 6789 Collective 6790 6791 Input Parameters: 6792 + comm - MPI communicator 6793 . m - number of local rows (Cannot be PETSC_DECIDE) 6794 . n - This value should be the same as the local size used in creating the 6795 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6796 calculated if N is given) For square matrices n is almost always m. 6797 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6798 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6799 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6800 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6801 . 
a - matrix values 6802 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6803 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6804 - oa - matrix values 6805 6806 Output Parameter: 6807 . mat - the matrix 6808 6809 Level: advanced 6810 6811 Notes: 6812 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6813 must free the arrays once the matrix has been destroyed and not before. 6814 6815 The i and j indices are 0 based 6816 6817 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6818 6819 This sets local rows and cannot be used to set off-processor values. 6820 6821 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6822 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6823 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6824 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6825 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6826 communication if it is known that only local entries will be set. 
6827 6828 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6829 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6830 @*/ 6831 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6832 { 6833 Mat_MPIAIJ *maij; 6834 6835 PetscFunctionBegin; 6836 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6837 PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6838 PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6839 PetscCall(MatCreate(comm,mat)); 6840 PetscCall(MatSetSizes(*mat,m,n,M,N)); 6841 PetscCall(MatSetType(*mat,MATMPIAIJ)); 6842 maij = (Mat_MPIAIJ*) (*mat)->data; 6843 6844 (*mat)->preallocated = PETSC_TRUE; 6845 6846 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6847 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6848 6849 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A)); 6850 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B)); 6851 6852 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 6853 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 6854 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 6855 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 6856 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 6857 PetscFunctionReturn(0); 6858 } 6859 6860 typedef struct { 6861 Mat *mp; /* intermediate products */ 6862 PetscBool *mptmp; /* is the intermediate product temporary ? 
*/ 6863 PetscInt cp; /* number of intermediate products */ 6864 6865 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6866 PetscInt *startsj_s,*startsj_r; 6867 PetscScalar *bufa; 6868 Mat P_oth; 6869 6870 /* may take advantage of merging product->B */ 6871 Mat Bloc; /* B-local by merging diag and off-diag */ 6872 6873 /* cusparse does not have support to split between symbolic and numeric phases. 6874 When api_user is true, we don't need to update the numerical values 6875 of the temporary storage */ 6876 PetscBool reusesym; 6877 6878 /* support for COO values insertion */ 6879 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6880 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6881 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6882 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6883 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6884 PetscMemType mtype; 6885 6886 /* customization */ 6887 PetscBool abmerge; 6888 PetscBool P_oth_bind; 6889 } MatMatMPIAIJBACKEND; 6890 6891 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6892 { 6893 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6894 PetscInt i; 6895 6896 PetscFunctionBegin; 6897 PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r)); 6898 PetscCall(PetscFree(mmdata->bufa)); 6899 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v)); 6900 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w)); 6901 PetscCall(MatDestroy(&mmdata->P_oth)); 6902 PetscCall(MatDestroy(&mmdata->Bloc)); 6903 PetscCall(PetscSFDestroy(&mmdata->sf)); 6904 for (i = 0; i < mmdata->cp; i++) { 6905 PetscCall(MatDestroy(&mmdata->mp[i])); 6906 } 6907 PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp)); 6908 PetscCall(PetscFree(mmdata->own[0])); 6909 PetscCall(PetscFree(mmdata->own)); 
6910 PetscCall(PetscFree(mmdata->off[0])); 6911 PetscCall(PetscFree(mmdata->off)); 6912 PetscCall(PetscFree(mmdata)); 6913 PetscFunctionReturn(0); 6914 } 6915 6916 /* Copy selected n entries with indices in idx[] of A to v[]. 6917 If idx is NULL, copy the whole data array of A to v[] 6918 */ 6919 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6920 { 6921 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6922 6923 PetscFunctionBegin; 6924 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f)); 6925 if (f) { 6926 PetscCall((*f)(A,n,idx,v)); 6927 } else { 6928 const PetscScalar *vv; 6929 6930 PetscCall(MatSeqAIJGetArrayRead(A,&vv)); 6931 if (n && idx) { 6932 PetscScalar *w = v; 6933 const PetscInt *oi = idx; 6934 PetscInt j; 6935 6936 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6937 } else { 6938 PetscCall(PetscArraycpy(v,vv,n)); 6939 } 6940 PetscCall(MatSeqAIJRestoreArrayRead(A,&vv)); 6941 } 6942 PetscFunctionReturn(0); 6943 } 6944 6945 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6946 { 6947 MatMatMPIAIJBACKEND *mmdata; 6948 PetscInt i,n_d,n_o; 6949 6950 PetscFunctionBegin; 6951 MatCheckProduct(C,1); 6952 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6953 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6954 if (!mmdata->reusesym) { /* update temporary matrices */ 6955 if (mmdata->P_oth) { 6956 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 6957 } 6958 if (mmdata->Bloc) { 6959 PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc)); 6960 } 6961 } 6962 mmdata->reusesym = PETSC_FALSE; 6963 6964 for (i = 0; i < mmdata->cp; i++) { 6965 PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for 
%s",MatProductTypes[mmdata->mp[i]->product->type]); 6966 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 6967 } 6968 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6969 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6970 6971 if (mmdata->mptmp[i]) continue; 6972 if (noff) { 6973 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6974 6975 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o)); 6976 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d)); 6977 n_o += noff; 6978 n_d += nown; 6979 } else { 6980 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6981 6982 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d)); 6983 n_d += mm->nz; 6984 } 6985 } 6986 if (mmdata->hasoffproc) { /* offprocess insertion */ 6987 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6988 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6989 } 6990 PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES)); 6991 PetscFunctionReturn(0); 6992 } 6993 6994 /* Support for Pt * A, A * P, or Pt * A * P */ 6995 #define MAX_NUMBER_INTERMEDIATE 4 6996 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6997 { 6998 Mat_Product *product = C->product; 6999 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7000 Mat_MPIAIJ *a,*p; 7001 MatMatMPIAIJBACKEND *mmdata; 7002 ISLocalToGlobalMapping P_oth_l2g = NULL; 7003 IS glob = NULL; 7004 const char *prefix; 7005 char pprefix[256]; 7006 const PetscInt *globidx,*P_oth_idx; 7007 PetscInt i,j,cp,m,n,M,N,*coo_i,*coo_j; 7008 PetscCount ncoo,ncoo_d,ncoo_o,ncoo_oown; 7009 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 7010 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7011 /* a base offset; type-2: sparse with a local to global map table */ 7012 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7013 7014 MatProductType ptype; 7015 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 7016 PetscMPIInt size; 7017 7018 PetscFunctionBegin; 7019 MatCheckProduct(C,1); 7020 PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 7021 ptype = product->type; 7022 if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 7023 ptype = MATPRODUCT_AB; 7024 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7025 } 7026 switch (ptype) { 7027 case MATPRODUCT_AB: 7028 A = product->A; 7029 P = product->B; 7030 m = A->rmap->n; 7031 n = P->cmap->n; 7032 M = A->rmap->N; 7033 N = P->cmap->N; 7034 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7035 break; 7036 case MATPRODUCT_AtB: 7037 P = product->A; 7038 A = product->B; 7039 m = P->cmap->n; 7040 n = A->cmap->n; 7041 M = P->cmap->N; 7042 N = A->cmap->N; 7043 hasoffproc = PETSC_TRUE; 7044 break; 7045 case MATPRODUCT_PtAP: 7046 A = product->A; 7047 P = product->B; 7048 m = P->cmap->n; 7049 n = P->cmap->n; 7050 M = P->cmap->N; 7051 N = P->cmap->N; 7052 hasoffproc = PETSC_TRUE; 7053 break; 7054 default: 7055 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7056 } 7057 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size)); 7058 if (size == 1) hasoffproc = PETSC_FALSE; 7059 7060 /* defaults */ 7061 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 7062 mp[i] = NULL; 7063 mptmp[i] = PETSC_FALSE; 7064 rmapt[i] = -1; 7065 cmapt[i] = -1; 7066 rmapa[i] = NULL; 7067 cmapa[i] = NULL; 7068 } 7069 7070 /* customization */ 7071 PetscCall(PetscNew(&mmdata)); 7072 
mmdata->reusesym = product->api_user; 7073 if (ptype == MATPRODUCT_AB) { 7074 if (product->api_user) { 7075 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat"); 7076 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 7077 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7078 PetscOptionsEnd(); 7079 } else { 7080 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat"); 7081 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 7082 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7083 PetscOptionsEnd(); 7084 } 7085 } else if (ptype == MATPRODUCT_PtAP) { 7086 if (product->api_user) { 7087 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat"); 7088 PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7089 PetscOptionsEnd(); 7090 } else { 7091 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 7092 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7093 PetscOptionsEnd(); 7094 } 7095 } 7096 a = (Mat_MPIAIJ*)A->data; 7097 p = (Mat_MPIAIJ*)P->data; 7098 PetscCall(MatSetSizes(C,m,n,M,N)); 7099 PetscCall(PetscLayoutSetUp(C->rmap)); 7100 PetscCall(PetscLayoutSetUp(C->cmap)); 7101 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 7102 PetscCall(MatGetOptionsPrefix(C,&prefix)); 7103 7104 cp = 0; 7105 switch (ptype) { 7106 case MATPRODUCT_AB: 
/* A * P */ 7107 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7108 7109 /* A_diag * P_local (merged or not) */ 7110 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7111 /* P is product->B */ 7112 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7113 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7114 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7115 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7116 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7117 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7118 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7119 mp[cp]->product->api_user = product->api_user; 7120 PetscCall(MatProductSetFromOptions(mp[cp])); 7121 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7122 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7123 PetscCall(ISGetIndices(glob,&globidx)); 7124 rmapt[cp] = 1; 7125 cmapt[cp] = 2; 7126 cmapa[cp] = globidx; 7127 mptmp[cp] = PETSC_FALSE; 7128 cp++; 7129 } else { /* A_diag * P_diag and A_diag * P_off */ 7130 PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp])); 7131 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7132 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7133 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7134 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7135 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7136 mp[cp]->product->api_user = product->api_user; 7137 PetscCall(MatProductSetFromOptions(mp[cp])); 7138 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7139 
PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7140 rmapt[cp] = 1; 7141 cmapt[cp] = 1; 7142 mptmp[cp] = PETSC_FALSE; 7143 cp++; 7144 PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp])); 7145 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7146 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7147 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7148 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7149 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7150 mp[cp]->product->api_user = product->api_user; 7151 PetscCall(MatProductSetFromOptions(mp[cp])); 7152 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7153 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7154 rmapt[cp] = 1; 7155 cmapt[cp] = 2; 7156 cmapa[cp] = p->garray; 7157 mptmp[cp] = PETSC_FALSE; 7158 cp++; 7159 } 7160 7161 /* A_off * P_other */ 7162 if (mmdata->P_oth) { 7163 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */ 7164 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7165 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7166 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7167 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7168 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7169 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7170 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7171 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7172 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7173 mp[cp]->product->api_user = product->api_user; 7174 PetscCall(MatProductSetFromOptions(mp[cp])); 7175 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7176 
PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7177 rmapt[cp] = 1; 7178 cmapt[cp] = 2; 7179 cmapa[cp] = P_oth_idx; 7180 mptmp[cp] = PETSC_FALSE; 7181 cp++; 7182 } 7183 break; 7184 7185 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7186 /* A is product->B */ 7187 PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7188 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7189 PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp])); 7190 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7191 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7192 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7193 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7194 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7195 mp[cp]->product->api_user = product->api_user; 7196 PetscCall(MatProductSetFromOptions(mp[cp])); 7197 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7198 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7199 PetscCall(ISGetIndices(glob,&globidx)); 7200 rmapt[cp] = 2; 7201 rmapa[cp] = globidx; 7202 cmapt[cp] = 2; 7203 cmapa[cp] = globidx; 7204 mptmp[cp] = PETSC_FALSE; 7205 cp++; 7206 } else { 7207 PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp])); 7208 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7209 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7210 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7211 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7212 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7213 mp[cp]->product->api_user = product->api_user; 7214 PetscCall(MatProductSetFromOptions(mp[cp])); 7215 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for 
%s",MatProductTypes[mp[cp]->product->type]); 7216 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7217 PetscCall(ISGetIndices(glob,&globidx)); 7218 rmapt[cp] = 1; 7219 cmapt[cp] = 2; 7220 cmapa[cp] = globidx; 7221 mptmp[cp] = PETSC_FALSE; 7222 cp++; 7223 PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp])); 7224 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7225 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7226 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7227 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7228 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7229 mp[cp]->product->api_user = product->api_user; 7230 PetscCall(MatProductSetFromOptions(mp[cp])); 7231 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7232 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7233 rmapt[cp] = 2; 7234 rmapa[cp] = p->garray; 7235 cmapt[cp] = 2; 7236 cmapa[cp] = globidx; 7237 mptmp[cp] = PETSC_FALSE; 7238 cp++; 7239 } 7240 break; 7241 case MATPRODUCT_PtAP: 7242 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7243 /* P is product->B */ 7244 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7245 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7246 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP)); 7247 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7248 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7249 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7250 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7251 mp[cp]->product->api_user = product->api_user; 7252 PetscCall(MatProductSetFromOptions(mp[cp])); 7253 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for 
%s",MatProductTypes[mp[cp]->product->type]); 7254 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7255 PetscCall(ISGetIndices(glob,&globidx)); 7256 rmapt[cp] = 2; 7257 rmapa[cp] = globidx; 7258 cmapt[cp] = 2; 7259 cmapa[cp] = globidx; 7260 mptmp[cp] = PETSC_FALSE; 7261 cp++; 7262 if (mmdata->P_oth) { 7263 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); 7264 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7265 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7266 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7267 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7268 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7269 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7270 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7271 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7272 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7273 mp[cp]->product->api_user = product->api_user; 7274 PetscCall(MatProductSetFromOptions(mp[cp])); 7275 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7276 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7277 mptmp[cp] = PETSC_TRUE; 7278 cp++; 7279 PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp])); 7280 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7281 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7282 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7283 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7284 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7285 mp[cp]->product->api_user = product->api_user; 7286 PetscCall(MatProductSetFromOptions(mp[cp])); 7287 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7288 
PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7289 rmapt[cp] = 2; 7290 rmapa[cp] = globidx; 7291 cmapt[cp] = 2; 7292 cmapa[cp] = P_oth_idx; 7293 mptmp[cp] = PETSC_FALSE; 7294 cp++; 7295 } 7296 break; 7297 default: 7298 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7299 } 7300 /* sanity check */ 7301 if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i); 7302 7303 PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp)); 7304 for (i = 0; i < cp; i++) { 7305 mmdata->mp[i] = mp[i]; 7306 mmdata->mptmp[i] = mptmp[i]; 7307 } 7308 mmdata->cp = cp; 7309 C->product->data = mmdata; 7310 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7311 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7312 7313 /* memory type */ 7314 mmdata->mtype = PETSC_MEMTYPE_HOST; 7315 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"")); 7316 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"")); 7317 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7318 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7319 7320 /* prepare coo coordinates for values insertion */ 7321 7322 /* count total nonzeros of those intermediate seqaij Mats 7323 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7324 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7325 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7326 */ 7327 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7328 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7329 if (mptmp[cp]) continue; 7330 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7331 const PetscInt *rmap 
= rmapa[cp]; 7332 const PetscInt mr = mp[cp]->rmap->n; 7333 const PetscInt rs = C->rmap->rstart; 7334 const PetscInt re = C->rmap->rend; 7335 const PetscInt *ii = mm->i; 7336 for (i = 0; i < mr; i++) { 7337 const PetscInt gr = rmap[i]; 7338 const PetscInt nz = ii[i+1] - ii[i]; 7339 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7340 else ncoo_oown += nz; /* this row is local */ 7341 } 7342 } else ncoo_d += mm->nz; 7343 } 7344 7345 /* 7346 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7347 7348 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7349 7350 off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 7351 7352 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7353 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7354 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7355 7356 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7357 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 
7358 */ 7359 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */ 7360 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own)); 7361 7362 /* gather (i,j) of nonzeros inserted by remote procs */ 7363 if (hasoffproc) { 7364 PetscSF msf; 7365 PetscInt ncoo2,*coo_i2,*coo_j2; 7366 7367 PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0])); 7368 PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0])); 7369 PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */ 7370 7371 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7372 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7373 PetscInt *idxoff = mmdata->off[cp]; 7374 PetscInt *idxown = mmdata->own[cp]; 7375 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7376 const PetscInt *rmap = rmapa[cp]; 7377 const PetscInt *cmap = cmapa[cp]; 7378 const PetscInt *ii = mm->i; 7379 PetscInt *coi = coo_i + ncoo_o; 7380 PetscInt *coj = coo_j + ncoo_o; 7381 const PetscInt mr = mp[cp]->rmap->n; 7382 const PetscInt rs = C->rmap->rstart; 7383 const PetscInt re = C->rmap->rend; 7384 const PetscInt cs = C->cmap->rstart; 7385 for (i = 0; i < mr; i++) { 7386 const PetscInt *jj = mm->j + ii[i]; 7387 const PetscInt gr = rmap[i]; 7388 const PetscInt nz = ii[i+1] - ii[i]; 7389 if (gr < rs || gr >= re) { /* this is an offproc row */ 7390 for (j = ii[i]; j < ii[i+1]; j++) { 7391 *coi++ = gr; 7392 *idxoff++ = j; 7393 } 7394 if (!cmapt[cp]) { /* already global */ 7395 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7396 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7397 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7398 } else { /* offdiag */ 7399 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7400 } 7401 ncoo_o += nz; 7402 } else { /* this is a local row */ 7403 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 7404 } 7405 } 7406 } 7407 mmdata->off[cp + 1] = idxoff; 7408 mmdata->own[cp + 1] = idxown; 7409 } 7410 7411 
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7412 PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i)); 7413 PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf)); 7414 PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL)); 7415 ncoo = ncoo_d + ncoo_oown + ncoo2; 7416 PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2)); 7417 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7418 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); 7419 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7420 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7421 PetscCall(PetscFree2(coo_i,coo_j)); 7422 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7423 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w)); 7424 coo_i = coo_i2; 7425 coo_j = coo_j2; 7426 } else { /* no offproc values insertion */ 7427 ncoo = ncoo_d; 7428 PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j)); 7429 7430 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7431 PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER)); 7432 PetscCall(PetscSFSetUp(mmdata->sf)); 7433 } 7434 mmdata->hasoffproc = hasoffproc; 7435 7436 /* gather (i,j) of nonzeros inserted locally */ 7437 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7438 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7439 PetscInt *coi = coo_i + ncoo_d; 7440 PetscInt *coj = coo_j + ncoo_d; 7441 const PetscInt *jj = mm->j; 7442 const PetscInt *ii = mm->i; 7443 const PetscInt *cmap = cmapa[cp]; 7444 const PetscInt *rmap = rmapa[cp]; 7445 const PetscInt mr = mp[cp]->rmap->n; 7446 const PetscInt rs = C->rmap->rstart; 7447 const PetscInt re = C->rmap->rend; 7448 const PetscInt 
cs = C->cmap->rstart; 7449 7450 if (mptmp[cp]) continue; 7451 if (rmapt[cp] == 1) { /* consecutive rows */ 7452 /* fill coo_i */ 7453 for (i = 0; i < mr; i++) { 7454 const PetscInt gr = i + rs; 7455 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 7456 } 7457 /* fill coo_j */ 7458 if (!cmapt[cp]) { /* type-0, already global */ 7459 PetscCall(PetscArraycpy(coj,jj,mm->nz)); 7460 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7461 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7462 } else { /* type-2, local to global for sparse columns */ 7463 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7464 } 7465 ncoo_d += mm->nz; 7466 } else if (rmapt[cp] == 2) { /* sparse rows */ 7467 for (i = 0; i < mr; i++) { 7468 const PetscInt *jj = mm->j + ii[i]; 7469 const PetscInt gr = rmap[i]; 7470 const PetscInt nz = ii[i+1] - ii[i]; 7471 if (gr >= rs && gr < re) { /* local rows */ 7472 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 7473 if (!cmapt[cp]) { /* type-0, already global */ 7474 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7475 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7476 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7477 } else { /* type-2, local to global for sparse columns */ 7478 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7479 } 7480 ncoo_d += nz; 7481 } 7482 } 7483 } 7484 } 7485 if (glob) { 7486 PetscCall(ISRestoreIndices(glob,&globidx)); 7487 } 7488 PetscCall(ISDestroy(&glob)); 7489 if (P_oth_l2g) { 7490 PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx)); 7491 } 7492 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7493 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7494 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v)); 7495 7496 /* preallocate with COO data */ 7497 PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j)); 7498 
PetscCall(PetscFree2(coo_i,coo_j)); 7499 PetscFunctionReturn(0); 7500 } 7501 7502 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7503 { 7504 Mat_Product *product = mat->product; 7505 #if defined(PETSC_HAVE_DEVICE) 7506 PetscBool match = PETSC_FALSE; 7507 PetscBool usecpu = PETSC_FALSE; 7508 #else 7509 PetscBool match = PETSC_TRUE; 7510 #endif 7511 7512 PetscFunctionBegin; 7513 MatCheckProduct(mat,1); 7514 #if defined(PETSC_HAVE_DEVICE) 7515 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7516 PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match)); 7517 } 7518 if (match) { /* we can always fallback to the CPU if requested */ 7519 switch (product->type) { 7520 case MATPRODUCT_AB: 7521 if (product->api_user) { 7522 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat"); 7523 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7524 PetscOptionsEnd(); 7525 } else { 7526 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat"); 7527 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7528 PetscOptionsEnd(); 7529 } 7530 break; 7531 case MATPRODUCT_AtB: 7532 if (product->api_user) { 7533 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat"); 7534 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7535 PetscOptionsEnd(); 7536 } else { 7537 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat"); 7538 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7539 PetscOptionsEnd(); 7540 } 7541 break; 7542 case MATPRODUCT_PtAP: 7543 if (product->api_user) { 7544 
PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat"); 7545 PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7546 PetscOptionsEnd(); 7547 } else { 7548 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat"); 7549 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7550 PetscOptionsEnd(); 7551 } 7552 break; 7553 default: 7554 break; 7555 } 7556 match = (PetscBool)!usecpu; 7557 } 7558 #endif 7559 if (match) { 7560 switch (product->type) { 7561 case MATPRODUCT_AB: 7562 case MATPRODUCT_AtB: 7563 case MATPRODUCT_PtAP: 7564 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7565 break; 7566 default: 7567 break; 7568 } 7569 } 7570 /* fallback to MPIAIJ ops */ 7571 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7572 PetscFunctionReturn(0); 7573 } 7574 7575 /* 7576 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7577 7578 n - the number of block indices in cc[] 7579 cc - the block indices (must be large enough to contain the indices) 7580 */ 7581 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc) 7582 { 7583 PetscInt cnt = -1,nidx,j; 7584 const PetscInt *idx; 7585 7586 PetscFunctionBegin; 7587 PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL)); 7588 if (nidx) { 7589 cnt = 0; 7590 cc[cnt] = idx[0]/bs; 7591 for (j=1; j<nidx; j++) { 7592 if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs; 7593 } 7594 } 7595 PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL)); 7596 *n = cnt+1; 7597 PetscFunctionReturn(0); 7598 } 7599 7600 /* 7601 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7602 7603 ncollapsed - the number of block indices 7604 collapsed - the block indices 
(must be large enough to contain the indices) 7605 */ 7606 static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed) 7607 { 7608 PetscInt i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp; 7609 7610 PetscFunctionBegin; 7611 PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev)); 7612 for (i=start+1; i<start+bs; i++) { 7613 PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur)); 7614 PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged)); 7615 cprevtmp = cprev; cprev = merged; merged = cprevtmp; 7616 } 7617 *ncollapsed = nprev; 7618 if (collapsed) *collapsed = cprev; 7619 PetscFunctionReturn(0); 7620 } 7621 7622 /* -------------------------------------------------------------------------- */ 7623 /* 7624 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7625 7626 Input Parameter: 7627 . Amat - matrix 7628 - symmetrize - make the result symmetric 7629 + scale - scale with diagonal 7630 7631 Output Parameter: 7632 . 
a_Gmat - output scalar graph >= 0 7633 7634 */ 7635 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat) 7636 { 7637 PetscInt Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs; 7638 MPI_Comm comm; 7639 Mat Gmat; 7640 PetscBool ismpiaij,isseqaij; 7641 Mat a, b, c; 7642 MatType jtype; 7643 7644 PetscFunctionBegin; 7645 PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm)); 7646 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7647 PetscCall(MatGetSize(Amat, &MM, &NN)); 7648 PetscCall(MatGetBlockSize(Amat, &bs)); 7649 nloc = (Iend-Istart)/bs; 7650 7651 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij)); 7652 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij)); 7653 PetscCheck(isseqaij || ismpiaij,comm,PETSC_ERR_USER,"Require (MPI)AIJ matrix type"); 7654 7655 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7656 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7657 implementation */ 7658 if (bs > 1) { 7659 PetscCall(MatGetType(Amat,&jtype)); 7660 PetscCall(MatCreate(comm, &Gmat)); 7661 PetscCall(MatSetType(Gmat, jtype)); 7662 PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE)); 7663 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7664 if (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray) { 7665 PetscInt *d_nnz, *o_nnz; 7666 MatScalar *aa,val,AA[4096]; 7667 PetscInt *aj,*ai,AJ[4096],nc; 7668 if (isseqaij) { a = Amat; b = NULL; } 7669 else { 7670 Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data; 7671 a = d->A; b = d->B; 7672 } 7673 PetscCall(PetscInfo(Amat,"New bs>1 Graph. nloc=%" PetscInt_FMT "\n",nloc)); 7674 PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz)); 7675 for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ 7676 PetscInt *nnz = (c==a) ? 
d_nnz : o_nnz, nmax=0; 7677 const PetscInt *cols; 7678 for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows 7679 PetscCall(MatGetRow(c,brow,&jj,&cols,NULL)); 7680 nnz[brow/bs] = jj/bs; 7681 if (jj%bs) ok = 0; 7682 if (cols) j0 = cols[0]; 7683 else j0 = -1; 7684 PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL)); 7685 if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs]; 7686 for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks 7687 PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL)); 7688 if (jj%bs) ok = 0; 7689 if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; 7690 if (nnz[brow/bs] != jj/bs) ok = 0; 7691 PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL)); 7692 } 7693 if (!ok) { 7694 PetscCall(PetscFree2(d_nnz,o_nnz)); 7695 goto old_bs; 7696 } 7697 } 7698 PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %" PetscInt_FMT " too small 4096.",nmax); 7699 } 7700 PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz)); 7701 PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz)); 7702 PetscCall(PetscFree2(d_nnz,o_nnz)); 7703 // diag 7704 for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows 7705 Mat_SeqAIJ *aseq = (Mat_SeqAIJ*)a->data; 7706 ai = aseq->i; 7707 n = ai[brow+1] - ai[brow]; 7708 aj = aseq->j + ai[brow]; 7709 for (int k=0; k<n; k += bs) { // block columns 7710 AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart) 7711 val = 0; 7712 for (int ii=0; ii<bs; ii++) { // rows in block 7713 aa = aseq->a + ai[brow+ii] + k; 7714 for (int jj=0; jj<bs; jj++) { // columns in block 7715 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7716 } 7717 } 7718 AA[k/bs] = val; 7719 } 7720 grow = Istart/bs + brow/bs; 7721 PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES)); 7722 } 7723 // off-diag 7724 if (ismpiaij) { 7725 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)Amat->data; 7726 const PetscScalar *vals; 7727 const PetscInt *cols, *garray = aij->garray; 7728 
PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?"); 7729 for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows 7730 PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL)); 7731 for (int k=0,cidx=0 ; k < ncols ; k += bs, cidx++) { 7732 AA[k/bs] = 0; 7733 AJ[cidx] = garray[cols[k]]/bs; 7734 } 7735 nc = ncols/bs; 7736 PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL)); 7737 for (int ii=0; ii<bs; ii++) { // rows in block 7738 PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals)); 7739 for (int k=0; k<ncols; k += bs) { 7740 for (int jj=0; jj<bs; jj++) { // cols in block 7741 AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj])); 7742 } 7743 } 7744 PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals)); 7745 } 7746 grow = Istart/bs + brow/bs; 7747 PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES)); 7748 } 7749 } 7750 PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY)); 7751 PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY)); 7752 } else { 7753 const PetscScalar *vals; 7754 const PetscInt *idx; 7755 PetscInt *d_nnz, *o_nnz,*w0,*w1,*w2; 7756 old_bs: 7757 /* 7758 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7759 */ 7760 PetscCall(PetscInfo(Amat,"OLD bs>1 CreateGraph\n")); 7761 PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 
0 : nloc, &o_nnz)); 7762 if (isseqaij) { 7763 PetscInt max_d_nnz; 7764 /* 7765 Determine exact preallocation count for (sequential) scalar matrix 7766 */ 7767 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz)); 7768 max_d_nnz = PetscMin(nloc,bs*max_d_nnz); 7769 PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2)); 7770 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) { 7771 PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL)); 7772 } 7773 PetscCall(PetscFree3(w0,w1,w2)); 7774 } else if (ismpiaij) { 7775 Mat Daij,Oaij; 7776 const PetscInt *garray; 7777 PetscInt max_d_nnz; 7778 PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray)); 7779 /* 7780 Determine exact preallocation count for diagonal block portion of scalar matrix 7781 */ 7782 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz)); 7783 max_d_nnz = PetscMin(nloc,bs*max_d_nnz); 7784 PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2)); 7785 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7786 PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL)); 7787 } 7788 PetscCall(PetscFree3(w0,w1,w2)); 7789 /* 7790 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7791 */ 7792 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7793 o_nnz[jj] = 0; 7794 for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */ 7795 PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL)); 7796 o_nnz[jj] += ncols; 7797 PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL)); 7798 } 7799 if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc; 7800 } 7801 } else SETERRQ(comm,PETSC_ERR_USER,"Require AIJ matrix type"); 7802 /* get scalar copy (norms) of matrix */ 7803 PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz)); 7804 PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz)); 7805 PetscCall(PetscFree2(d_nnz,o_nnz)); 7806 for (Ii = Istart; Ii < Iend; Ii++) { 7807 PetscInt dest_row = Ii/bs; 7808 
PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals)); 7809 for (jj=0; jj<ncols; jj++) { 7810 PetscInt dest_col = idx[jj]/bs; 7811 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7812 PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES)); 7813 } 7814 PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals)); 7815 } 7816 PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY)); 7817 PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY)); 7818 } 7819 } else { 7820 /* TODO GPU: optimization proposal, each class provides fast implementation of this 7821 procedure via MatAbs API */ 7822 /* just copy scalar matrix & abs() */ 7823 PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7824 if (isseqaij) { a = Gmat; b = NULL; } 7825 else { 7826 Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data; 7827 a = d->A; b = d->B; 7828 } 7829 /* abs */ 7830 for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ 7831 MatInfo info; 7832 PetscScalar *avals; 7833 PetscCall(MatGetInfo(c,MAT_LOCAL,&info)); 7834 PetscCall(MatSeqAIJGetArray(c,&avals)); 7835 for (int jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7836 PetscCall(MatSeqAIJRestoreArray(c,&avals)); 7837 } 7838 } 7839 if (symmetrize) { 7840 PetscBool issym; 7841 PetscCall(MatGetOption(Amat,MAT_SYMMETRIC,&issym)); 7842 if (!issym) { 7843 Mat matTrans; 7844 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 7845 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? 
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 7846 PetscCall(MatDestroy(&matTrans)); 7847 } 7848 PetscCall(MatSetOption(Gmat,MAT_SYMMETRIC,PETSC_TRUE)); 7849 } else { 7850 PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 7851 } 7852 if (scale) { 7853 /* scale c for all diagonal values = 1 or -1 */ 7854 Vec diag; 7855 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 7856 PetscCall(MatGetDiagonal(Gmat, diag)); 7857 PetscCall(VecReciprocal(diag)); 7858 PetscCall(VecSqrtAbs(diag)); 7859 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 7860 PetscCall(VecDestroy(&diag)); 7861 } 7862 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 7863 *a_Gmat = Gmat; 7864 PetscFunctionReturn(0); 7865 } 7866 7867 /* -------------------------------------------------------------------------- */ 7868 /*@C 7869 MatFilter_AIJ - filter values with small absolute values 7870 With vfilter < 0 does nothing so should not be called. 7871 7872 Collective on Mat 7873 7874 Input Parameters: 7875 + Gmat - the graph 7876 . vfilter - threshold parameter [0,1) 7877 7878 Output Parameter: 7879 . filteredG - output filtered scalar graph 7880 7881 Level: developer 7882 7883 Notes: 7884 This is called before graph coarsers are called. 
7885 This could go into Mat, move 'symm' to GAMG 7886 7887 .seealso: `PCGAMGSetThreshold()` 7888 @*/ 7889 PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat,PetscReal vfilter, Mat *filteredG) 7890 { 7891 PetscInt Istart,Iend,ncols,nnz0,nnz1, NN, MM, nloc; 7892 Mat tGmat; 7893 MPI_Comm comm; 7894 const PetscScalar *vals; 7895 const PetscInt *idx; 7896 PetscInt *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols=0; 7897 MatScalar *AA; // this is checked in graph 7898 PetscBool isseqaij; 7899 Mat a, b, c; 7900 MatType jtype; 7901 7902 PetscFunctionBegin; 7903 PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm)); 7904 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isseqaij)); 7905 PetscCall(MatGetType(Gmat,&jtype)); 7906 PetscCall(MatCreate(comm, &tGmat)); 7907 PetscCall(MatSetType(tGmat, jtype)); 7908 7909 /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold? 7910 Also, if the matrix is symmetric, can we skip this 7911 operation? It can be very expensive on large matrices. 
*/ 7912 7913 // global sizes 7914 PetscCall(MatGetSize(Gmat, &MM, &NN)); 7915 PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); 7916 nloc = Iend - Istart; 7917 PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz)); 7918 if (isseqaij) { a = Gmat; b = NULL; } 7919 else { 7920 Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data; 7921 a = d->A; b = d->B; 7922 garray = d->garray; 7923 } 7924 /* Determine upper bound on non-zeros needed in new filtered matrix */ 7925 for (PetscInt row=0; row < nloc; row++) { 7926 PetscCall(MatGetRow(a,row,&ncols,NULL,NULL)); 7927 d_nnz[row] = ncols; 7928 if (ncols>maxcols) maxcols=ncols; 7929 PetscCall(MatRestoreRow(a,row,&ncols,NULL,NULL)); 7930 } 7931 if (b) { 7932 for (PetscInt row=0; row < nloc; row++) { 7933 PetscCall(MatGetRow(b,row,&ncols,NULL,NULL)); 7934 o_nnz[row] = ncols; 7935 if (ncols>maxcols) maxcols=ncols; 7936 PetscCall(MatRestoreRow(b,row,&ncols,NULL,NULL)); 7937 } 7938 } 7939 PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM)); 7940 PetscCall(MatSetBlockSizes(tGmat, 1, 1)); 7941 PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz)); 7942 PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz)); 7943 PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 7944 PetscCall(PetscFree2(d_nnz,o_nnz)); 7945 // 7946 PetscCall(PetscMalloc2(maxcols, &AA,maxcols, &AJ)); 7947 nnz0 = nnz1 = 0; 7948 for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ 7949 for (PetscInt row=0, grow=Istart, ncol_row, jj ; row < nloc; row++,grow++) { 7950 PetscCall(MatGetRow(c,row,&ncols,&idx,&vals)); 7951 for (ncol_row=jj=0; jj<ncols; jj++,nnz0++) { 7952 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7953 if (PetscRealPart(sv) > vfilter) { 7954 nnz1++; 7955 PetscInt cid = idx[jj] + Istart; //diag 7956 if (c!=a) cid = garray[idx[jj]]; 7957 AA[ncol_row] = vals[jj]; 7958 AJ[ncol_row] = cid; 7959 ncol_row++; 7960 } 7961 } 7962 PetscCall(MatRestoreRow(c,row,&ncols,&idx,&vals)); 7963 PetscCall(MatSetValues(tGmat,1,&grow,ncol_row,AJ,AA,INSERT_VALUES)); 7964 } 7965 } 
7966 PetscCall(PetscFree2(AA,AJ)); 7967 PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY)); 7968 PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY)); 7969 PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); /* Normal Mat options are not relevant ? */ 7970 7971 PetscCall(PetscInfo(tGmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", 7972 (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, (double)vfilter, 7973 (!nloc) ? 1. : (double)nnz0/(double)nloc,MM,(int)maxcols)); 7974 7975 *filteredG = tGmat; 7976 PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view")); 7977 PetscFunctionReturn(0); 7978 } 7979 7980 /* 7981 Special version for direct calls from Fortran 7982 */ 7983 #include <petsc/private/fortranimpl.h> 7984 7985 /* Change these macros so can be used in void function */ 7986 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 7987 #undef PetscCall 7988 #define PetscCall(...) do { \ 7989 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 7990 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 7991 *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \ 7992 return; \ 7993 } \ 7994 } while (0) 7995 7996 #undef SETERRQ 7997 #define SETERRQ(comm,ierr,...) 
do { \ 7998 *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \ 7999 return; \ 8000 } while (0) 8001 8002 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8003 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8004 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8005 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8006 #else 8007 #endif 8008 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 8009 { 8010 Mat mat = *mmat; 8011 PetscInt m = *mm, n = *mn; 8012 InsertMode addv = *maddv; 8013 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 8014 PetscScalar value; 8015 8016 MatCheckPreallocated(mat,1); 8017 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8018 else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 8019 { 8020 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 8021 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 8022 PetscBool roworiented = aij->roworiented; 8023 8024 /* Some Variables required in the macro */ 8025 Mat A = aij->A; 8026 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 8027 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 8028 MatScalar *aa; 8029 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8030 Mat B = aij->B; 8031 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 8032 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 8033 MatScalar *ba; 8034 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8035 * cannot use "#if defined" inside a macro. 
*/ 8036 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8037 8038 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 8039 PetscInt nonew = a->nonew; 8040 MatScalar *ap1,*ap2; 8041 8042 PetscFunctionBegin; 8043 PetscCall(MatSeqAIJGetArray(A,&aa)); 8044 PetscCall(MatSeqAIJGetArray(B,&ba)); 8045 for (i=0; i<m; i++) { 8046 if (im[i] < 0) continue; 8047 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 8048 if (im[i] >= rstart && im[i] < rend) { 8049 row = im[i] - rstart; 8050 lastcol1 = -1; 8051 rp1 = aj + ai[row]; 8052 ap1 = aa + ai[row]; 8053 rmax1 = aimax[row]; 8054 nrow1 = ailen[row]; 8055 low1 = 0; 8056 high1 = nrow1; 8057 lastcol2 = -1; 8058 rp2 = bj + bi[row]; 8059 ap2 = ba + bi[row]; 8060 rmax2 = bimax[row]; 8061 nrow2 = bilen[row]; 8062 low2 = 0; 8063 high2 = nrow2; 8064 8065 for (j=0; j<n; j++) { 8066 if (roworiented) value = v[i*n+j]; 8067 else value = v[i+j*m]; 8068 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8069 if (in[j] >= cstart && in[j] < cend) { 8070 col = in[j] - cstart; 8071 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 8072 } else if (in[j] < 0) continue; 8073 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8074 /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */ 8075 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 8076 } else { 8077 if (mat->was_assembled) { 8078 if (!aij->colmap) { 8079 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8080 } 8081 #if defined(PETSC_USE_CTABLE) 8082 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); 8083 col--; 8084 #else 8085 col = aij->colmap[in[j]] - 1; 8086 #endif 8087 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 8088 
PetscCall(MatDisAssemble_MPIAIJ(mat)); 8089 col = in[j]; 8090 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8091 B = aij->B; 8092 b = (Mat_SeqAIJ*)B->data; 8093 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 8094 rp2 = bj + bi[row]; 8095 ap2 = ba + bi[row]; 8096 rmax2 = bimax[row]; 8097 nrow2 = bilen[row]; 8098 low2 = 0; 8099 high2 = nrow2; 8100 bm = aij->B->rmap->n; 8101 ba = b->a; 8102 inserted = PETSC_FALSE; 8103 } 8104 } else col = in[j]; 8105 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 8106 } 8107 } 8108 } else if (!aij->donotstash) { 8109 if (roworiented) { 8110 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8111 } else { 8112 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8113 } 8114 } 8115 } 8116 PetscCall(MatSeqAIJRestoreArray(A,&aa)); 8117 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 8118 } 8119 PetscFunctionReturnVoid(); 8120 } 8121 8122 /* Undefining these here since they were redefined from their original definition above! No 8123 * other PETSc functions should be defined past this point, as it is impossible to recover the 8124 * original definitions */ 8125 #undef PetscCall 8126 #undef SETERRQ 8127