#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B));
  PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B));
  PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B));
  PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A,flg));
  if (a->B) PetscCall(MatBindToCPU(a->B,flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from that of the parent matrix.
*/ 83 if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg)); 84 if (a->diag) PetscCall(VecBindToCPU(a->diag,flg)); 85 86 PetscFunctionReturn(0); 87 } 88 89 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 90 { 91 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 92 93 PetscFunctionBegin; 94 if (mat->A) { 95 PetscCall(MatSetBlockSizes(mat->A,rbs,cbs)); 96 PetscCall(MatSetBlockSizes(mat->B,rbs,1)); 97 } 98 PetscFunctionReturn(0); 99 } 100 101 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 102 { 103 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 104 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 105 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 106 const PetscInt *ia,*ib; 107 const MatScalar *aa,*bb,*aav,*bav; 108 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 109 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 110 111 PetscFunctionBegin; 112 *keptrows = NULL; 113 114 ia = a->i; 115 ib = b->i; 116 PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav)); 117 PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav)); 118 for (i=0; i<m; i++) { 119 na = ia[i+1] - ia[i]; 120 nb = ib[i+1] - ib[i]; 121 if (!na && !nb) { 122 cnt++; 123 goto ok1; 124 } 125 aa = aav + ia[i]; 126 for (j=0; j<na; j++) { 127 if (aa[j] != 0.0) goto ok1; 128 } 129 bb = bav + ib[i]; 130 for (j=0; j <nb; j++) { 131 if (bb[j] != 0.0) goto ok1; 132 } 133 cnt++; 134 ok1:; 135 } 136 PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M))); 137 if (!n0rows) { 138 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 139 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 140 PetscFunctionReturn(0); 141 } 142 PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows)); 143 cnt = 0; 144 for (i=0; i<m; i++) { 145 na = ia[i+1] - ia[i]; 146 nb = ib[i+1] - ib[i]; 147 if (!na && !nb) continue; 148 aa = aav + ia[i]; 149 for (j=0; j<na;j++) { 150 if (aa[j] != 0.0) { 151 rows[cnt++] = rstart + i; 152 goto ok2; 153 } 154 } 155 bb = bav + ib[i]; 156 for (j=0; j<nb; j++) { 157 if (bb[j] != 0.0) { 158 rows[cnt++] = rstart + i; 159 goto ok2; 160 } 161 } 162 ok2:; 163 } 164 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows)); 165 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 166 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 167 PetscFunctionReturn(0); 168 } 169 170 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 171 { 172 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 173 PetscBool cong; 174 175 PetscFunctionBegin; 176 PetscCall(MatHasCongruentLayouts(Y,&cong)); 177 if (Y->assembled && cong) { 178 PetscCall(MatDiagonalSet(aij->A,D,is)); 179 } else { 180 PetscCall(MatDiagonalSet_Default(Y,D,is)); 181 } 182 PetscFunctionReturn(0); 183 } 184 185 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 188 PetscInt i,rstart,nrows,*rows; 189 190 PetscFunctionBegin; 191 *zrows = NULL; 192 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows)); 193 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 194 for (i=0; i<nrows; i++) rows[i] += rstart; 195 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows)); 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions) 200 { 201 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 202 PetscInt i,m,n,*garray = aij->garray; 203 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 204 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 205 PetscReal *work; 206 const PetscScalar *dummy; 207 208 
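
  /*
     Each rank accumulates its local contribution to every column reduction in a work array of
     global length n: the diagonal block aij->A contributes at columns cstart + j, the off-diagonal
     block aij->B at the global columns garray[j].  A single MPIU_Allreduce then combines the ranks'
     contributions (MPI_MAX for NORM_INFINITY, MPI_SUM otherwise); NORM_2 entries are square-rooted
     and the *_MEAN_* reductions are divided by the global number of rows afterwards.

     A minimal usage sketch from application code, assuming an assembled parallel AIJ matrix A
     (MatGetColumnNorms() is the public interface that should end up in this routine for MPIAIJ):

       PetscReal *norms;
       PetscInt   N;
       PetscCall(MatGetSize(A,NULL,&N));
       PetscCall(PetscMalloc1(N,&norms));
       PetscCall(MatGetColumnNorms(A,NORM_2,norms));
       PetscCall(PetscFree(norms));

     Every rank receives all N entries, so the result array must have global length on each rank.
  */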
PetscFunctionBegin; 209 PetscCall(MatGetSize(A,&m,&n)); 210 PetscCall(PetscCalloc1(n,&work)); 211 PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy)); 212 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy)); 213 PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy)); 214 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy)); 215 if (type == NORM_2) { 216 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 217 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 218 } 219 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 220 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 221 } 222 } else if (type == NORM_1) { 223 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 224 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 225 } 226 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 227 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 228 } 229 } else if (type == NORM_INFINITY) { 230 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 231 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 232 } 233 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 234 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 235 } 236 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 237 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 238 work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 239 } 240 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 241 work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 242 } 243 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 244 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 245 work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 246 } 247 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 248 work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 249 } 250 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 251 if (type == NORM_INFINITY) { 252 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 253 } else { 254 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 255 } 256 PetscCall(PetscFree(work)); 257 if (type == NORM_2) { 258 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 259 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 260 for (i=0; i<n; i++) reductions[i] /= m; 261 } 262 PetscFunctionReturn(0); 263 } 264 265 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 266 { 267 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 268 IS sis,gis; 269 const PetscInt *isis,*igis; 270 PetscInt n,*iis,nsis,ngis,rstart,i; 271 272 PetscFunctionBegin; 273 PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis)); 274 PetscCall(MatFindNonzeroRows(a->B,&gis)); 275 PetscCall(ISGetSize(gis,&ngis)); 276 PetscCall(ISGetSize(sis,&nsis)); 277 PetscCall(ISGetIndices(sis,&isis)); 278 PetscCall(ISGetIndices(gis,&igis)); 279 280 PetscCall(PetscMalloc1(ngis+nsis,&iis)); 281 PetscCall(PetscArraycpy(iis,igis,ngis)); 282 PetscCall(PetscArraycpy(iis+ngis,isis,nsis)); 283 n = ngis + nsis; 284 PetscCall(PetscSortRemoveDupsInt(&n,iis)); 285 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 286 for (i=0; i<n; i++) iis[i] += rstart; 287 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is)); 288 289 PetscCall(ISRestoreIndices(sis,&isis)); 290 
PetscCall(ISRestoreIndices(gis,&igis)); 291 PetscCall(ISDestroy(&sis)); 292 PetscCall(ISDestroy(&gis)); 293 PetscFunctionReturn(0); 294 } 295 296 /* 297 Local utility routine that creates a mapping from the global column 298 number to the local number in the off-diagonal part of the local 299 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 300 a slightly higher hash table cost; without it it is not scalable (each processor 301 has an order N integer array but is fast to access. 302 */ 303 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 306 PetscInt n = aij->B->cmap->n,i; 307 308 PetscFunctionBegin; 309 PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 310 #if defined(PETSC_USE_CTABLE) 311 PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap)); 312 for (i=0; i<n; i++) { 313 PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES)); 314 } 315 #else 316 PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap)); 317 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt))); 318 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 319 #endif 320 PetscFunctionReturn(0); 321 } 322 323 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 324 { \ 325 if (col <= lastcol1) low1 = 0; \ 326 else high1 = nrow1; \ 327 lastcol1 = col;\ 328 while (high1-low1 > 5) { \ 329 t = (low1+high1)/2; \ 330 if (rp1[t] > col) high1 = t; \ 331 else low1 = t; \ 332 } \ 333 for (_i=low1; _i<high1; _i++) { \ 334 if (rp1[_i] > col) break; \ 335 if (rp1[_i] == col) { \ 336 if (addv == ADD_VALUES) { \ 337 ap1[_i] += value; \ 338 /* Not sure LogFlops will slow dow the code or not */ \ 339 (void)PetscLogFlops(1.0); \ 340 } \ 341 else ap1[_i] = value; \ 342 goto a_noinsert; \ 343 } \ 344 } \ 345 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 346 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 347 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 348 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 349 N = nrow1++ - 1; a->nz++; high1++; \ 350 /* shift up all the later entries in this row */ \ 351 PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\ 352 PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\ 353 rp1[_i] = col; \ 354 ap1[_i] = value; \ 355 A->nonzerostate++;\ 356 a_noinsert: ; \ 357 ailen[row] = nrow1; \ 358 } 359 360 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 361 { \ 362 if (col <= lastcol2) low2 = 0; \ 363 else high2 = nrow2; \ 364 lastcol2 = col; \ 365 while (high2-low2 > 5) { \ 366 t = (low2+high2)/2; \ 367 if (rp2[t] > col) high2 = t; \ 368 else low2 = t; \ 369 } \ 370 for (_i=low2; _i<high2; _i++) { \ 371 if (rp2[_i] > col) break; \ 372 if (rp2[_i] == col) { \ 373 if (addv == ADD_VALUES) { \ 374 ap2[_i] += value; \ 375 (void)PetscLogFlops(1.0); \ 376 } \ 377 else ap2[_i] = value; \ 378 goto b_noinsert; \ 379 } \ 380 } \ 381 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 382 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 383 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 
384 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 385 N = nrow2++ - 1; b->nz++; high2++; \ 386 /* shift up all the later entries in this row */ \ 387 PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\ 388 PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\ 389 rp2[_i] = col; \ 390 ap2[_i] = value; \ 391 B->nonzerostate++; \ 392 b_noinsert: ; \ 393 bilen[row] = nrow2; \ 394 } 395 396 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 397 { 398 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 399 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 400 PetscInt l,*garray = mat->garray,diag; 401 PetscScalar *aa,*ba; 402 403 PetscFunctionBegin; 404 /* code only works for square matrices A */ 405 406 /* find size of row to the left of the diagonal part */ 407 PetscCall(MatGetOwnershipRange(A,&diag,NULL)); 408 row = row - diag; 409 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 410 if (garray[b->j[b->i[row]+l]] > diag) break; 411 } 412 if (l) { 413 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 414 PetscCall(PetscArraycpy(ba+b->i[row],v,l)); 415 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 416 } 417 418 /* diagonal part */ 419 if (a->i[row+1]-a->i[row]) { 420 PetscCall(MatSeqAIJGetArray(mat->A,&aa)); 421 PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]))); 422 PetscCall(MatSeqAIJRestoreArray(mat->A,&aa)); 423 } 424 425 /* right of diagonal part */ 426 if (b->i[row+1]-b->i[row]-l) { 427 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 428 PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l)); 429 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 430 } 431 PetscFunctionReturn(0); 432 } 433 434 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 435 { 436 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 437 PetscScalar value = 0.0; 438 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 439 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 440 PetscBool roworiented = aij->roworiented; 441 442 /* Some Variables required in the macro */ 443 Mat A = aij->A; 444 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 445 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 446 PetscBool ignorezeroentries = a->ignorezeroentries; 447 Mat B = aij->B; 448 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 449 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 450 MatScalar *aa,*ba; 451 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 452 PetscInt nonew; 453 MatScalar *ap1,*ap2; 454 455 PetscFunctionBegin; 456 PetscCall(MatSeqAIJGetArray(A,&aa)); 457 PetscCall(MatSeqAIJGetArray(B,&ba)); 458 for (i=0; i<m; i++) { 459 if (im[i] < 0) continue; 460 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 461 if (im[i] >= rstart && im[i] < rend) { 462 row = im[i] - rstart; 463 lastcol1 = -1; 464 rp1 = aj + ai[row]; 465 ap1 = aa + ai[row]; 466 rmax1 = aimax[row]; 467 nrow1 = ailen[row]; 468 low1 = 0; 469 high1 = nrow1; 470 lastcol2 = -1; 471 rp2 = bj + bi[row]; 472 ap2 = ba + bi[row]; 473 rmax2 = bimax[row]; 474 nrow2 = bilen[row]; 475 low2 = 0; 476 high2 = nrow2; 477 478 for (j=0; j<n; j++) { 479 if (v) value = roworiented ? 
v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          if (mat->was_assembled) {
            if (!aij->colmap) {
              PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
            }
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat,"Skipping insertion of a new nonzero location in the off-diagonal portion of the matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
              } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa and ba might have been freed due to reallocation above, but we do not access them here */
  PetscCall(MatSeqAIJRestoreArray(B,&ba));
  PetscFunctionReturn(0);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
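  As an illustration (values chosen arbitrarily, not taken from any particular run): if this rank owns
  global columns [cstart,cend) = [4,8) and one row has mat_j = {1, 5, 6, 9}, the diagonal block receives
  the shifted local columns {1, 2} (from 5 and 6) while the off-diagonal block keeps the global columns
  {1, 9}, so ailen and bilen for that row both become 2.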
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij    = (Mat_MPIAIJ*)mat->data;
  Mat        A       = aij->A; /* diagonal part of the matrix */
  Mat        B       = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b      = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart  = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen  = a->ilen,*aj = a->j;
  PetscInt   *bilen  = b->ilen,*bj = b->j;
  PetscInt   am      = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more general MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ*)mat->data;
  Mat        A     = aij->A; /* diagonal part of the matrix */
  Mat        B     = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
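     They give the start of each row within the preallocated aj/aa and bj/ba arrays, so the
     rowstart_diag/rowstart_offd offsets computed below index directly into those arrays.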
*/ 601 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 602 PetscScalar *aa = a->a,*ba = b->a; 603 604 PetscFunctionBegin; 605 /* Iterate over all rows of the matrix */ 606 for (j=0; j<am; j++) { 607 dnz_row = onz_row = 0; 608 rowstart_offd = full_offd_i[j]; 609 rowstart_diag = full_diag_i[j]; 610 /* Iterate over all non-zero columns of the current row */ 611 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 612 /* If column is in the diagonal */ 613 if (mat_j[col] >= cstart && mat_j[col] < cend) { 614 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 615 aa[rowstart_diag+dnz_row] = mat_a[col]; 616 dnz_row++; 617 } else { /* off-diagonal entries */ 618 bj[rowstart_offd+onz_row] = mat_j[col]; 619 ba[rowstart_offd+onz_row] = mat_a[col]; 620 onz_row++; 621 } 622 } 623 ailen[j] = dnz_row; 624 bilen[j] = onz_row; 625 } 626 PetscFunctionReturn(0); 627 } 628 629 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 630 { 631 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 632 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 633 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 634 635 PetscFunctionBegin; 636 for (i=0; i<m; i++) { 637 if (idxm[i] < 0) continue; /* negative row */ 638 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 639 if (idxm[i] >= rstart && idxm[i] < rend) { 640 row = idxm[i] - rstart; 641 for (j=0; j<n; j++) { 642 if (idxn[j] < 0) continue; /* negative column */ 643 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 644 if (idxn[j] >= cstart && idxn[j] < cend) { 645 col = idxn[j] - cstart; 646 PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j)); 647 } else { 648 if (!aij->colmap) { 649 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 650 } 651 #if defined(PETSC_USE_CTABLE) 652 PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col)); 653 col--; 654 #else 655 col = aij->colmap[idxn[j]] - 1; 656 #endif 657 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 658 else { 659 PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j)); 660 } 661 } 662 } 663 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 664 } 665 PetscFunctionReturn(0); 666 } 667 668 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 669 { 670 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 671 PetscInt nstash,reallocs; 672 673 PetscFunctionBegin; 674 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 675 676 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 677 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 678 PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 679 PetscFunctionReturn(0); 680 } 681 682 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 683 { 684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 685 PetscMPIInt n; 686 PetscInt i,j,rstart,ncols,flg; 687 PetscInt *row,*col; 688 PetscBool other_disassembled; 689 PetscScalar *val; 690 691 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 692 693 PetscFunctionBegin; 694 if (!aij->donotstash && !mat->nooffprocentries) { 695 while (1) { 696 
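      /*
         Drain the stash: each pass receives one message of (row, col, value) triples that other ranks
         stashed for rows owned here; flg becomes false once no messages remain.  Consecutive entries
         sharing a row are grouped so that each group is inserted with a single MatSetValues_MPIAIJ()
         call below.
      */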
PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 697 if (!flg) break; 698 699 for (i=0; i<n;) { 700 /* Now identify the consecutive vals belonging to the same row */ 701 for (j=i,rstart=row[j]; j<n; j++) { 702 if (row[j] != rstart) break; 703 } 704 if (j < n) ncols = j-i; 705 else ncols = n-i; 706 /* Now assemble all these values with a single function call */ 707 PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 708 i = j; 709 } 710 } 711 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 712 } 713 #if defined(PETSC_HAVE_DEVICE) 714 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 715 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 716 if (mat->boundtocpu) { 717 PetscCall(MatBindToCPU(aij->A,PETSC_TRUE)); 718 PetscCall(MatBindToCPU(aij->B,PETSC_TRUE)); 719 } 720 #endif 721 PetscCall(MatAssemblyBegin(aij->A,mode)); 722 PetscCall(MatAssemblyEnd(aij->A,mode)); 723 724 /* determine if any processor has disassembled, if so we must 725 also disassemble ourself, in order that we may reassemble. */ 726 /* 727 if nonzero structure of submatrix B cannot change then we know that 728 no processor disassembled thus we can skip this stuff 729 */ 730 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 731 PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat))); 732 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */ 733 PetscCall(MatDisAssemble_MPIAIJ(mat)); 734 } 735 } 736 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 737 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 738 } 739 PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE)); 740 #if defined(PETSC_HAVE_DEVICE) 741 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 742 #endif 743 PetscCall(MatAssemblyBegin(aij->B,mode)); 744 PetscCall(MatAssemblyEnd(aij->B,mode)); 745 746 PetscCall(PetscFree2(aij->rowvalues,aij->rowindices)); 747 748 aij->rowvalues = NULL; 749 750 PetscCall(VecDestroy(&aij->diag)); 751 752 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 753 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 754 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 755 PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat))); 756 } 757 #if defined(PETSC_HAVE_DEVICE) 758 mat->offloadmask = PETSC_OFFLOAD_BOTH; 759 #endif 760 PetscFunctionReturn(0); 761 } 762 763 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 764 { 765 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 766 767 PetscFunctionBegin; 768 PetscCall(MatZeroEntries(l->A)); 769 PetscCall(MatZeroEntries(l->B)); 770 PetscFunctionReturn(0); 771 } 772 773 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 774 { 775 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 776 PetscObjectState sA, sB; 777 PetscInt *lrows; 778 PetscInt r, len; 779 PetscBool cong, lch, gch; 780 781 PetscFunctionBegin; 782 /* get locally owned rows */ 783 PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows)); 784 PetscCall(MatHasCongruentLayouts(A,&cong)); 785 /* fix right hand side if needed */ 786 
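  /*
     When both a prescribed solution x and a right-hand side b are passed in, every locally zeroed
     row r is kept consistent with that solution by setting b[r] = diag*x[r], i.e. the zeroed
     equation becomes diag*x_r = b_r.  This only makes sense when the row and column layouts match,
     which is checked below.
  */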
if (x && b) { 787 const PetscScalar *xx; 788 PetscScalar *bb; 789 790 PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 791 PetscCall(VecGetArrayRead(x, &xx)); 792 PetscCall(VecGetArray(b, &bb)); 793 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 794 PetscCall(VecRestoreArrayRead(x, &xx)); 795 PetscCall(VecRestoreArray(b, &bb)); 796 } 797 798 sA = mat->A->nonzerostate; 799 sB = mat->B->nonzerostate; 800 801 if (diag != 0.0 && cong) { 802 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 803 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 804 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 805 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 806 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 807 PetscInt nnwA, nnwB; 808 PetscBool nnzA, nnzB; 809 810 nnwA = aijA->nonew; 811 nnwB = aijB->nonew; 812 nnzA = aijA->keepnonzeropattern; 813 nnzB = aijB->keepnonzeropattern; 814 if (!nnzA) { 815 PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 816 aijA->nonew = 0; 817 } 818 if (!nnzB) { 819 PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 820 aijB->nonew = 0; 821 } 822 /* Must zero here before the next loop */ 823 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 824 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 825 for (r = 0; r < len; ++r) { 826 const PetscInt row = lrows[r] + A->rmap->rstart; 827 if (row >= A->cmap->N) continue; 828 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 829 } 830 aijA->nonew = nnwA; 831 aijB->nonew = nnwB; 832 } else { 833 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 834 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 835 } 836 PetscCall(PetscFree(lrows)); 837 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 838 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 839 840 /* reduce nonzerostate */ 841 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 842 PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A))); 843 if (gch) A->nonzerostate++; 844 PetscFunctionReturn(0); 845 } 846 847 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 848 { 849 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 850 PetscMPIInt n = A->rmap->n; 851 PetscInt i,j,r,m,len = 0; 852 PetscInt *lrows,*owners = A->rmap->range; 853 PetscMPIInt p = 0; 854 PetscSFNode *rrows; 855 PetscSF sf; 856 const PetscScalar *xx; 857 PetscScalar *bb,*mask,*aij_a; 858 Vec xmask,lmask; 859 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 860 const PetscInt *aj, *ii,*ridx; 861 PetscScalar *aa; 862 863 PetscFunctionBegin; 864 /* Create SF where leaves are input rows and roots are owned rows */ 865 PetscCall(PetscMalloc1(n, &lrows)); 866 for (r = 0; r < n; ++r) lrows[r] = -1; 867 PetscCall(PetscMalloc1(N, &rrows)); 868 for (r = 0; r < N; ++r) { 869 const PetscInt idx = rows[r]; 870 PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 871 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 872 PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p)); 873 } 
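    /* Record the owning rank and this row's local index on that owner; these (rank, index) pairs tell
       the star forest which owned row each requested global row maps to, so the reduction below can
       flag the owned rows that must be zeroed. */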
874 rrows[r].rank = p; 875 rrows[r].index = rows[r] - owners[p]; 876 } 877 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf)); 878 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 879 /* Collect flags for rows to be zeroed */ 880 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 881 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 882 PetscCall(PetscSFDestroy(&sf)); 883 /* Compress and put in row numbers */ 884 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 885 /* zero diagonal part of matrix */ 886 PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b)); 887 /* handle off diagonal part of matrix */ 888 PetscCall(MatCreateVecs(A,&xmask,NULL)); 889 PetscCall(VecDuplicate(l->lvec,&lmask)); 890 PetscCall(VecGetArray(xmask,&bb)); 891 for (i=0; i<len; i++) bb[lrows[i]] = 1; 892 PetscCall(VecRestoreArray(xmask,&bb)); 893 PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 894 PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 895 PetscCall(VecDestroy(&xmask)); 896 if (x && b) { /* this code is buggy when the row and column layout don't match */ 897 PetscBool cong; 898 899 PetscCall(MatHasCongruentLayouts(A,&cong)); 900 PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 901 PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 902 PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 903 PetscCall(VecGetArrayRead(l->lvec,&xx)); 904 PetscCall(VecGetArray(b,&bb)); 905 } 906 PetscCall(VecGetArray(lmask,&mask)); 907 /* remove zeroed rows of off diagonal matrix */ 908 PetscCall(MatSeqAIJGetArray(l->B,&aij_a)); 909 ii = aij->i; 910 for (i=0; i<len; i++) { 911 PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]])); 912 } 913 /* loop over all elements of off process part of matrix zeroing removed columns*/ 914 if (aij->compressedrow.use) { 915 m = aij->compressedrow.nrows; 916 ii = aij->compressedrow.i; 917 ridx = aij->compressedrow.rindex; 918 for (i=0; i<m; i++) { 919 n = ii[i+1] - ii[i]; 920 aj = aij->j + ii[i]; 921 aa = aij_a + ii[i]; 922 923 for (j=0; j<n; j++) { 924 if (PetscAbsScalar(mask[*aj])) { 925 if (b) bb[*ridx] -= *aa*xx[*aj]; 926 *aa = 0.0; 927 } 928 aa++; 929 aj++; 930 } 931 ridx++; 932 } 933 } else { /* do not use compressed row format */ 934 m = l->B->rmap->n; 935 for (i=0; i<m; i++) { 936 n = ii[i+1] - ii[i]; 937 aj = aij->j + ii[i]; 938 aa = aij_a + ii[i]; 939 for (j=0; j<n; j++) { 940 if (PetscAbsScalar(mask[*aj])) { 941 if (b) bb[i] -= *aa*xx[*aj]; 942 *aa = 0.0; 943 } 944 aa++; 945 aj++; 946 } 947 } 948 } 949 if (x && b) { 950 PetscCall(VecRestoreArray(b,&bb)); 951 PetscCall(VecRestoreArrayRead(l->lvec,&xx)); 952 } 953 PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a)); 954 PetscCall(VecRestoreArray(lmask,&mask)); 955 PetscCall(VecDestroy(&lmask)); 956 PetscCall(PetscFree(lrows)); 957 958 /* only change matrix nonzero state if pattern was allowed to be changed */ 959 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 960 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 961 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 962 } 963 PetscFunctionReturn(0); 964 } 965 966 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 967 { 968 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 969 PetscInt nt; 970 VecScatter Mvctx = a->Mvctx; 971 972 
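
  /*
     The parallel product is y = A_d*x + B_o*x_ghost: A_d (a->A) holds the locally owned columns and
     B_o (a->B) the off-process columns, compressed through garray.  The scatter that gathers the
     needed ghost values of x into a->lvec is started first, the local product with A_d is computed
     while those messages are in flight, and the off-diagonal contribution is added once the scatter
     has completed.
  */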
PetscFunctionBegin; 973 PetscCall(VecGetLocalSize(xx,&nt)); 974 PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt); 975 PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 976 PetscCall((*a->A->ops->mult)(a->A,xx,yy)); 977 PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 978 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy)); 979 PetscFunctionReturn(0); 980 } 981 982 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 983 { 984 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 985 986 PetscFunctionBegin; 987 PetscCall(MatMultDiagonalBlock(a->A,bb,xx)); 988 PetscFunctionReturn(0); 989 } 990 991 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 992 { 993 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 994 VecScatter Mvctx = a->Mvctx; 995 996 PetscFunctionBegin; 997 PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 998 PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz)); 999 PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1000 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz)); 1001 PetscFunctionReturn(0); 1002 } 1003 1004 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1005 { 1006 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1007 1008 PetscFunctionBegin; 1009 /* do nondiagonal part */ 1010 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1011 /* do local part */ 1012 PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy)); 1013 /* add partial results together */ 1014 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1015 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1016 PetscFunctionReturn(0); 1017 } 1018 1019 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1020 { 1021 MPI_Comm comm; 1022 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1023 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1024 IS Me,Notme; 1025 PetscInt M,N,first,last,*notme,i; 1026 PetscBool lf; 1027 PetscMPIInt size; 1028 1029 PetscFunctionBegin; 1030 /* Easy test: symmetric diagonal block */ 1031 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1032 PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf)); 1033 PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat))); 1034 if (!*f) PetscFunctionReturn(0); 1035 PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm)); 1036 PetscCallMPI(MPI_Comm_size(comm,&size)); 1037 if (size == 1) PetscFunctionReturn(0); 1038 1039 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
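     Each rank extracts its owned rows against the columns it does not own, A(me,notme), and the
     complementary block B(notme,me); the two sequential pieces are then compared entrywise with
     MatIsTranspose() to within tol.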
*/ 1040 PetscCall(MatGetSize(Amat,&M,&N)); 1041 PetscCall(MatGetOwnershipRange(Amat,&first,&last)); 1042 PetscCall(PetscMalloc1(N-last+first,¬me)); 1043 for (i=0; i<first; i++) notme[i] = i; 1044 for (i=last; i<M; i++) notme[i-last+first] = i; 1045 PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme)); 1046 PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me)); 1047 PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs)); 1048 Aoff = Aoffs[0]; 1049 PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs)); 1050 Boff = Boffs[0]; 1051 PetscCall(MatIsTranspose(Aoff,Boff,tol,f)); 1052 PetscCall(MatDestroyMatrices(1,&Aoffs)); 1053 PetscCall(MatDestroyMatrices(1,&Boffs)); 1054 PetscCall(ISDestroy(&Me)); 1055 PetscCall(ISDestroy(&Notme)); 1056 PetscCall(PetscFree(notme)); 1057 PetscFunctionReturn(0); 1058 } 1059 1060 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1061 { 1062 PetscFunctionBegin; 1063 PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f)); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1068 { 1069 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1070 1071 PetscFunctionBegin; 1072 /* do nondiagonal part */ 1073 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1074 /* do local part */ 1075 PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz)); 1076 /* add partial results together */ 1077 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1078 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1079 PetscFunctionReturn(0); 1080 } 1081 1082 /* 1083 This only works correctly for square matrices where the subblock A->A is the 1084 diagonal block 1085 */ 1086 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1087 { 1088 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1089 1090 PetscFunctionBegin; 1091 PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1092 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1093 PetscCall(MatGetDiagonal(a->A,v)); 1094 PetscFunctionReturn(0); 1095 } 1096 1097 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1098 { 1099 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1100 1101 PetscFunctionBegin; 1102 PetscCall(MatScale(a->A,aa)); 1103 PetscCall(MatScale(a->B,aa)); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */ 1108 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) 1109 { 1110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1111 1112 PetscFunctionBegin; 1113 PetscCall(PetscSFDestroy(&aij->coo_sf)); 1114 PetscCall(PetscFree(aij->Aperm1)); 1115 PetscCall(PetscFree(aij->Bperm1)); 1116 PetscCall(PetscFree(aij->Ajmap1)); 1117 PetscCall(PetscFree(aij->Bjmap1)); 1118 1119 PetscCall(PetscFree(aij->Aimap2)); 1120 PetscCall(PetscFree(aij->Bimap2)); 1121 PetscCall(PetscFree(aij->Aperm2)); 1122 PetscCall(PetscFree(aij->Bperm2)); 1123 PetscCall(PetscFree(aij->Ajmap2)); 1124 PetscCall(PetscFree(aij->Bjmap2)); 1125 1126 PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf)); 1127 PetscCall(PetscFree(aij->Cperm1)); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1132 { 1133 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1134 1135 PetscFunctionBegin; 1136 #if 
defined(PETSC_USE_LOG) 1137 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 1138 #endif 1139 PetscCall(MatStashDestroy_Private(&mat->stash)); 1140 PetscCall(VecDestroy(&aij->diag)); 1141 PetscCall(MatDestroy(&aij->A)); 1142 PetscCall(MatDestroy(&aij->B)); 1143 #if defined(PETSC_USE_CTABLE) 1144 PetscCall(PetscTableDestroy(&aij->colmap)); 1145 #else 1146 PetscCall(PetscFree(aij->colmap)); 1147 #endif 1148 PetscCall(PetscFree(aij->garray)); 1149 PetscCall(VecDestroy(&aij->lvec)); 1150 PetscCall(VecScatterDestroy(&aij->Mvctx)); 1151 PetscCall(PetscFree2(aij->rowvalues,aij->rowindices)); 1152 PetscCall(PetscFree(aij->ld)); 1153 1154 /* Free COO */ 1155 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 1156 1157 PetscCall(PetscFree(mat->data)); 1158 1159 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1160 PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL)); 1161 1162 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 1163 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL)); 1164 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL)); 1165 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL)); 1166 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL)); 1167 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL)); 1168 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL)); 1169 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL)); 1170 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL)); 1171 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL)); 1172 #if defined(PETSC_HAVE_CUDA) 1173 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL)); 1174 #endif 1175 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1176 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL)); 1177 #endif 1178 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL)); 1179 #if defined(PETSC_HAVE_ELEMENTAL) 1180 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL)); 1181 #endif 1182 #if defined(PETSC_HAVE_SCALAPACK) 1183 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL)); 1184 #endif 1185 #if defined(PETSC_HAVE_HYPRE) 1186 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL)); 1187 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL)); 1188 #endif 1189 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 1190 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL)); 1191 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL)); 1192 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL)); 1193 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL)); 1194 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL)); 1195 #if defined(PETSC_HAVE_MKL_SPARSE) 1196 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL)); 1197 #endif 1198 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL)); 1199 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 1200 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL)); 1201 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL)); 1202 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL)); 1203 PetscFunctionReturn(0); 1204 } 1205 1206 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1207 { 1208 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1209 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1210 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1211 const PetscInt *garray = aij->garray; 1212 const PetscScalar *aa,*ba; 1213 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1214 PetscInt *rowlens; 1215 PetscInt *colidxs; 1216 PetscScalar *matvals; 1217 1218 PetscFunctionBegin; 1219 PetscCall(PetscViewerSetUp(viewer)); 1220 1221 M = mat->rmap->N; 1222 N = mat->cmap->N; 1223 m = mat->rmap->n; 1224 rs = mat->rmap->rstart; 1225 cs = mat->cmap->rstart; 1226 nz = A->nz + B->nz; 1227 1228 /* write matrix header */ 1229 header[0] = MAT_FILE_CLASSID; 1230 header[1] = M; header[2] = N; header[3] = nz; 1231 PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat))); 1232 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1233 1234 /* fill in and store row lengths */ 1235 PetscCall(PetscMalloc1(m,&rowlens)); 1236 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1237 PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT)); 1238 PetscCall(PetscFree(rowlens)); 1239 1240 /* fill in and store column indices */ 1241 PetscCall(PetscMalloc1(nz,&colidxs)); 1242 for (cnt=0, i=0; i<m; i++) { 1243 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1244 if (garray[B->j[jb]] > cs) break; 1245 colidxs[cnt++] = garray[B->j[jb]]; 1246 } 1247 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1248 colidxs[cnt++] = A->j[ja] + cs; 1249 for (; jb<B->i[i+1]; jb++) 1250 colidxs[cnt++] = garray[B->j[jb]]; 1251 } 1252 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1253 PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 1254 PetscCall(PetscFree(colidxs)); 1255 1256 /* fill in and store nonzero values */ 1257 PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa)); 1258 PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba)); 1259 PetscCall(PetscMalloc1(nz,&matvals)); 1260 for (cnt=0, i=0; i<m; i++) { 1261 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1262 if (garray[B->j[jb]] > cs) break; 1263 matvals[cnt++] = ba[jb]; 1264 } 1265 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1266 matvals[cnt++] = aa[ja]; 1267 for (; jb<B->i[i+1]; jb++) 1268 matvals[cnt++] = ba[jb]; 1269 } 1270 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa)); 1271 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba)); 1272 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1273 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 1274 PetscCall(PetscFree(matvals)); 1275 1276 /* write block size option to the viewer's .info file */ 1277 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1278 PetscFunctionReturn(0); 
1279 } 1280 1281 #include <petscdraw.h> 1282 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1283 { 1284 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1285 PetscMPIInt rank = aij->rank,size = aij->size; 1286 PetscBool isdraw,iascii,isbinary; 1287 PetscViewer sviewer; 1288 PetscViewerFormat format; 1289 1290 PetscFunctionBegin; 1291 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1292 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1293 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1294 if (iascii) { 1295 PetscCall(PetscViewerGetFormat(viewer,&format)); 1296 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1297 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1298 PetscCall(PetscMalloc1(size,&nz)); 1299 PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 1300 for (i=0; i<(PetscInt)size; i++) { 1301 nmax = PetscMax(nmax,nz[i]); 1302 nmin = PetscMin(nmin,nz[i]); 1303 navg += nz[i]; 1304 } 1305 PetscCall(PetscFree(nz)); 1306 navg = navg/size; 1307 PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax)); 1308 PetscFunctionReturn(0); 1309 } 1310 PetscCall(PetscViewerGetFormat(viewer,&format)); 1311 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1312 MatInfo info; 1313 PetscInt *inodes=NULL; 1314 1315 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 1316 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 1317 PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL)); 1318 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1319 if (!inodes) { 1320 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", 1321 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1322 } else { 1323 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", 1324 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1325 } 1326 PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info)); 1327 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1328 PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info)); 1329 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1330 PetscCall(PetscViewerFlush(viewer)); 1331 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1332 PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 1333 PetscCall(VecScatterView(aij->Mvctx,viewer)); 1334 PetscFunctionReturn(0); 1335 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1336 PetscInt inodecount,inodelimit,*inodes; 1337 PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit)); 1338 if (inodes) { 1339 PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit)); 1340 } else { 1341 PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n")); 1342 } 
1343 PetscFunctionReturn(0); 1344 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1345 PetscFunctionReturn(0); 1346 } 1347 } else if (isbinary) { 1348 if (size == 1) { 1349 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1350 PetscCall(MatView(aij->A,viewer)); 1351 } else { 1352 PetscCall(MatView_MPIAIJ_Binary(mat,viewer)); 1353 } 1354 PetscFunctionReturn(0); 1355 } else if (iascii && size == 1) { 1356 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1357 PetscCall(MatView(aij->A,viewer)); 1358 PetscFunctionReturn(0); 1359 } else if (isdraw) { 1360 PetscDraw draw; 1361 PetscBool isnull; 1362 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1363 PetscCall(PetscDrawIsNull(draw,&isnull)); 1364 if (isnull) PetscFunctionReturn(0); 1365 } 1366 1367 { /* assemble the entire matrix onto first processor */ 1368 Mat A = NULL, Av; 1369 IS isrow,iscol; 1370 1371 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1372 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1373 PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A)); 1374 PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL)); 1375 /* The commented code uses MatCreateSubMatrices instead */ 1376 /* 1377 Mat *AA, A = NULL, Av; 1378 IS isrow,iscol; 1379 1380 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1381 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1382 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1383 if (rank == 0) { 1384 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1385 A = AA[0]; 1386 Av = AA[0]; 1387 } 1388 PetscCall(MatDestroySubMatrices(1,&AA)); 1389 */ 1390 PetscCall(ISDestroy(&iscol)); 1391 PetscCall(ISDestroy(&isrow)); 1392 /* 1393 Everyone has to call to draw the matrix since the graphics waits are 1394 synchronized across all processors that share the PetscDraw object 1395 */ 1396 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1397 if (rank == 0) { 1398 if (((PetscObject)mat)->name) { 1399 PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name)); 1400 } 1401 PetscCall(MatView_SeqAIJ(Av,sviewer)); 1402 } 1403 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1404 PetscCall(PetscViewerFlush(viewer)); 1405 PetscCall(MatDestroy(&A)); 1406 } 1407 PetscFunctionReturn(0); 1408 } 1409 1410 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1411 { 1412 PetscBool iascii,isdraw,issocket,isbinary; 1413 1414 PetscFunctionBegin; 1415 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1416 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1417 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1418 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 1419 if (iascii || isdraw || isbinary || issocket) { 1420 PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer)); 1421 } 1422 PetscFunctionReturn(0); 1423 } 1424 1425 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1426 { 1427 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1428 Vec bb1 = NULL; 1429 PetscBool hasop; 1430 1431 PetscFunctionBegin; 1432 if (flag == SOR_APPLY_UPPER) { 1433 
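    /* SOR_APPLY_UPPER is delegated entirely to the sequential SOR kernel of the diagonal block;
       the off-diagonal block does not participate in this operation. */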
PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1438 PetscCall(VecDuplicate(bb,&bb1)); 1439 } 1440 1441 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1442 if (flag & SOR_ZERO_INITIAL_GUESS) { 1443 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1444 its--; 1445 } 1446 1447 while (its--) { 1448 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1449 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1450 1451 /* update rhs: bb1 = bb - B*x */ 1452 PetscCall(VecScale(mat->lvec,-1.0)); 1453 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1454 1455 /* local sweep */ 1456 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 1457 } 1458 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1459 if (flag & SOR_ZERO_INITIAL_GUESS) { 1460 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1461 its--; 1462 } 1463 while (its--) { 1464 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1465 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1466 1467 /* update rhs: bb1 = bb - B*x */ 1468 PetscCall(VecScale(mat->lvec,-1.0)); 1469 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1470 1471 /* local sweep */ 1472 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 1473 } 1474 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1475 if (flag & SOR_ZERO_INITIAL_GUESS) { 1476 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1477 its--; 1478 } 1479 while (its--) { 1480 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1481 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1482 1483 /* update rhs: bb1 = bb - B*x */ 1484 PetscCall(VecScale(mat->lvec,-1.0)); 1485 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1486 1487 /* local sweep */ 1488 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 1489 } 1490 } else if (flag & SOR_EISENSTAT) { 1491 Vec xx1; 1492 1493 PetscCall(VecDuplicate(bb,&xx1)); 1494 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx)); 1495 1496 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1497 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1498 if (!mat->diag) { 1499 PetscCall(MatCreateVecs(matin,&mat->diag,NULL)); 1500 PetscCall(MatGetDiagonal(matin,mat->diag)); 1501 } 1502 PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop)); 1503 if (hasop) { 1504 PetscCall(MatMultDiagonalBlock(matin,xx,bb1)); 1505 } else { 1506 PetscCall(VecPointwiseMult(bb1,mat->diag,xx)); 1507 } 1508 PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb)); 1509 1510 PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1)); 1511 1512 /* local sweep */ 1513 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1)); 1514 PetscCall(VecAXPY(xx,1.0,xx1)); 1515 PetscCall(VecDestroy(&xx1)); 1516 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1517 1518 PetscCall(VecDestroy(&bb1)); 1519 1520 matin->factorerrortype = 
mat->A->factorerrortype; 1521 PetscFunctionReturn(0); 1522 } 1523 1524 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1525 { 1526 Mat aA,aB,Aperm; 1527 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1528 PetscScalar *aa,*ba; 1529 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1530 PetscSF rowsf,sf; 1531 IS parcolp = NULL; 1532 PetscBool done; 1533 1534 PetscFunctionBegin; 1535 PetscCall(MatGetLocalSize(A,&m,&n)); 1536 PetscCall(ISGetIndices(rowp,&rwant)); 1537 PetscCall(ISGetIndices(colp,&cwant)); 1538 PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest)); 1539 1540 /* Invert row permutation to find out where my rows should go */ 1541 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf)); 1542 PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant)); 1543 PetscCall(PetscSFSetFromOptions(rowsf)); 1544 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1545 PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1546 PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1547 1548 /* Invert column permutation to find out where my columns should go */ 1549 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1550 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant)); 1551 PetscCall(PetscSFSetFromOptions(sf)); 1552 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1553 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1554 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1555 PetscCall(PetscSFDestroy(&sf)); 1556 1557 PetscCall(ISRestoreIndices(rowp,&rwant)); 1558 PetscCall(ISRestoreIndices(colp,&cwant)); 1559 PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols)); 1560 1561 /* Find out where my gcols should go */ 1562 PetscCall(MatGetSize(aB,NULL,&ng)); 1563 PetscCall(PetscMalloc1(ng,&gcdest)); 1564 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1565 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols)); 1566 PetscCall(PetscSFSetFromOptions(sf)); 1567 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1568 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1569 PetscCall(PetscSFDestroy(&sf)); 1570 1571 PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz)); 1572 PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1573 PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1574 for (i=0; i<m; i++) { 1575 PetscInt row = rdest[i]; 1576 PetscMPIInt rowner; 1577 PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner)); 1578 for (j=ai[i]; j<ai[i+1]; j++) { 1579 PetscInt col = cdest[aj[j]]; 1580 PetscMPIInt cowner; 1581 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */ 1582 if (rowner == cowner) dnnz[i]++; 1583 else onnz[i]++; 1584 } 1585 for (j=bi[i]; j<bi[i+1]; j++) { 1586 PetscInt col = gcdest[bj[j]]; 1587 PetscMPIInt cowner; 1588 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); 1589 if (rowner == cowner) dnnz[i]++; 1590 else onnz[i]++; 1591 } 1592 } 1593 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1594 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1595 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1596 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1597 PetscCall(PetscSFDestroy(&rowsf)); 1598 1599 
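/* With the destination row and column indices (rdest, cdest, gcdest) and the per-row
   diagonal/off-diagonal nonzero counts (tdnnz, tonnz) now available, the permuted matrix can be
   preallocated and filled with MatSetValues() below. A minimal caller-side sketch, assuming
   hypothetical parallel index sets rowp and colp that describe the desired new ordering
   (their construction is elided):

     IS  rowp,colp;
     Mat Aperm;
     ...
     PetscCall(MatPermute(A,rowp,colp,&Aperm));
     ...
     PetscCall(MatDestroy(&Aperm));
*/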
PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm)); 1600 PetscCall(MatSeqAIJGetArray(aA,&aa)); 1601 PetscCall(MatSeqAIJGetArray(aB,&ba)); 1602 for (i=0; i<m; i++) { 1603 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1604 PetscInt j0,rowlen; 1605 rowlen = ai[i+1] - ai[i]; 1606 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1607 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1608 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES)); 1609 } 1610 rowlen = bi[i+1] - bi[i]; 1611 for (j0=j=0; j<rowlen; j0=j) { 1612 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1613 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES)); 1614 } 1615 } 1616 PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY)); 1617 PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY)); 1618 PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1619 PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1620 PetscCall(MatSeqAIJRestoreArray(aA,&aa)); 1621 PetscCall(MatSeqAIJRestoreArray(aB,&ba)); 1622 PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz)); 1623 PetscCall(PetscFree3(work,rdest,cdest)); 1624 PetscCall(PetscFree(gcdest)); 1625 if (parcolp) PetscCall(ISDestroy(&colp)); 1626 *B = Aperm; 1627 PetscFunctionReturn(0); 1628 } 1629 1630 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1631 { 1632 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1633 1634 PetscFunctionBegin; 1635 PetscCall(MatGetSize(aij->B,NULL,nghosts)); 1636 if (ghosts) *ghosts = aij->garray; 1637 PetscFunctionReturn(0); 1638 } 1639 1640 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1641 { 1642 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1643 Mat A = mat->A,B = mat->B; 1644 PetscLogDouble isend[5],irecv[5]; 1645 1646 PetscFunctionBegin; 1647 info->block_size = 1.0; 1648 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1649 1650 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1651 isend[3] = info->memory; isend[4] = info->mallocs; 1652 1653 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1654 1655 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1656 isend[3] += info->memory; isend[4] += info->mallocs; 1657 if (flag == MAT_LOCAL) { 1658 info->nz_used = isend[0]; 1659 info->nz_allocated = isend[1]; 1660 info->nz_unneeded = isend[2]; 1661 info->memory = isend[3]; 1662 info->mallocs = isend[4]; 1663 } else if (flag == MAT_GLOBAL_MAX) { 1664 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 1665 1666 info->nz_used = irecv[0]; 1667 info->nz_allocated = irecv[1]; 1668 info->nz_unneeded = irecv[2]; 1669 info->memory = irecv[3]; 1670 info->mallocs = irecv[4]; 1671 } else if (flag == MAT_GLOBAL_SUM) { 1672 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 1673 1674 info->nz_used = irecv[0]; 1675 info->nz_allocated = irecv[1]; 1676 info->nz_unneeded = irecv[2]; 1677 info->memory = irecv[3]; 1678 info->mallocs = irecv[4]; 1679 } 1680 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1681 info->fill_ratio_needed = 0; 1682 info->factor_mallocs = 0; 1683 PetscFunctionReturn(0); 1684 } 1685 1686 PetscErrorCode MatSetOption_MPIAIJ(Mat 
A,MatOption op,PetscBool flg) 1687 { 1688 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1689 1690 PetscFunctionBegin; 1691 switch (op) { 1692 case MAT_NEW_NONZERO_LOCATIONS: 1693 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1694 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1695 case MAT_KEEP_NONZERO_PATTERN: 1696 case MAT_NEW_NONZERO_LOCATION_ERR: 1697 case MAT_USE_INODES: 1698 case MAT_IGNORE_ZERO_ENTRIES: 1699 case MAT_FORM_EXPLICIT_TRANSPOSE: 1700 MatCheckPreallocated(A,1); 1701 PetscCall(MatSetOption(a->A,op,flg)); 1702 PetscCall(MatSetOption(a->B,op,flg)); 1703 break; 1704 case MAT_ROW_ORIENTED: 1705 MatCheckPreallocated(A,1); 1706 a->roworiented = flg; 1707 1708 PetscCall(MatSetOption(a->A,op,flg)); 1709 PetscCall(MatSetOption(a->B,op,flg)); 1710 break; 1711 case MAT_FORCE_DIAGONAL_ENTRIES: 1712 case MAT_SORTED_FULL: 1713 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1714 break; 1715 case MAT_IGNORE_OFF_PROC_ENTRIES: 1716 a->donotstash = flg; 1717 break; 1718 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1719 case MAT_SPD: 1720 case MAT_SYMMETRIC: 1721 case MAT_STRUCTURALLY_SYMMETRIC: 1722 case MAT_HERMITIAN: 1723 case MAT_SYMMETRY_ETERNAL: 1724 break; 1725 case MAT_SUBMAT_SINGLEIS: 1726 A->submat_singleis = flg; 1727 break; 1728 case MAT_STRUCTURE_ONLY: 1729 /* The option is handled directly by MatSetOption() */ 1730 break; 1731 default: 1732 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1733 } 1734 PetscFunctionReturn(0); 1735 } 1736 1737 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1738 { 1739 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1740 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1741 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1742 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1743 PetscInt *cmap,*idx_p; 1744 1745 PetscFunctionBegin; 1746 PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1747 mat->getrowactive = PETSC_TRUE; 1748 1749 if (!mat->rowvalues && (idx || v)) { 1750 /* 1751 allocate enough space to hold information from the longest row. 
1752 */ 1753 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1754 PetscInt max = 1,tmp; 1755 for (i=0; i<matin->rmap->n; i++) { 1756 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1757 if (max < tmp) max = tmp; 1758 } 1759 PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices)); 1760 } 1761 1762 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1763 lrow = row - rstart; 1764 1765 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1766 if (!v) {pvA = NULL; pvB = NULL;} 1767 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1768 PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 1769 PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1770 nztot = nzA + nzB; 1771 1772 cmap = mat->garray; 1773 if (v || idx) { 1774 if (nztot) { 1775 /* Sort by increasing column numbers, assuming A and B already sorted */ 1776 PetscInt imark = -1; 1777 if (v) { 1778 *v = v_p = mat->rowvalues; 1779 for (i=0; i<nzB; i++) { 1780 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1781 else break; 1782 } 1783 imark = i; 1784 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1785 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1786 } 1787 if (idx) { 1788 *idx = idx_p = mat->rowindices; 1789 if (imark > -1) { 1790 for (i=0; i<imark; i++) { 1791 idx_p[i] = cmap[cworkB[i]]; 1792 } 1793 } else { 1794 for (i=0; i<nzB; i++) { 1795 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1796 else break; 1797 } 1798 imark = i; 1799 } 1800 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1801 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1802 } 1803 } else { 1804 if (idx) *idx = NULL; 1805 if (v) *v = NULL; 1806 } 1807 } 1808 *nz = nztot; 1809 PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 1810 PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 1811 PetscFunctionReturn(0); 1812 } 1813 1814 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1815 { 1816 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1817 1818 PetscFunctionBegin; 1819 PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1820 aij->getrowactive = PETSC_FALSE; 1821 PetscFunctionReturn(0); 1822 } 1823 1824 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1825 { 1826 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1827 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1828 PetscInt i,j,cstart = mat->cmap->rstart; 1829 PetscReal sum = 0.0; 1830 const MatScalar *v,*amata,*bmata; 1831 1832 PetscFunctionBegin; 1833 if (aij->size == 1) { 1834 PetscCall(MatNorm(aij->A,type,norm)); 1835 } else { 1836 PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata)); 1837 PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata)); 1838 if (type == NORM_FROBENIUS) { 1839 v = amata; 1840 for (i=0; i<amat->nz; i++) { 1841 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1842 } 1843 v = bmata; 1844 for (i=0; i<bmat->nz; i++) { 1845 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1846 } 1847 PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1848 *norm = PetscSqrtReal(*norm); 1849 PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz)); 1850 } else if (type == NORM_1) { /* max column norm */ 1851 PetscReal *tmp,*tmp2; 1852 PetscInt *jj,*garray = aij->garray; 1853 PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp)); 1854 PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2)); 1855 
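/* NORM_1 is the maximum column sum, ||A||_1 = max_j sum_i |a_ij|. Each rank accumulates the
   absolute values of its stored entries into the global-length array tmp (diagonal-block columns
   are offset by cstart, off-diagonal columns are mapped through garray); the Allreduce below sums
   the per-rank partial column sums, and the largest entry of the result is the norm. */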
*norm = 0.0; 1856 v = amata; jj = amat->j; 1857 for (j=0; j<amat->nz; j++) { 1858 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1859 } 1860 v = bmata; jj = bmat->j; 1861 for (j=0; j<bmat->nz; j++) { 1862 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1863 } 1864 PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1865 for (j=0; j<mat->cmap->N; j++) { 1866 if (tmp2[j] > *norm) *norm = tmp2[j]; 1867 } 1868 PetscCall(PetscFree(tmp)); 1869 PetscCall(PetscFree(tmp2)); 1870 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1871 } else if (type == NORM_INFINITY) { /* max row norm */ 1872 PetscReal ntemp = 0.0; 1873 for (j=0; j<aij->A->rmap->n; j++) { 1874 v = amata + amat->i[j]; 1875 sum = 0.0; 1876 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1877 sum += PetscAbsScalar(*v); v++; 1878 } 1879 v = bmata + bmat->i[j]; 1880 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1881 sum += PetscAbsScalar(*v); v++; 1882 } 1883 if (sum > ntemp) ntemp = sum; 1884 } 1885 PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 1886 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1887 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1888 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata)); 1889 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata)); 1890 } 1891 PetscFunctionReturn(0); 1892 } 1893 1894 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1895 { 1896 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1897 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1898 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1899 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1900 Mat B,A_diag,*B_diag; 1901 const MatScalar *pbv,*bv; 1902 1903 PetscFunctionBegin; 1904 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1905 ai = Aloc->i; aj = Aloc->j; 1906 bi = Bloc->i; bj = Bloc->j; 1907 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1908 PetscInt *d_nnz,*g_nnz,*o_nnz; 1909 PetscSFNode *oloc; 1910 PETSC_UNUSED PetscSF sf; 1911 1912 PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc)); 1913 /* compute d_nnz for preallocation */ 1914 PetscCall(PetscArrayzero(d_nnz,na)); 1915 for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++; 1916 /* compute local off-diagonal contributions */ 1917 PetscCall(PetscArrayzero(g_nnz,nb)); 1918 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1919 /* map those to global */ 1920 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1921 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray)); 1922 PetscCall(PetscSFSetFromOptions(sf)); 1923 PetscCall(PetscArrayzero(o_nnz,na)); 1924 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1925 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1926 PetscCall(PetscSFDestroy(&sf)); 1927 1928 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 1929 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 1930 PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs))); 1931 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 1932 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 1933 PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc)); 1934 } else { 1935 B = *matout; 1936 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 1937 } 1938 1939 b = (Mat_MPIAIJ*)B->data; 1940 A_diag = a->A; 1941 B_diag = &b->A; 
1942 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1943 A_diag_ncol = A_diag->cmap->N; 1944 B_diag_ilen = sub_B_diag->ilen; 1945 B_diag_i = sub_B_diag->i; 1946 1947 /* Set ilen for diagonal of B */ 1948 for (i=0; i<A_diag_ncol; i++) { 1949 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1950 } 1951 1952 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1953 very quickly (=without using MatSetValues), because all writes are local. */ 1954 PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag)); 1955 1956 /* copy over the B part */ 1957 PetscCall(PetscMalloc1(bi[mb],&cols)); 1958 PetscCall(MatSeqAIJGetArrayRead(a->B,&bv)); 1959 pbv = bv; 1960 row = A->rmap->rstart; 1961 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1962 cols_tmp = cols; 1963 for (i=0; i<mb; i++) { 1964 ncol = bi[i+1]-bi[i]; 1965 PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES)); 1966 row++; 1967 pbv += ncol; cols_tmp += ncol; 1968 } 1969 PetscCall(PetscFree(cols)); 1970 PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv)); 1971 1972 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1973 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1974 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1975 *matout = B; 1976 } else { 1977 PetscCall(MatHeaderMerge(A,&B)); 1978 } 1979 PetscFunctionReturn(0); 1980 } 1981 1982 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1983 { 1984 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1985 Mat a = aij->A,b = aij->B; 1986 PetscInt s1,s2,s3; 1987 1988 PetscFunctionBegin; 1989 PetscCall(MatGetLocalSize(mat,&s2,&s3)); 1990 if (rr) { 1991 PetscCall(VecGetLocalSize(rr,&s1)); 1992 PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1993 /* Overlap communication with computation. */ 1994 PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1995 } 1996 if (ll) { 1997 PetscCall(VecGetLocalSize(ll,&s1)); 1998 PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1999 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 2000 } 2001 /* scale the diagonal block */ 2002 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 2003 2004 if (rr) { 2005 /* Do a scatter end and then right scale the off-diagonal block */ 2006 PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2007 PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec)); 2008 } 2009 PetscFunctionReturn(0); 2010 } 2011 2012 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2013 { 2014 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2015 2016 PetscFunctionBegin; 2017 PetscCall(MatSetUnfactored(a->A)); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2022 { 2023 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2024 Mat a,b,c,d; 2025 PetscBool flg; 2026 2027 PetscFunctionBegin; 2028 a = matA->A; b = matA->B; 2029 c = matB->A; d = matB->B; 2030 2031 PetscCall(MatEqual(a,c,&flg)); 2032 if (flg) { 2033 PetscCall(MatEqual(b,d,&flg)); 2034 } 2035 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 2036 PetscFunctionReturn(0); 2037 } 2038 2039 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2040 { 2041 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2042 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2043 2044 PetscFunctionBegin; 2045 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2046 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2047 /* because of the column compression in the off-processor part of the matrix a->B, 2048 the number of columns in a->B and b->B may be different, hence we cannot call 2049 the MatCopy() directly on the two parts. If need be, we can provide a more 2050 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2051 then copying the submatrices */ 2052 PetscCall(MatCopy_Basic(A,B,str)); 2053 } else { 2054 PetscCall(MatCopy(a->A,b->A,str)); 2055 PetscCall(MatCopy(a->B,b->B,str)); 2056 } 2057 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2058 PetscFunctionReturn(0); 2059 } 2060 2061 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2062 { 2063 PetscFunctionBegin; 2064 PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 /* 2069 Computes the number of nonzeros per row needed for preallocation when X and Y 2070 have different nonzero structure. 2071 */ 2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2073 { 2074 PetscInt i,j,k,nzx,nzy; 2075 2076 PetscFunctionBegin; 2077 /* Set the number of nonzeros in the new matrix */ 2078 for (i=0; i<m; i++) { 2079 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2080 nzx = xi[i+1] - xi[i]; 2081 nzy = yi[i+1] - yi[i]; 2082 nnz[i] = 0; 2083 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2084 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2085 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2086 nnz[i]++; 2087 } 2088 for (; k<nzy; k++) nnz[i]++; 2089 } 2090 PetscFunctionReturn(0); 2091 } 2092 2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2095 { 2096 PetscInt m = Y->rmap->N; 2097 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2098 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2099 2100 PetscFunctionBegin; 2101 PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 2102 PetscFunctionReturn(0); 2103 } 2104 2105 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2106 { 2107 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2108 2109 PetscFunctionBegin; 2110 if (str == SAME_NONZERO_PATTERN) { 2111 PetscCall(MatAXPY(yy->A,a,xx->A,str)); 2112 PetscCall(MatAXPY(yy->B,a,xx->B,str)); 2113 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2114 PetscCall(MatAXPY_Basic(Y,a,X,str)); 2115 } else { 2116 Mat B; 2117 PetscInt *nnz_d,*nnz_o; 2118 2119 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 2120 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 2121 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 2122 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2123 PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap)); 2124 PetscCall(MatSetType(B,((PetscObject)Y)->type_name)); 2125 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d)); 2126 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 2127 PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o)); 2128 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2129 PetscCall(MatHeaderMerge(Y,&B)); 2130 
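/* At this point MatHeaderMerge() has moved the internals of the freshly assembled B into Y
   (destroying B), so Y keeps its PetscObject identity while acquiring the union of the two
   nonzero patterns; only the temporary preallocation counts remain to be freed. */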
PetscCall(PetscFree(nnz_d)); 2131 PetscCall(PetscFree(nnz_o)); 2132 } 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2137 2138 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2139 { 2140 PetscFunctionBegin; 2141 if (PetscDefined(USE_COMPLEX)) { 2142 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2143 2144 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2145 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2146 } 2147 PetscFunctionReturn(0); 2148 } 2149 2150 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2151 { 2152 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2153 2154 PetscFunctionBegin; 2155 PetscCall(MatRealPart(a->A)); 2156 PetscCall(MatRealPart(a->B)); 2157 PetscFunctionReturn(0); 2158 } 2159 2160 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2161 { 2162 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2163 2164 PetscFunctionBegin; 2165 PetscCall(MatImaginaryPart(a->A)); 2166 PetscCall(MatImaginaryPart(a->B)); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2171 { 2172 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2173 PetscInt i,*idxb = NULL,m = A->rmap->n; 2174 PetscScalar *va,*vv; 2175 Vec vB,vA; 2176 const PetscScalar *vb; 2177 2178 PetscFunctionBegin; 2179 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 2180 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 2181 2182 PetscCall(VecGetArrayWrite(vA,&va)); 2183 if (idx) { 2184 for (i=0; i<m; i++) { 2185 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2186 } 2187 } 2188 2189 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 2190 PetscCall(PetscMalloc1(m,&idxb)); 2191 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 2192 2193 PetscCall(VecGetArrayWrite(v,&vv)); 2194 PetscCall(VecGetArrayRead(vB,&vb)); 2195 for (i=0; i<m; i++) { 2196 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2197 vv[i] = vb[i]; 2198 if (idx) idx[i] = a->garray[idxb[i]]; 2199 } else { 2200 vv[i] = va[i]; 2201 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2202 idx[i] = a->garray[idxb[i]]; 2203 } 2204 } 2205 PetscCall(VecRestoreArrayWrite(v,&vv)); 2206 PetscCall(VecRestoreArrayWrite(vA,&va)); 2207 PetscCall(VecRestoreArrayRead(vB,&vb)); 2208 PetscCall(PetscFree(idxb)); 2209 PetscCall(VecDestroy(&vA)); 2210 PetscCall(VecDestroy(&vB)); 2211 PetscFunctionReturn(0); 2212 } 2213 2214 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2215 { 2216 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2217 PetscInt m = A->rmap->n,n = A->cmap->n; 2218 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2219 PetscInt *cmap = mat->garray; 2220 PetscInt *diagIdx, *offdiagIdx; 2221 Vec diagV, offdiagV; 2222 PetscScalar *a, *diagA, *offdiagA; 2223 const PetscScalar *ba,*bav; 2224 PetscInt r,j,col,ncols,*bi,*bj; 2225 Mat B = mat->B; 2226 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2227 2228 PetscFunctionBegin; 2229 /* When a process holds entire A and other processes have no entry */ 2230 if (A->cmap->N == n) { 2231 PetscCall(VecGetArrayWrite(v,&diagA)); 2232 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2233 PetscCall(MatGetRowMinAbs(mat->A,diagV,idx)); 2234 PetscCall(VecDestroy(&diagV)); 2235 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2236 PetscFunctionReturn(0); 2237 } else if (n == 0) { 2238 if (m) { 2239 PetscCall(VecGetArrayWrite(v,&a)); 2240 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2241 PetscCall(VecRestoreArrayWrite(v,&a)); 2242 } 2243 PetscFunctionReturn(0); 2244 } 2245 2246
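/* The general case mirrors MatGetRowMaxAbs_MPIAIJ() but must also account for entries that are
   not stored: the off-diagonal block keeps only nonzeros, so a row with fewer than cmap->N - n
   stored off-diagonal entries contains an implicit 0.0, which is the smallest possible absolute
   value. The loop below therefore starts offdiagA[r] at 0.0 and scans the compressed column map
   for the first "hole" so it can report the global column index of that implicit zero. */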
PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2247 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2248 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2249 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2250 2251 /* Get offdiagIdx[] for implicit 0.0 */ 2252 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2253 ba = bav; 2254 bi = b->i; 2255 bj = b->j; 2256 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2257 for (r = 0; r < m; r++) { 2258 ncols = bi[r+1] - bi[r]; 2259 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2260 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2261 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2262 offdiagA[r] = 0.0; 2263 2264 /* Find first hole in the cmap */ 2265 for (j=0; j<ncols; j++) { 2266 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2267 if (col > j && j < cstart) { 2268 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2269 break; 2270 } else if (col > j + n && j >= cstart) { 2271 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2272 break; 2273 } 2274 } 2275 if (j == ncols && ncols < A->cmap->N - n) { 2276 /* a hole is outside compressed Bcols */ 2277 if (ncols == 0) { 2278 if (cstart) { 2279 offdiagIdx[r] = 0; 2280 } else offdiagIdx[r] = cend; 2281 } else { /* ncols > 0 */ 2282 offdiagIdx[r] = cmap[ncols-1] + 1; 2283 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2284 } 2285 } 2286 } 2287 2288 for (j=0; j<ncols; j++) { 2289 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2290 ba++; bj++; 2291 } 2292 } 2293 2294 PetscCall(VecGetArrayWrite(v, &a)); 2295 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2296 for (r = 0; r < m; ++r) { 2297 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2298 a[r] = diagA[r]; 2299 if (idx) idx[r] = cstart + diagIdx[r]; 2300 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2301 a[r] = diagA[r]; 2302 if (idx) { 2303 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2304 idx[r] = cstart + diagIdx[r]; 2305 } else idx[r] = offdiagIdx[r]; 2306 } 2307 } else { 2308 a[r] = offdiagA[r]; 2309 if (idx) idx[r] = offdiagIdx[r]; 2310 } 2311 } 2312 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2313 PetscCall(VecRestoreArrayWrite(v, &a)); 2314 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2315 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2316 PetscCall(VecDestroy(&diagV)); 2317 PetscCall(VecDestroy(&offdiagV)); 2318 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2319 PetscFunctionReturn(0); 2320 } 2321 2322 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2323 { 2324 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2325 PetscInt m = A->rmap->n,n = A->cmap->n; 2326 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2327 PetscInt *cmap = mat->garray; 2328 PetscInt *diagIdx, *offdiagIdx; 2329 Vec diagV, offdiagV; 2330 PetscScalar *a, *diagA, *offdiagA; 2331 const PetscScalar *ba,*bav; 2332 PetscInt r,j,col,ncols,*bi,*bj; 2333 Mat B = mat->B; 2334 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2335 2336 PetscFunctionBegin; 2337 /* When a process holds entire A and other processes have no entry */ 2338 if (A->cmap->N == n) { 2339 PetscCall(VecGetArrayWrite(v,&diagA)); 2340 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2341 PetscCall(MatGetRowMin(mat->A,diagV,idx)); 2342 PetscCall(VecDestroy(&diagV)); 2343 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2344 PetscFunctionReturn(0); 2345 } else if 
(n == 0) { 2346 if (m) { 2347 PetscCall(VecGetArrayWrite(v,&a)); 2348 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2349 PetscCall(VecRestoreArrayWrite(v,&a)); 2350 } 2351 PetscFunctionReturn(0); 2352 } 2353 2354 PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx)); 2355 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2356 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2357 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2358 2359 /* Get offdiagIdx[] for implicit 0.0 */ 2360 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2361 ba = bav; 2362 bi = b->i; 2363 bj = b->j; 2364 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2365 for (r = 0; r < m; r++) { 2366 ncols = bi[r+1] - bi[r]; 2367 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2368 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2369 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2370 offdiagA[r] = 0.0; 2371 2372 /* Find first hole in the cmap */ 2373 for (j=0; j<ncols; j++) { 2374 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2375 if (col > j && j < cstart) { 2376 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2377 break; 2378 } else if (col > j + n && j >= cstart) { 2379 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2380 break; 2381 } 2382 } 2383 if (j == ncols && ncols < A->cmap->N - n) { 2384 /* a hole is outside compressed Bcols */ 2385 if (ncols == 0) { 2386 if (cstart) { 2387 offdiagIdx[r] = 0; 2388 } else offdiagIdx[r] = cend; 2389 } else { /* ncols > 0 */ 2390 offdiagIdx[r] = cmap[ncols-1] + 1; 2391 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2392 } 2393 } 2394 } 2395 2396 for (j=0; j<ncols; j++) { 2397 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2398 ba++; bj++; 2399 } 2400 } 2401 2402 PetscCall(VecGetArrayWrite(v, &a)); 2403 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2404 for (r = 0; r < m; ++r) { 2405 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2406 a[r] = diagA[r]; 2407 if (idx) idx[r] = cstart + diagIdx[r]; 2408 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2409 a[r] = diagA[r]; 2410 if (idx) { 2411 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2412 idx[r] = cstart + diagIdx[r]; 2413 } else idx[r] = offdiagIdx[r]; 2414 } 2415 } else { 2416 a[r] = offdiagA[r]; 2417 if (idx) idx[r] = offdiagIdx[r]; 2418 } 2419 } 2420 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2421 PetscCall(VecRestoreArrayWrite(v, &a)); 2422 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2423 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2424 PetscCall(VecDestroy(&diagV)); 2425 PetscCall(VecDestroy(&offdiagV)); 2426 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2431 { 2432 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2433 PetscInt m = A->rmap->n,n = A->cmap->n; 2434 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2435 PetscInt *cmap = mat->garray; 2436 PetscInt *diagIdx, *offdiagIdx; 2437 Vec diagV, offdiagV; 2438 PetscScalar *a, *diagA, *offdiagA; 2439 const PetscScalar *ba,*bav; 2440 PetscInt r,j,col,ncols,*bi,*bj; 2441 Mat B = mat->B; 2442 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2443 2444 PetscFunctionBegin; 2445 /* When a process holds entire A and other processes have no entry */ 2446 if (A->cmap->N == n) { 2447 PetscCall(VecGetArrayWrite(v,&diagA)); 2448 
PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2449 PetscCall(MatGetRowMax(mat->A,diagV,idx)); 2450 PetscCall(VecDestroy(&diagV)); 2451 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2452 PetscFunctionReturn(0); 2453 } else if (n == 0) { 2454 if (m) { 2455 PetscCall(VecGetArrayWrite(v,&a)); 2456 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2457 PetscCall(VecRestoreArrayWrite(v,&a)); 2458 } 2459 PetscFunctionReturn(0); 2460 } 2461 2462 PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2463 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2464 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2465 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2466 2467 /* Get offdiagIdx[] for implicit 0.0 */ 2468 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2469 ba = bav; 2470 bi = b->i; 2471 bj = b->j; 2472 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2473 for (r = 0; r < m; r++) { 2474 ncols = bi[r+1] - bi[r]; 2475 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2476 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2477 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2478 offdiagA[r] = 0.0; 2479 2480 /* Find first hole in the cmap */ 2481 for (j=0; j<ncols; j++) { 2482 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2483 if (col > j && j < cstart) { 2484 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2485 break; 2486 } else if (col > j + n && j >= cstart) { 2487 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2488 break; 2489 } 2490 } 2491 if (j == ncols && ncols < A->cmap->N - n) { 2492 /* a hole is outside compressed Bcols */ 2493 if (ncols == 0) { 2494 if (cstart) { 2495 offdiagIdx[r] = 0; 2496 } else offdiagIdx[r] = cend; 2497 } else { /* ncols > 0 */ 2498 offdiagIdx[r] = cmap[ncols-1] + 1; 2499 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2500 } 2501 } 2502 } 2503 2504 for (j=0; j<ncols; j++) { 2505 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2506 ba++; bj++; 2507 } 2508 } 2509 2510 PetscCall(VecGetArrayWrite(v, &a)); 2511 PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA)); 2512 for (r = 0; r < m; ++r) { 2513 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2514 a[r] = diagA[r]; 2515 if (idx) idx[r] = cstart + diagIdx[r]; 2516 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2517 a[r] = diagA[r]; 2518 if (idx) { 2519 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2520 idx[r] = cstart + diagIdx[r]; 2521 } else idx[r] = offdiagIdx[r]; 2522 } 2523 } else { 2524 a[r] = offdiagA[r]; 2525 if (idx) idx[r] = offdiagIdx[r]; 2526 } 2527 } 2528 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2529 PetscCall(VecRestoreArrayWrite(v, &a)); 2530 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2531 PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA)); 2532 PetscCall(VecDestroy(&diagV)); 2533 PetscCall(VecDestroy(&offdiagV)); 2534 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2535 PetscFunctionReturn(0); 2536 } 2537 2538 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2539 { 2540 Mat *dummy; 2541 2542 PetscFunctionBegin; 2543 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy)); 2544 *newmat = *dummy; 2545 PetscCall(PetscFree(dummy)); 2546 PetscFunctionReturn(0); 2547 } 2548 2549 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2550 { 2551 Mat_MPIAIJ *a = (Mat_MPIAIJ*) 
A->data; 2552 2553 PetscFunctionBegin; 2554 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2555 A->factorerrortype = a->A->factorerrortype; 2556 PetscFunctionReturn(0); 2557 } 2558 2559 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2560 { 2561 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2562 2563 PetscFunctionBegin; 2564 PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2565 PetscCall(MatSetRandom(aij->A,rctx)); 2566 if (x->assembled) { 2567 PetscCall(MatSetRandom(aij->B,rctx)); 2568 } else { 2569 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx)); 2570 } 2571 PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY)); 2572 PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY)); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2577 { 2578 PetscFunctionBegin; 2579 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2580 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2581 PetscFunctionReturn(0); 2582 } 2583 2584 /*@ 2585 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2586 2587 Collective on Mat 2588 2589 Input Parameters: 2590 + A - the matrix 2591 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2592 2593 Level: advanced 2594 2595 @*/ 2596 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2597 { 2598 PetscFunctionBegin; 2599 PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc)); 2600 PetscFunctionReturn(0); 2601 } 2602 2603 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2604 { 2605 PetscBool sc = PETSC_FALSE,flg; 2606 2607 PetscFunctionBegin; 2608 PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options"); 2609 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2610 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg)); 2611 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc)); 2612 PetscOptionsHeadEnd(); 2613 PetscFunctionReturn(0); 2614 } 2615 2616 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2617 { 2618 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2619 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2620 2621 PetscFunctionBegin; 2622 if (!Y->preallocated) { 2623 PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL)); 2624 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
*/ 2625 PetscInt nonew = aij->nonew; 2626 PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL)); 2627 aij->nonew = nonew; 2628 } 2629 PetscCall(MatShift_Basic(Y,a)); 2630 PetscFunctionReturn(0); 2631 } 2632 2633 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2634 { 2635 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2636 2637 PetscFunctionBegin; 2638 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2639 PetscCall(MatMissingDiagonal(a->A,missing,d)); 2640 if (d) { 2641 PetscInt rstart; 2642 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 2643 *d += rstart; 2644 2645 } 2646 PetscFunctionReturn(0); 2647 } 2648 2649 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2650 { 2651 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2652 2653 PetscFunctionBegin; 2654 PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag)); 2655 PetscFunctionReturn(0); 2656 } 2657 2658 /* -------------------------------------------------------------------*/ 2659 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2660 MatGetRow_MPIAIJ, 2661 MatRestoreRow_MPIAIJ, 2662 MatMult_MPIAIJ, 2663 /* 4*/ MatMultAdd_MPIAIJ, 2664 MatMultTranspose_MPIAIJ, 2665 MatMultTransposeAdd_MPIAIJ, 2666 NULL, 2667 NULL, 2668 NULL, 2669 /*10*/ NULL, 2670 NULL, 2671 NULL, 2672 MatSOR_MPIAIJ, 2673 MatTranspose_MPIAIJ, 2674 /*15*/ MatGetInfo_MPIAIJ, 2675 MatEqual_MPIAIJ, 2676 MatGetDiagonal_MPIAIJ, 2677 MatDiagonalScale_MPIAIJ, 2678 MatNorm_MPIAIJ, 2679 /*20*/ MatAssemblyBegin_MPIAIJ, 2680 MatAssemblyEnd_MPIAIJ, 2681 MatSetOption_MPIAIJ, 2682 MatZeroEntries_MPIAIJ, 2683 /*24*/ MatZeroRows_MPIAIJ, 2684 NULL, 2685 NULL, 2686 NULL, 2687 NULL, 2688 /*29*/ MatSetUp_MPIAIJ, 2689 NULL, 2690 NULL, 2691 MatGetDiagonalBlock_MPIAIJ, 2692 NULL, 2693 /*34*/ MatDuplicate_MPIAIJ, 2694 NULL, 2695 NULL, 2696 NULL, 2697 NULL, 2698 /*39*/ MatAXPY_MPIAIJ, 2699 MatCreateSubMatrices_MPIAIJ, 2700 MatIncreaseOverlap_MPIAIJ, 2701 MatGetValues_MPIAIJ, 2702 MatCopy_MPIAIJ, 2703 /*44*/ MatGetRowMax_MPIAIJ, 2704 MatScale_MPIAIJ, 2705 MatShift_MPIAIJ, 2706 MatDiagonalSet_MPIAIJ, 2707 MatZeroRowsColumns_MPIAIJ, 2708 /*49*/ MatSetRandom_MPIAIJ, 2709 MatGetRowIJ_MPIAIJ, 2710 MatRestoreRowIJ_MPIAIJ, 2711 NULL, 2712 NULL, 2713 /*54*/ MatFDColoringCreate_MPIXAIJ, 2714 NULL, 2715 MatSetUnfactored_MPIAIJ, 2716 MatPermute_MPIAIJ, 2717 NULL, 2718 /*59*/ MatCreateSubMatrix_MPIAIJ, 2719 MatDestroy_MPIAIJ, 2720 MatView_MPIAIJ, 2721 NULL, 2722 NULL, 2723 /*64*/ NULL, 2724 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2725 NULL, 2726 NULL, 2727 NULL, 2728 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2729 MatGetRowMinAbs_MPIAIJ, 2730 NULL, 2731 NULL, 2732 NULL, 2733 NULL, 2734 /*75*/ MatFDColoringApply_AIJ, 2735 MatSetFromOptions_MPIAIJ, 2736 NULL, 2737 NULL, 2738 MatFindZeroDiagonals_MPIAIJ, 2739 /*80*/ NULL, 2740 NULL, 2741 NULL, 2742 /*83*/ MatLoad_MPIAIJ, 2743 MatIsSymmetric_MPIAIJ, 2744 NULL, 2745 NULL, 2746 NULL, 2747 NULL, 2748 /*89*/ NULL, 2749 NULL, 2750 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2751 NULL, 2752 NULL, 2753 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2754 NULL, 2755 NULL, 2756 NULL, 2757 MatBindToCPU_MPIAIJ, 2758 /*99*/ MatProductSetFromOptions_MPIAIJ, 2759 NULL, 2760 NULL, 2761 MatConjugate_MPIAIJ, 2762 NULL, 2763 /*104*/MatSetValuesRow_MPIAIJ, 2764 MatRealPart_MPIAIJ, 2765 MatImaginaryPart_MPIAIJ, 2766 NULL, 2767 NULL, 2768 /*109*/NULL, 2769 NULL, 2770 MatGetRowMin_MPIAIJ, 2771 NULL, 2772 MatMissingDiagonal_MPIAIJ, 2773 
/*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2774 NULL, 2775 MatGetGhosts_MPIAIJ, 2776 NULL, 2777 NULL, 2778 /*119*/MatMultDiagonalBlock_MPIAIJ, 2779 NULL, 2780 NULL, 2781 NULL, 2782 MatGetMultiProcBlock_MPIAIJ, 2783 /*124*/MatFindNonzeroRows_MPIAIJ, 2784 MatGetColumnReductions_MPIAIJ, 2785 MatInvertBlockDiagonal_MPIAIJ, 2786 MatInvertVariableBlockDiagonal_MPIAIJ, 2787 MatCreateSubMatricesMPI_MPIAIJ, 2788 /*129*/NULL, 2789 NULL, 2790 NULL, 2791 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2792 NULL, 2793 /*134*/NULL, 2794 NULL, 2795 NULL, 2796 NULL, 2797 NULL, 2798 /*139*/MatSetBlockSizes_MPIAIJ, 2799 NULL, 2800 NULL, 2801 MatFDColoringSetUp_MPIXAIJ, 2802 MatFindOffBlockDiagonalEntries_MPIAIJ, 2803 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2804 /*145*/NULL, 2805 NULL, 2806 NULL 2807 }; 2808 2809 /* ----------------------------------------------------------------------------------------*/ 2810 2811 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2812 { 2813 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2814 2815 PetscFunctionBegin; 2816 PetscCall(MatStoreValues(aij->A)); 2817 PetscCall(MatStoreValues(aij->B)); 2818 PetscFunctionReturn(0); 2819 } 2820 2821 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2822 { 2823 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2824 2825 PetscFunctionBegin; 2826 PetscCall(MatRetrieveValues(aij->A)); 2827 PetscCall(MatRetrieveValues(aij->B)); 2828 PetscFunctionReturn(0); 2829 } 2830 2831 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2832 { 2833 Mat_MPIAIJ *b; 2834 PetscMPIInt size; 2835 2836 PetscFunctionBegin; 2837 PetscCall(PetscLayoutSetUp(B->rmap)); 2838 PetscCall(PetscLayoutSetUp(B->cmap)); 2839 b = (Mat_MPIAIJ*)B->data; 2840 2841 #if defined(PETSC_USE_CTABLE) 2842 PetscCall(PetscTableDestroy(&b->colmap)); 2843 #else 2844 PetscCall(PetscFree(b->colmap)); 2845 #endif 2846 PetscCall(PetscFree(b->garray)); 2847 PetscCall(VecDestroy(&b->lvec)); 2848 PetscCall(VecScatterDestroy(&b->Mvctx)); 2849 2850 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2851 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 2852 PetscCall(MatDestroy(&b->B)); 2853 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 2854 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0)); 2855 PetscCall(MatSetBlockSizesFromMats(b->B,B,B)); 2856 PetscCall(MatSetType(b->B,MATSEQAIJ)); 2857 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 2858 2859 if (!B->preallocated) { 2860 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 2861 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 2862 PetscCall(MatSetBlockSizesFromMats(b->A,B,B)); 2863 PetscCall(MatSetType(b->A,MATSEQAIJ)); 2864 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 2865 } 2866 2867 PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz)); 2868 PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz)); 2869 B->preallocated = PETSC_TRUE; 2870 B->was_assembled = PETSC_FALSE; 2871 B->assembled = PETSC_FALSE; 2872 PetscFunctionReturn(0); 2873 } 2874 2875 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2876 { 2877 Mat_MPIAIJ *b; 2878 2879 PetscFunctionBegin; 2880 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2881 PetscCall(PetscLayoutSetUp(B->rmap)); 2882 PetscCall(PetscLayoutSetUp(B->cmap)); 2883 b = (Mat_MPIAIJ*)B->data; 2884 2885 #if defined(PETSC_USE_CTABLE) 2886 PetscCall(PetscTableDestroy(&b->colmap)); 2887 #else 2888 PetscCall(PetscFree(b->colmap)); 2889 #endif 2890 PetscCall(PetscFree(b->garray)); 2891 PetscCall(VecDestroy(&b->lvec)); 2892 PetscCall(VecScatterDestroy(&b->Mvctx)); 2893 2894 PetscCall(MatResetPreallocation(b->A)); 2895 PetscCall(MatResetPreallocation(b->B)); 2896 B->preallocated = PETSC_TRUE; 2897 B->was_assembled = PETSC_FALSE; 2898 B->assembled = PETSC_FALSE; 2899 PetscFunctionReturn(0); 2900 } 2901 2902 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2903 { 2904 Mat mat; 2905 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2906 2907 PetscFunctionBegin; 2908 *newmat = NULL; 2909 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 2910 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 2911 PetscCall(MatSetBlockSizesFromMats(mat,matin,matin)); 2912 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 2913 a = (Mat_MPIAIJ*)mat->data; 2914 2915 mat->factortype = matin->factortype; 2916 mat->assembled = matin->assembled; 2917 mat->insertmode = NOT_SET_VALUES; 2918 mat->preallocated = matin->preallocated; 2919 2920 a->size = oldmat->size; 2921 a->rank = oldmat->rank; 2922 a->donotstash = oldmat->donotstash; 2923 a->roworiented = oldmat->roworiented; 2924 a->rowindices = NULL; 2925 a->rowvalues = NULL; 2926 a->getrowactive = PETSC_FALSE; 2927 2928 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 2929 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 2930 2931 if (oldmat->colmap) { 2932 #if defined(PETSC_USE_CTABLE) 2933 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 2934 #else 2935 PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap)); 2936 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt))); 2937 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N)); 2938 #endif 2939 } else a->colmap = NULL; 2940 if (oldmat->garray) { 2941 PetscInt len; 2942 len = oldmat->B->cmap->n; 2943 PetscCall(PetscMalloc1(len+1,&a->garray)); 2944 PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 2945 if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 2946 } else a->garray = NULL; 2947 2948 /* It may happen MatDuplicate is called with a non-assembled matrix 2949 In fact, MatDuplicate only requires the matrix to be preallocated 2950 This may happen inside a 
DMCreateMatrix_Shell */ 2951 if (oldmat->lvec) { 2952 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 2953 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 2954 } 2955 if (oldmat->Mvctx) { 2956 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 2957 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 2958 } 2959 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2960 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2961 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 2962 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 2963 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 2964 *newmat = mat; 2965 PetscFunctionReturn(0); 2966 } 2967 2968 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2969 { 2970 PetscBool isbinary, ishdf5; 2971 2972 PetscFunctionBegin; 2973 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2974 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2975 /* force binary viewer to load .info file if it has not yet done so */ 2976 PetscCall(PetscViewerSetUp(viewer)); 2977 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 2978 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5)); 2979 if (isbinary) { 2980 PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer)); 2981 } else if (ishdf5) { 2982 #if defined(PETSC_HAVE_HDF5) 2983 PetscCall(MatLoad_AIJ_HDF5(newMat,viewer)); 2984 #else 2985 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2986 #endif 2987 } else { 2988 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2989 } 2990 PetscFunctionReturn(0); 2991 } 2992 2993 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 2994 { 2995 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 2996 PetscInt *rowidxs,*colidxs; 2997 PetscScalar *matvals; 2998 2999 PetscFunctionBegin; 3000 PetscCall(PetscViewerSetUp(viewer)); 3001 3002 /* read in matrix header */ 3003 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3004 PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3005 M = header[1]; N = header[2]; nz = header[3]; 3006 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3007 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3008 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3009 3010 /* set block sizes from the viewer's .info file */ 3011 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3012 /* set global sizes if not set already */ 3013 if (mat->rmap->N < 0) mat->rmap->N = M; 3014 if (mat->cmap->N < 0) mat->cmap->N = N; 3015 PetscCall(PetscLayoutSetUp(mat->rmap)); 3016 PetscCall(PetscLayoutSetUp(mat->cmap)); 3017 3018 /* check if the matrix sizes are correct */ 3019 PetscCall(MatGetSize(mat,&rows,&cols)); 3020 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT 
", %" PetscInt_FMT ")",M,N,rows,cols); 3021 3022 /* read in row lengths and build row indices */ 3023 PetscCall(MatGetLocalSize(mat,&m,NULL)); 3024 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3025 PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT)); 3026 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3027 PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer))); 3028 PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3029 /* read in column indices and matrix values */ 3030 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals)); 3031 PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 3032 PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 3033 /* store matrix indices and values */ 3034 PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals)); 3035 PetscCall(PetscFree(rowidxs)); 3036 PetscCall(PetscFree2(colidxs,matvals)); 3037 PetscFunctionReturn(0); 3038 } 3039 3040 /* Not scalable because of ISAllGather() unless getting all columns. */ 3041 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3042 { 3043 IS iscol_local; 3044 PetscBool isstride; 3045 PetscMPIInt lisstride=0,gisstride; 3046 3047 PetscFunctionBegin; 3048 /* check if we are grabbing all columns*/ 3049 PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride)); 3050 3051 if (isstride) { 3052 PetscInt start,len,mstart,mlen; 3053 PetscCall(ISStrideGetInfo(iscol,&start,NULL)); 3054 PetscCall(ISGetLocalSize(iscol,&len)); 3055 PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen)); 3056 if (mstart == start && mlen-mstart == len) lisstride = 1; 3057 } 3058 3059 PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat))); 3060 if (gisstride) { 3061 PetscInt N; 3062 PetscCall(MatGetSize(mat,NULL,&N)); 3063 PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local)); 3064 PetscCall(ISSetIdentity(iscol_local)); 3065 PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3066 } else { 3067 PetscInt cbs; 3068 PetscCall(ISGetBlockSize(iscol,&cbs)); 3069 PetscCall(ISAllGather(iscol,&iscol_local)); 3070 PetscCall(ISSetBlockSize(iscol_local,cbs)); 3071 } 3072 3073 *isseq = iscol_local; 3074 PetscFunctionReturn(0); 3075 } 3076 3077 /* 3078 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3079 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3080 3081 Input Parameters: 3082 mat - matrix 3083 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3084 i.e., mat->rstart <= isrow[i] < mat->rend 3085 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3086 i.e., mat->cstart <= iscol[i] < mat->cend 3087 Output Parameter: 3088 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3089 iscol_o - sequential column index set for retrieving mat->B 3090 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3091 */ 3092 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3093 { 3094 Vec x,cmap; 3095 const PetscInt *is_idx; 3096 
PetscScalar *xarray,*cmaparray; 3097 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3098 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3099 Mat B=a->B; 3100 Vec lvec=a->lvec,lcmap; 3101 PetscInt i,cstart,cend,Bn=B->cmap->N; 3102 MPI_Comm comm; 3103 VecScatter Mvctx=a->Mvctx; 3104 3105 PetscFunctionBegin; 3106 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3107 PetscCall(ISGetLocalSize(iscol,&ncols)); 3108 3109 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3110 PetscCall(MatCreateVecs(mat,&x,NULL)); 3111 PetscCall(VecSet(x,-1.0)); 3112 PetscCall(VecDuplicate(x,&cmap)); 3113 PetscCall(VecSet(cmap,-1.0)); 3114 3115 /* Get start indices */ 3116 PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm)); 3117 isstart -= ncols; 3118 PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend)); 3119 3120 PetscCall(ISGetIndices(iscol,&is_idx)); 3121 PetscCall(VecGetArray(x,&xarray)); 3122 PetscCall(VecGetArray(cmap,&cmaparray)); 3123 PetscCall(PetscMalloc1(ncols,&idx)); 3124 for (i=0; i<ncols; i++) { 3125 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3126 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3127 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3128 } 3129 PetscCall(VecRestoreArray(x,&xarray)); 3130 PetscCall(VecRestoreArray(cmap,&cmaparray)); 3131 PetscCall(ISRestoreIndices(iscol,&is_idx)); 3132 3133 /* Get iscol_d */ 3134 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d)); 3135 PetscCall(ISGetBlockSize(iscol,&i)); 3136 PetscCall(ISSetBlockSize(*iscol_d,i)); 3137 3138 /* Get isrow_d */ 3139 PetscCall(ISGetLocalSize(isrow,&m)); 3140 rstart = mat->rmap->rstart; 3141 PetscCall(PetscMalloc1(m,&idx)); 3142 PetscCall(ISGetIndices(isrow,&is_idx)); 3143 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3144 PetscCall(ISRestoreIndices(isrow,&is_idx)); 3145 3146 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d)); 3147 PetscCall(ISGetBlockSize(isrow,&i)); 3148 PetscCall(ISSetBlockSize(*isrow_d,i)); 3149 3150 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3151 PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 3152 PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 3153 3154 PetscCall(VecDuplicate(lvec,&lcmap)); 3155 3156 PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 3157 PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 3158 3159 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3160 /* off-process column indices */ 3161 count = 0; 3162 PetscCall(PetscMalloc1(Bn,&idx)); 3163 PetscCall(PetscMalloc1(Bn,&cmap1)); 3164 3165 PetscCall(VecGetArray(lvec,&xarray)); 3166 PetscCall(VecGetArray(lcmap,&cmaparray)); 3167 for (i=0; i<Bn; i++) { 3168 if (PetscRealPart(xarray[i]) > -1.0) { 3169 idx[count] = i; /* local column index in off-diagonal part B */ 3170 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3171 count++; 3172 } 3173 } 3174 PetscCall(VecRestoreArray(lvec,&xarray)); 3175 PetscCall(VecRestoreArray(lcmap,&cmaparray)); 3176 3177 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o)); 3178 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3179 3180 PetscCall(PetscFree(idx)); 3181 *garray = cmap1; 3182 3183 PetscCall(VecDestroy(&x)); 3184 PetscCall(VecDestroy(&cmap)); 3185 PetscCall(VecDestroy(&lcmap)); 3186 PetscFunctionReturn(0); 3187 } 3188 3189 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3190 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3191 { 3192 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3193 Mat M = NULL; 3194 MPI_Comm comm; 3195 IS iscol_d,isrow_d,iscol_o; 3196 Mat Asub = NULL,Bsub = NULL; 3197 PetscInt n; 3198 3199 PetscFunctionBegin; 3200 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3201 3202 if (call == MAT_REUSE_MATRIX) { 3203 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3204 PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d)); 3205 PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3206 3207 PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d)); 3208 PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3209 3210 PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o)); 3211 PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3212 3213 /* Update diagonal and off-diagonal portions of submat */ 3214 asub = (Mat_MPIAIJ*)(*submat)->data; 3215 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A)); 3216 PetscCall(ISGetLocalSize(iscol_o,&n)); 3217 if (n) { 3218 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B)); 3219 } 3220 PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY)); 3221 PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY)); 3222 3223 } else { /* call == MAT_INITIAL_MATRIX) */ 3224 const PetscInt *garray; 3225 PetscInt BsubN; 3226 3227 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3228 PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray)); 3229 3230 /* Create local submatrices Asub and Bsub */ 3231 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub)); 3232 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub)); 3233 3234 /* Create submatrix M */ 3235 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M)); 3236 3237 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3238 asub = (Mat_MPIAIJ*)M->data; 3239 3240 PetscCall(ISGetLocalSize(iscol_o,&BsubN)); 3241 n = asub->B->cmap->N; 3242 if (BsubN > n) { 3243 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3244 const PetscInt *idx; 3245 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3246 PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN)); 3247 3248 PetscCall(PetscMalloc1(n,&idx_new)); 3249 j = 0; 3250 PetscCall(ISGetIndices(iscol_o,&idx)); 3251 for (i=0; i<n; i++) { 3252 if (j >= BsubN) break; 3253 while (subgarray[i] > garray[j]) j++; 3254 3255 if (subgarray[i] == garray[j]) { 3256 idx_new[i] = idx[j++]; 3257 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]); 3258 } 3259 PetscCall(ISRestoreIndices(iscol_o,&idx)); 3260 3261 PetscCall(ISDestroy(&iscol_o)); 3262 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o)); 3263 3264 } else if (BsubN < n) { 3265 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N); 3266 } 3267 3268 PetscCall(PetscFree(garray)); 3269 *submat = M; 3270 3271 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3272 PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d)); 3273 PetscCall(ISDestroy(&isrow_d)); 3274 3275 PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d)); 3276 PetscCall(ISDestroy(&iscol_d)); 3277 3278 PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o)); 3279 PetscCall(ISDestroy(&iscol_o)); 3280 } 3281 PetscFunctionReturn(0); 3282 } 3283 3284 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3285 { 3286 IS iscol_local=NULL,isrow_d; 3287 PetscInt csize; 3288 PetscInt n,i,j,start,end; 3289 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3290 MPI_Comm comm; 3291 3292 PetscFunctionBegin; 3293 /* If isrow has same processor distribution as mat, 3294 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3295 if (call == MAT_REUSE_MATRIX) { 3296 PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d)); 3297 if (isrow_d) { 3298 sameRowDist = PETSC_TRUE; 3299 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3300 } else { 3301 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local)); 3302 if (iscol_local) { 3303 sameRowDist = PETSC_TRUE; 3304 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3305 } 3306 } 3307 } else { 3308 /* Check if isrow has same processor distribution as mat */ 3309 sameDist[0] = PETSC_FALSE; 3310 PetscCall(ISGetLocalSize(isrow,&n)); 3311 if (!n) { 3312 sameDist[0] = PETSC_TRUE; 3313 } 
else { 3314 PetscCall(ISGetMinMax(isrow,&i,&j)); 3315 PetscCall(MatGetOwnershipRange(mat,&start,&end)); 3316 if (i >= start && j < end) { 3317 sameDist[0] = PETSC_TRUE; 3318 } 3319 } 3320 3321 /* Check if iscol has same processor distribution as mat */ 3322 sameDist[1] = PETSC_FALSE; 3323 PetscCall(ISGetLocalSize(iscol,&n)); 3324 if (!n) { 3325 sameDist[1] = PETSC_TRUE; 3326 } else { 3327 PetscCall(ISGetMinMax(iscol,&i,&j)); 3328 PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end)); 3329 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3330 } 3331 3332 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3333 PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm)); 3334 sameRowDist = tsameDist[0]; 3335 } 3336 3337 if (sameRowDist) { 3338 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3339 /* isrow and iscol have same processor distribution as mat */ 3340 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat)); 3341 PetscFunctionReturn(0); 3342 } else { /* sameRowDist */ 3343 /* isrow has same processor distribution as mat */ 3344 if (call == MAT_INITIAL_MATRIX) { 3345 PetscBool sorted; 3346 PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 3347 PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */ 3348 PetscCall(ISGetSize(iscol,&i)); 3349 PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i); 3350 3351 PetscCall(ISSorted(iscol_local,&sorted)); 3352 if (sorted) { 3353 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3354 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat)); 3355 PetscFunctionReturn(0); 3356 } 3357 } else { /* call == MAT_REUSE_MATRIX */ 3358 IS iscol_sub; 3359 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3360 if (iscol_sub) { 3361 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat)); 3362 PetscFunctionReturn(0); 3363 } 3364 } 3365 } 3366 } 3367 3368 /* General case: iscol -> iscol_local which has global size of iscol */ 3369 if (call == MAT_REUSE_MATRIX) { 3370 PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local)); 3371 PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3372 } else { 3373 if (!iscol_local) { 3374 PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 3375 } 3376 } 3377 3378 PetscCall(ISGetLocalSize(iscol,&csize)); 3379 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat)); 3380 3381 if (call == MAT_INITIAL_MATRIX) { 3382 PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local)); 3383 PetscCall(ISDestroy(&iscol_local)); 3384 } 3385 PetscFunctionReturn(0); 3386 } 3387 3388 /*@C 3389 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3390 and "off-diagonal" part of the matrix in CSR format. 3391 3392 Collective 3393 3394 Input Parameters: 3395 + comm - MPI communicator 3396 . A - "diagonal" portion of matrix 3397 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3398 - garray - global index of B columns 3399 3400 Output Parameter: 3401 . 
mat - the matrix, with input A as its local diagonal matrix 3402 Level: advanced 3403 3404 Notes: 3405 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3406 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3407 3408 .seealso: `MatCreateMPIAIJWithSplitArrays()` 3409 @*/ 3410 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3411 { 3412 Mat_MPIAIJ *maij; 3413 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3414 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3415 const PetscScalar *oa; 3416 Mat Bnew; 3417 PetscInt m,n,N; 3418 3419 PetscFunctionBegin; 3420 PetscCall(MatCreate(comm,mat)); 3421 PetscCall(MatGetSize(A,&m,&n)); 3422 PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N); 3423 PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs); 3424 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3425 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3426 3427 /* Get global columns of mat */ 3428 PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm)); 3429 3430 PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N)); 3431 PetscCall(MatSetType(*mat,MATMPIAIJ)); 3432 PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs)); 3433 maij = (Mat_MPIAIJ*)(*mat)->data; 3434 3435 (*mat)->preallocated = PETSC_TRUE; 3436 3437 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3438 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3439 3440 /* Set A as diagonal portion of *mat */ 3441 maij->A = A; 3442 3443 nz = oi[m]; 3444 for (i=0; i<nz; i++) { 3445 col = oj[i]; 3446 oj[i] = garray[col]; 3447 } 3448 3449 /* Set Bnew as off-diagonal portion of *mat */ 3450 PetscCall(MatSeqAIJGetArrayRead(B,&oa)); 3451 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew)); 3452 PetscCall(MatSeqAIJRestoreArrayRead(B,&oa)); 3453 bnew = (Mat_SeqAIJ*)Bnew->data; 3454 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3455 maij->B = Bnew; 3456 3457 PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N); 3458 3459 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3460 b->free_a = PETSC_FALSE; 3461 b->free_ij = PETSC_FALSE; 3462 PetscCall(MatDestroy(&B)); 3463 3464 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3465 bnew->free_a = PETSC_TRUE; 3466 bnew->free_ij = PETSC_TRUE; 3467 3468 /* condense columns of maij->B */ 3469 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 3470 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 3471 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 3472 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 3473 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3474 PetscFunctionReturn(0); 3475 } 3476 3477 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3478 3479 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3480 { 3481 PetscInt 
i,m,n,rstart,row,rend,nz,j,bs,cbs; 3482 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3483 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3484 Mat M,Msub,B=a->B; 3485 MatScalar *aa; 3486 Mat_SeqAIJ *aij; 3487 PetscInt *garray = a->garray,*colsub,Ncols; 3488 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3489 IS iscol_sub,iscmap; 3490 const PetscInt *is_idx,*cmap; 3491 PetscBool allcolumns=PETSC_FALSE; 3492 MPI_Comm comm; 3493 3494 PetscFunctionBegin; 3495 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3496 if (call == MAT_REUSE_MATRIX) { 3497 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3498 PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3499 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3500 3501 PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap)); 3502 PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3503 3504 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub)); 3505 PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3506 3507 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub)); 3508 3509 } else { /* call == MAT_INITIAL_MATRIX) */ 3510 PetscBool flg; 3511 3512 PetscCall(ISGetLocalSize(iscol,&n)); 3513 PetscCall(ISGetSize(iscol,&Ncols)); 3514 3515 /* (1) iscol -> nonscalable iscol_local */ 3516 /* Check for special case: each processor gets entire matrix columns */ 3517 PetscCall(ISIdentity(iscol_local,&flg)); 3518 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3519 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3520 if (allcolumns) { 3521 iscol_sub = iscol_local; 3522 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3523 PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap)); 3524 3525 } else { 3526 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3527 PetscInt *idx,*cmap1,k; 3528 PetscCall(PetscMalloc1(Ncols,&idx)); 3529 PetscCall(PetscMalloc1(Ncols,&cmap1)); 3530 PetscCall(ISGetIndices(iscol_local,&is_idx)); 3531 count = 0; 3532 k = 0; 3533 for (i=0; i<Ncols; i++) { 3534 j = is_idx[i]; 3535 if (j >= cstart && j < cend) { 3536 /* diagonal part of mat */ 3537 idx[count] = j; 3538 cmap1[count++] = i; /* column index in submat */ 3539 } else if (Bn) { 3540 /* off-diagonal part of mat */ 3541 if (j == garray[k]) { 3542 idx[count] = j; 3543 cmap1[count++] = i; /* column index in submat */ 3544 } else if (j > garray[k]) { 3545 while (j > garray[k] && k < Bn-1) k++; 3546 if (j == garray[k]) { 3547 idx[count] = j; 3548 cmap1[count++] = i; /* column index in submat */ 3549 } 3550 } 3551 } 3552 } 3553 PetscCall(ISRestoreIndices(iscol_local,&is_idx)); 3554 3555 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub)); 3556 PetscCall(ISGetBlockSize(iscol,&cbs)); 3557 PetscCall(ISSetBlockSize(iscol_sub,cbs)); 3558 3559 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap)); 3560 } 3561 3562 /* (3) Create sequential Msub */ 3563 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub)); 3564 } 3565 3566 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3567 aij = (Mat_SeqAIJ*)(Msub)->data; 3568 ii = aij->i; 3569 PetscCall(ISGetIndices(iscmap,&cmap)); 3570 3571 /* 3572 m - number of local rows 3573 Ncols - number of columns (same on all processors) 3574 rstart - first row in new global matrix generated 3575 */ 3576 PetscCall(MatGetSize(Msub,&m,NULL)); 3577 3578 if (call == MAT_INITIAL_MATRIX) { 3579 /* (4) Create parallel newmat */ 3580 PetscMPIInt rank,size; 3581 PetscInt csize; 3582 3583 PetscCallMPI(MPI_Comm_size(comm,&size)); 3584 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3585 3586 /* 3587 Determine the number of non-zeros in the diagonal and off-diagonal 3588 portions of the matrix in order to do correct preallocation 3589 */ 3590 3591 /* first get start and end of "diagonal" columns */ 3592 PetscCall(ISGetLocalSize(iscol,&csize)); 3593 if (csize == PETSC_DECIDE) { 3594 PetscCall(ISGetSize(isrow,&mglobal)); 3595 if (mglobal == Ncols) { /* square matrix */ 3596 nlocal = m; 3597 } else { 3598 nlocal = Ncols/size + ((Ncols % size) > rank); 3599 } 3600 } else { 3601 nlocal = csize; 3602 } 3603 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3604 rstart = rend - nlocal; 3605 PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols); 3606 3607 /* next, compute all the lengths */ 3608 jj = aij->j; 3609 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3610 olens = dlens + m; 3611 for (i=0; i<m; i++) { 3612 jend = ii[i+1] - ii[i]; 3613 olen = 0; 3614 dlen = 0; 3615 for (j=0; j<jend; j++) { 3616 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3617 else dlen++; 3618 jj++; 3619 } 3620 olens[i] = olen; 3621 dlens[i] = dlen; 3622 } 3623 3624 PetscCall(ISGetBlockSize(isrow,&bs)); 3625 PetscCall(ISGetBlockSize(iscol,&cbs)); 3626 3627 PetscCall(MatCreate(comm,&M)); 3628 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols)); 3629 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3630 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3631 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3632 
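/* dlens (and olens, which points into the same allocation) were only needed for preallocation and can be released now */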
PetscCall(PetscFree(dlens)); 3633 3634 } else { /* call == MAT_REUSE_MATRIX */ 3635 M = *newmat; 3636 PetscCall(MatGetLocalSize(M,&i,NULL)); 3637 PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3638 PetscCall(MatZeroEntries(M)); 3639 /* 3640 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3641 rather than the slower MatSetValues(). 3642 */ 3643 M->was_assembled = PETSC_TRUE; 3644 M->assembled = PETSC_FALSE; 3645 } 3646 3647 /* (5) Set values of Msub to *newmat */ 3648 PetscCall(PetscMalloc1(count,&colsub)); 3649 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 3650 3651 jj = aij->j; 3652 PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa)); 3653 for (i=0; i<m; i++) { 3654 row = rstart + i; 3655 nz = ii[i+1] - ii[i]; 3656 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3657 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES)); 3658 jj += nz; aa += nz; 3659 } 3660 PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa)); 3661 PetscCall(ISRestoreIndices(iscmap,&cmap)); 3662 3663 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3664 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3665 3666 PetscCall(PetscFree(colsub)); 3667 3668 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3669 if (call == MAT_INITIAL_MATRIX) { 3670 *newmat = M; 3671 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub)); 3672 PetscCall(MatDestroy(&Msub)); 3673 3674 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub)); 3675 PetscCall(ISDestroy(&iscol_sub)); 3676 3677 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap)); 3678 PetscCall(ISDestroy(&iscmap)); 3679 3680 if (iscol_local) { 3681 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local)); 3682 PetscCall(ISDestroy(&iscol_local)); 3683 } 3684 } 3685 PetscFunctionReturn(0); 3686 } 3687 3688 /* 3689 Not great since it makes two copies of the submatrix, first an SeqAIJ 3690 in local and then by concatenating the local matrices the end result. 3691 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3692 3693 Note: This requires a sequential iscol with all indices. 
3694 */ 3695 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3696 { 3697 PetscMPIInt rank,size; 3698 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3699 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3700 Mat M,Mreuse; 3701 MatScalar *aa,*vwork; 3702 MPI_Comm comm; 3703 Mat_SeqAIJ *aij; 3704 PetscBool colflag,allcolumns=PETSC_FALSE; 3705 3706 PetscFunctionBegin; 3707 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3708 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3709 PetscCallMPI(MPI_Comm_size(comm,&size)); 3710 3711 /* Check for special case: each processor gets entire matrix columns */ 3712 PetscCall(ISIdentity(iscol,&colflag)); 3713 PetscCall(ISGetLocalSize(iscol,&n)); 3714 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3715 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3716 3717 if (call == MAT_REUSE_MATRIX) { 3718 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 3719 PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3720 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse)); 3721 } else { 3722 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse)); 3723 } 3724 3725 /* 3726 m - number of local rows 3727 n - number of columns (same on all processors) 3728 rstart - first row in new global matrix generated 3729 */ 3730 PetscCall(MatGetSize(Mreuse,&m,&n)); 3731 PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs)); 3732 if (call == MAT_INITIAL_MATRIX) { 3733 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3734 ii = aij->i; 3735 jj = aij->j; 3736 3737 /* 3738 Determine the number of non-zeros in the diagonal and off-diagonal 3739 portions of the matrix in order to do correct preallocation 3740 */ 3741 3742 /* first get start and end of "diagonal" columns */ 3743 if (csize == PETSC_DECIDE) { 3744 PetscCall(ISGetSize(isrow,&mglobal)); 3745 if (mglobal == n) { /* square matrix */ 3746 nlocal = m; 3747 } else { 3748 nlocal = n/size + ((n % size) > rank); 3749 } 3750 } else { 3751 nlocal = csize; 3752 } 3753 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3754 rstart = rend - nlocal; 3755 PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 3756 3757 /* next, compute all the lengths */ 3758 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3759 olens = dlens + m; 3760 for (i=0; i<m; i++) { 3761 jend = ii[i+1] - ii[i]; 3762 olen = 0; 3763 dlen = 0; 3764 for (j=0; j<jend; j++) { 3765 if (*jj < rstart || *jj >= rend) olen++; 3766 else dlen++; 3767 jj++; 3768 } 3769 olens[i] = olen; 3770 dlens[i] = dlen; 3771 } 3772 PetscCall(MatCreate(comm,&M)); 3773 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n)); 3774 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3775 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3776 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3777 PetscCall(PetscFree(dlens)); 3778 } else { 3779 PetscInt ml,nl; 3780 3781 M = *newmat; 3782 PetscCall(MatGetLocalSize(M,&ml,&nl)); 3783 PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3784 PetscCall(MatZeroEntries(M)); 3785 /* 3786 The next two lines are needed so we may call 
MatSetValues_MPIAIJ() below directly, 3787 rather than the slower MatSetValues(). 3788 */ 3789 M->was_assembled = PETSC_TRUE; 3790 M->assembled = PETSC_FALSE; 3791 } 3792 PetscCall(MatGetOwnershipRange(M,&rstart,&rend)); 3793 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3794 ii = aij->i; 3795 jj = aij->j; 3796 3797 /* trigger copy to CPU if needed */ 3798 PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa)); 3799 for (i=0; i<m; i++) { 3800 row = rstart + i; 3801 nz = ii[i+1] - ii[i]; 3802 cwork = jj; jj += nz; 3803 vwork = aa; aa += nz; 3804 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES)); 3805 } 3806 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa)); 3807 3808 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3809 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3810 *newmat = M; 3811 3812 /* save submatrix used in processor for next request */ 3813 if (call == MAT_INITIAL_MATRIX) { 3814 PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse)); 3815 PetscCall(MatDestroy(&Mreuse)); 3816 } 3817 PetscFunctionReturn(0); 3818 } 3819 3820 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3821 { 3822 PetscInt m,cstart, cend,j,nnz,i,d; 3823 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3824 const PetscInt *JJ; 3825 PetscBool nooffprocentries; 3826 3827 PetscFunctionBegin; 3828 PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]); 3829 3830 PetscCall(PetscLayoutSetUp(B->rmap)); 3831 PetscCall(PetscLayoutSetUp(B->cmap)); 3832 m = B->rmap->n; 3833 cstart = B->cmap->rstart; 3834 cend = B->cmap->rend; 3835 rstart = B->rmap->rstart; 3836 3837 PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz)); 3838 3839 if (PetscDefined(USE_DEBUG)) { 3840 for (i=0; i<m; i++) { 3841 nnz = Ii[i+1]- Ii[i]; 3842 JJ = J + Ii[i]; 3843 PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz); 3844 PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]); 3845 PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N); 3846 } 3847 } 3848 3849 for (i=0; i<m; i++) { 3850 nnz = Ii[i+1]- Ii[i]; 3851 JJ = J + Ii[i]; 3852 nnz_max = PetscMax(nnz_max,nnz); 3853 d = 0; 3854 for (j=0; j<nnz; j++) { 3855 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3856 } 3857 d_nnz[i] = d; 3858 o_nnz[i] = nnz - d; 3859 } 3860 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 3861 PetscCall(PetscFree2(d_nnz,o_nnz)); 3862 3863 for (i=0; i<m; i++) { 3864 ii = i + rstart; 3865 PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES)); 3866 } 3867 nooffprocentries = B->nooffprocentries; 3868 B->nooffprocentries = PETSC_TRUE; 3869 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 3870 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3871 B->nooffprocentries = nooffprocentries; 3872 3873 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3874 PetscFunctionReturn(0); 3875 } 3876 3877 /*@ 3878 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3879 (the default parallel PETSc format). 
3880 3881 Collective 3882 3883 Input Parameters: 3884 + B - the matrix 3885 . i - the indices into j for the start of each local row (starts with zero) 3886 . j - the column indices for each local row (starts with zero) 3887 - v - optional values in the matrix 3888 3889 Level: developer 3890 3891 Notes: 3892 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3893 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3894 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3895 3896 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3897 3898 The format which is used for the sparse matrix input, is equivalent to a 3899 row-major ordering.. i.e for the following matrix, the input data expected is 3900 as shown 3901 3902 $ 1 0 0 3903 $ 2 0 3 P0 3904 $ ------- 3905 $ 4 5 6 P1 3906 $ 3907 $ Process0 [P0]: rows_owned=[0,1] 3908 $ i = {0,1,3} [size = nrow+1 = 2+1] 3909 $ j = {0,0,2} [size = 3] 3910 $ v = {1,2,3} [size = 3] 3911 $ 3912 $ Process1 [P1]: rows_owned=[2] 3913 $ i = {0,3} [size = nrow+1 = 1+1] 3914 $ j = {0,1,2} [size = 3] 3915 $ v = {4,5,6} [size = 3] 3916 3917 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 3918 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 3919 @*/ 3920 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3921 { 3922 PetscFunctionBegin; 3923 PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v)); 3924 PetscFunctionReturn(0); 3925 } 3926 3927 /*@C 3928 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3929 (the default parallel PETSc format). For good matrix assembly performance 3930 the user should preallocate the matrix storage by setting the parameters 3931 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3932 performance can be increased by more than a factor of 50. 3933 3934 Collective 3935 3936 Input Parameters: 3937 + B - the matrix 3938 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3939 (same value is used for all local rows) 3940 . d_nnz - array containing the number of nonzeros in the various rows of the 3941 DIAGONAL portion of the local submatrix (possibly different for each row) 3942 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3943 The size of this array is equal to the number of local rows, i.e 'm'. 3944 For matrices that will be factored, you must leave room for (and set) 3945 the diagonal entry even if it is zero. 3946 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3947 submatrix (same value is used for all local rows). 3948 - o_nnz - array containing the number of nonzeros in the various rows of the 3949 OFF-DIAGONAL portion of the local submatrix (possibly different for 3950 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3951 structure. The size of this array is equal to the number 3952 of local rows, i.e 'm'. 3953 3954 If the *_nnz parameter is given then the *_nz parameter is ignored 3955 3956 The AIJ format (also called the Yale sparse matrix format or 3957 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3958 storage. 
The stored row and column indices begin with zero. 3959 See Users-Manual: ch_mat for details. 3960 3961 The parallel matrix is partitioned such that the first m0 rows belong to 3962 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3963 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3964 3965 The DIAGONAL portion of the local submatrix of a processor can be defined 3966 as the submatrix which is obtained by extraction the part corresponding to 3967 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3968 first row that belongs to the processor, r2 is the last row belonging to 3969 the this processor, and c1-c2 is range of indices of the local part of a 3970 vector suitable for applying the matrix to. This is an mxn matrix. In the 3971 common case of a square matrix, the row and column ranges are the same and 3972 the DIAGONAL part is also square. The remaining portion of the local 3973 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3974 3975 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3976 3977 You can call MatGetInfo() to get information on how effective the preallocation was; 3978 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3979 You can also run with the option -info and look for messages with the string 3980 malloc in them to see if additional memory allocation was needed. 3981 3982 Example usage: 3983 3984 Consider the following 8x8 matrix with 34 non-zero values, that is 3985 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3986 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3987 as follows: 3988 3989 .vb 3990 1 2 0 | 0 3 0 | 0 4 3991 Proc0 0 5 6 | 7 0 0 | 8 0 3992 9 0 10 | 11 0 0 | 12 0 3993 ------------------------------------- 3994 13 0 14 | 15 16 17 | 0 0 3995 Proc1 0 18 0 | 19 20 21 | 0 0 3996 0 0 0 | 22 23 0 | 24 0 3997 ------------------------------------- 3998 Proc2 25 26 27 | 0 0 28 | 29 0 3999 30 0 0 | 31 32 33 | 0 34 4000 .ve 4001 4002 This can be represented as a collection of submatrices as: 4003 4004 .vb 4005 A B C 4006 D E F 4007 G H I 4008 .ve 4009 4010 Where the submatrices A,B,C are owned by proc0, D,E,F are 4011 owned by proc1, G,H,I are owned by proc2. 4012 4013 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4014 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4015 The 'M','N' parameters are 8,8, and have the same values on all procs. 4016 4017 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4018 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4019 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4020 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4021 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4022 matrix, ans [DF] as another SeqAIJ matrix. 4023 4024 When d_nz, o_nz parameters are specified, d_nz storage elements are 4025 allocated for every row of the local diagonal submatrix, and o_nz 4026 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4027 One way to choose d_nz and o_nz is to use the max nonzerors per local 4028 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4029 In this case, the values of d_nz,o_nz are: 4030 .vb 4031 proc0 : dnz = 2, o_nz = 2 4032 proc1 : dnz = 3, o_nz = 2 4033 proc2 : dnz = 1, o_nz = 4 4034 .ve 4035 We are allocating m*(d_nz+o_nz) storage locations for every proc. 
This 4036 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4037 for proc3. i.e we are using 12+15+10=37 storage locations to store 4038 34 values. 4039 4040 When d_nnz, o_nnz parameters are specified, the storage is specified 4041 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4042 In the above case the values for d_nnz,o_nnz are: 4043 .vb 4044 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4045 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4046 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4047 .ve 4048 Here the space allocated is sum of all the above values i.e 34, and 4049 hence pre-allocation is perfect. 4050 4051 Level: intermediate 4052 4053 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4054 `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()` 4055 @*/ 4056 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4057 { 4058 PetscFunctionBegin; 4059 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4060 PetscValidType(B,1); 4061 PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz)); 4062 PetscFunctionReturn(0); 4063 } 4064 4065 /*@ 4066 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4067 CSR format for the local rows. 4068 4069 Collective 4070 4071 Input Parameters: 4072 + comm - MPI communicator 4073 . m - number of local rows (Cannot be PETSC_DECIDE) 4074 . n - This value should be the same as the local size used in creating the 4075 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4076 calculated if N is given) For square matrices n is almost always m. 4077 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4078 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4079 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4080 . j - column indices 4081 - a - matrix values 4082 4083 Output Parameter: 4084 . mat - the matrix 4085 4086 Level: intermediate 4087 4088 Notes: 4089 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4090 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4091 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4092 4093 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4094 4095 The format which is used for the sparse matrix input, is equivalent to a 4096 row-major ordering.. 
i.e for the following matrix, the input data expected is 4097 as shown 4098 4099 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4100 4101 $ 1 0 0 4102 $ 2 0 3 P0 4103 $ ------- 4104 $ 4 5 6 P1 4105 $ 4106 $ Process0 [P0]: rows_owned=[0,1] 4107 $ i = {0,1,3} [size = nrow+1 = 2+1] 4108 $ j = {0,0,2} [size = 3] 4109 $ v = {1,2,3} [size = 3] 4110 $ 4111 $ Process1 [P1]: rows_owned=[2] 4112 $ i = {0,3} [size = nrow+1 = 1+1] 4113 $ j = {0,1,2} [size = 3] 4114 $ v = {4,5,6} [size = 3] 4115 4116 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4117 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4118 @*/ 4119 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4120 { 4121 PetscFunctionBegin; 4122 PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4123 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4124 PetscCall(MatCreate(comm,mat)); 4125 PetscCall(MatSetSizes(*mat,m,n,M,N)); 4126 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4127 PetscCall(MatSetType(*mat,MATMPIAIJ)); 4128 PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a)); 4129 PetscFunctionReturn(0); 4130 } 4131 4132 /*@ 4133 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4134 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4135 4136 Collective 4137 4138 Input Parameters: 4139 + mat - the matrix 4140 . m - number of local rows (Cannot be PETSC_DECIDE) 4141 . n - This value should be the same as the local size used in creating the 4142 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4143 calculated if N is given) For square matrices n is almost always m. 4144 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4145 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4146 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4147 . 
J - column indices 4148 - v - matrix values 4149 4150 Level: intermediate 4151 4152 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4153 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4154 @*/ 4155 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4156 { 4157 PetscInt cstart,nnz,i,j; 4158 PetscInt *ld; 4159 PetscBool nooffprocentries; 4160 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4161 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4162 PetscScalar *ad,*ao; 4163 const PetscInt *Adi = Ad->i; 4164 PetscInt ldi,Iii,md; 4165 4166 PetscFunctionBegin; 4167 PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4168 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4169 PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4170 PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4171 4172 PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4173 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4174 cstart = mat->cmap->rstart; 4175 if (!Aij->ld) { 4176 /* count number of entries below block diagonal */ 4177 PetscCall(PetscCalloc1(m,&ld)); 4178 Aij->ld = ld; 4179 for (i=0; i<m; i++) { 4180 nnz = Ii[i+1]- Ii[i]; 4181 j = 0; 4182 while (J[j] < cstart && j < nnz) {j++;} 4183 J += nnz; 4184 ld[i] = j; 4185 } 4186 } else { 4187 ld = Aij->ld; 4188 } 4189 4190 for (i=0; i<m; i++) { 4191 nnz = Ii[i+1]- Ii[i]; 4192 Iii = Ii[i]; 4193 ldi = ld[i]; 4194 md = Adi[i+1]-Adi[i]; 4195 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4196 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4197 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4198 ad += md; 4199 ao += nnz - md; 4200 } 4201 nooffprocentries = mat->nooffprocentries; 4202 mat->nooffprocentries = PETSC_TRUE; 4203 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4204 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4205 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4206 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4207 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4208 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4209 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4210 mat->nooffprocentries = nooffprocentries; 4211 PetscFunctionReturn(0); 4212 } 4213 4214 /*@C 4215 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4216 (the default parallel PETSc format). For good matrix assembly performance 4217 the user should preallocate the matrix storage by setting the parameters 4218 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4219 performance can be increased by more than a factor of 50. 4220 4221 Collective 4222 4223 Input Parameters: 4224 + comm - MPI communicator 4225 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4226 This value should be the same as the local size used in creating the 4227 y vector for the matrix-vector product y = Ax. 4228 . n - This value should be the same as the local size used in creating the 4229 x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have 4230 calculated if N is given) For square matrices n is almost always m. 4231 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4232 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4233 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4234 (same value is used for all local rows) 4235 . d_nnz - array containing the number of nonzeros in the various rows of the 4236 DIAGONAL portion of the local submatrix (possibly different for each row) 4237 or NULL, if d_nz is used to specify the nonzero structure. 4238 The size of this array is equal to the number of local rows, i.e 'm'. 4239 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4240 submatrix (same value is used for all local rows). 4241 - o_nnz - array containing the number of nonzeros in the various rows of the 4242 OFF-DIAGONAL portion of the local submatrix (possibly different for 4243 each row) or NULL, if o_nz is used to specify the nonzero 4244 structure. The size of this array is equal to the number 4245 of local rows, i.e 'm'. 4246 4247 Output Parameter: 4248 . A - the matrix 4249 4250 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4251 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4252 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4253 4254 Notes: 4255 If the *_nnz parameter is given then the *_nz parameter is ignored 4256 4257 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4258 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4259 storage requirements for this matrix. 4260 4261 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4262 processor than it must be used on all processors that share the object for 4263 that argument. 4264 4265 The user MUST specify either the local or global matrix dimensions 4266 (possibly both). 4267 4268 The parallel matrix is partitioned across processors such that the 4269 first m0 rows belong to process 0, the next m1 rows belong to 4270 process 1, the next m2 rows belong to process 2 etc.. where 4271 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4272 values corresponding to [m x N] submatrix. 4273 4274 The columns are logically partitioned with the n0 columns belonging 4275 to 0th partition, the next n1 columns belonging to the next 4276 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4277 4278 The DIAGONAL portion of the local submatrix on any given processor 4279 is the submatrix corresponding to the rows and columns m,n 4280 corresponding to the given processor. i.e diagonal matrix on 4281 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4282 etc. The remaining portion of the local submatrix [m x (N-n)] 4283 constitute the OFF-DIAGONAL portion. The example below better 4284 illustrates this concept. 4285 4286 For a square global matrix we define each processor's diagonal portion 4287 to be its local rows and the corresponding columns (a square submatrix); 4288 each processor's off-diagonal portion encompasses the remainder of the 4289 local matrix (a rectangular submatrix). 4290 4291 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4292 4293 When calling this routine with a single process communicator, a matrix of 4294 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 4295 type of communicator, use the construction mechanism 4296 .vb 4297 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4298 .ve 4299 4300 $ MatCreate(...,&A); 4301 $ MatSetType(A,MATMPIAIJ); 4302 $ MatSetSizes(A, m,n,M,N); 4303 $ MatMPIAIJSetPreallocation(A,...); 4304 4305 By default, this format uses inodes (identical nodes) when possible. 4306 We search for consecutive rows with the same nonzero structure, thereby 4307 reusing matrix information to achieve increased efficiency. 4308 4309 Options Database Keys: 4310 + -mat_no_inode - Do not use inodes 4311 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4312 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices. 4313 See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4314 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call. 4315 4316 Example usage: 4317 4318 Consider the following 8x8 matrix with 34 non-zero values, that is 4319 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4320 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4321 as follows 4322 4323 .vb 4324 1 2 0 | 0 3 0 | 0 4 4325 Proc0 0 5 6 | 7 0 0 | 8 0 4326 9 0 10 | 11 0 0 | 12 0 4327 ------------------------------------- 4328 13 0 14 | 15 16 17 | 0 0 4329 Proc1 0 18 0 | 19 20 21 | 0 0 4330 0 0 0 | 22 23 0 | 24 0 4331 ------------------------------------- 4332 Proc2 25 26 27 | 0 0 28 | 29 0 4333 30 0 0 | 31 32 33 | 0 34 4334 .ve 4335 4336 This can be represented as a collection of submatrices as 4337 4338 .vb 4339 A B C 4340 D E F 4341 G H I 4342 .ve 4343 4344 Where the submatrices A,B,C are owned by proc0, D,E,F are 4345 owned by proc1, G,H,I are owned by proc2. 4346 4347 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4348 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4349 The 'M','N' parameters are 8,8, and have the same values on all procs. 4350 4351 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4352 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4353 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4354 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4355 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4356 matrix, ans [DF] as another SeqAIJ matrix. 4357 4358 When d_nz, o_nz parameters are specified, d_nz storage elements are 4359 allocated for every row of the local diagonal submatrix, and o_nz 4360 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4361 One way to choose d_nz and o_nz is to use the max nonzerors per local 4362 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4363 In this case, the values of d_nz,o_nz are 4364 .vb 4365 proc0 : dnz = 2, o_nz = 2 4366 proc1 : dnz = 3, o_nz = 2 4367 proc2 : dnz = 1, o_nz = 4 4368 .ve 4369 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4370 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4371 for proc3. i.e we are using 12+15+10=37 storage locations to store 4372 34 values. 4373 4374 When d_nnz, o_nnz parameters are specified, the storage is specified 4375 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 
In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  PetscCall(MatSetSizes(*A,m,n,M,N));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size > 1) {
    PetscCall(MatSetType(*A,MATMPIAIJ));
    PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
  } else {
    PetscCall(MatSetType(*A,MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
  }
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

   Not collective

   Input Parameter:
.  A - The MPIAIJ matrix

   Output Parameters:
+  Ad - The local diagonal block as a SeqAIJ matrix
.  Ao - The local off-diagonal block as a SeqAIJ matrix
-  colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
   in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
   local column numbers to global column numbers in the original matrix.
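   A minimal usage sketch (the variable names are illustrative; Ad and Ao remain owned by A, so they are not
   destroyed here):
.vb
   Mat            Ad,Ao;
   const PetscInt *colmap;
   PetscInt       j,nco;

   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap));
   PetscCall(MatGetLocalSize(Ao,NULL,&nco));   /* number of nonzero off-diagonal columns */
   for (j=0; j<nco; j++) {
     /* colmap[j] is the global column of A corresponding to local column j of Ao */
   }
.ve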
4425 4426 Level: intermediate 4427 4428 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4429 @*/ 4430 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4431 { 4432 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4433 PetscBool flg; 4434 4435 PetscFunctionBegin; 4436 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg)); 4437 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4438 if (Ad) *Ad = a->A; 4439 if (Ao) *Ao = a->B; 4440 if (colmap) *colmap = a->garray; 4441 PetscFunctionReturn(0); 4442 } 4443 4444 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4445 { 4446 PetscInt m,N,i,rstart,nnz,Ii; 4447 PetscInt *indx; 4448 PetscScalar *values; 4449 MatType rootType; 4450 4451 PetscFunctionBegin; 4452 PetscCall(MatGetSize(inmat,&m,&N)); 4453 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4454 PetscInt *dnz,*onz,sum,bs,cbs; 4455 4456 if (n == PETSC_DECIDE) { 4457 PetscCall(PetscSplitOwnership(comm,&n,&N)); 4458 } 4459 /* Check sum(n) = N */ 4460 PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm)); 4461 PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 4462 4463 PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm)); 4464 rstart -= m; 4465 4466 MatPreallocateBegin(comm,m,n,dnz,onz); 4467 for (i=0; i<m; i++) { 4468 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4469 PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz)); 4470 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4471 } 4472 4473 PetscCall(MatCreate(comm,outmat)); 4474 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4475 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 4476 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 4477 PetscCall(MatGetRootType_Private(inmat,&rootType)); 4478 PetscCall(MatSetType(*outmat,rootType)); 4479 PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz)); 4480 PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz)); 4481 MatPreallocateEnd(dnz,onz); 4482 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 4483 } 4484 4485 /* numeric phase */ 4486 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 4487 for (i=0; i<m; i++) { 4488 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4489 Ii = i + rstart; 4490 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 4491 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4492 } 4493 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 4494 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 4495 PetscFunctionReturn(0); 4496 } 4497 4498 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4499 { 4500 PetscMPIInt rank; 4501 PetscInt m,N,i,rstart,nnz; 4502 size_t len; 4503 const PetscInt *indx; 4504 PetscViewer out; 4505 char *name; 4506 Mat B; 4507 const PetscScalar *values; 4508 4509 PetscFunctionBegin; 4510 PetscCall(MatGetLocalSize(A,&m,NULL)); 4511 PetscCall(MatGetSize(A,NULL,&N)); 4512 /* Should this be the type of the diagonal block of A? 
*/ 4513 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 4514 PetscCall(MatSetSizes(B,m,N,m,N)); 4515 PetscCall(MatSetBlockSizesFromMats(B,A,A)); 4516 PetscCall(MatSetType(B,MATSEQAIJ)); 4517 PetscCall(MatSeqAIJSetPreallocation(B,0,NULL)); 4518 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 4519 for (i=0; i<m; i++) { 4520 PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values)); 4521 PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES)); 4522 PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values)); 4523 } 4524 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 4525 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 4526 4527 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 4528 PetscCall(PetscStrlen(outfile,&len)); 4529 PetscCall(PetscMalloc1(len+6,&name)); 4530 PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank)); 4531 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out)); 4532 PetscCall(PetscFree(name)); 4533 PetscCall(MatView(B,out)); 4534 PetscCall(PetscViewerDestroy(&out)); 4535 PetscCall(MatDestroy(&B)); 4536 PetscFunctionReturn(0); 4537 } 4538 4539 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4540 { 4541 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4542 4543 PetscFunctionBegin; 4544 if (!merge) PetscFunctionReturn(0); 4545 PetscCall(PetscFree(merge->id_r)); 4546 PetscCall(PetscFree(merge->len_s)); 4547 PetscCall(PetscFree(merge->len_r)); 4548 PetscCall(PetscFree(merge->bi)); 4549 PetscCall(PetscFree(merge->bj)); 4550 PetscCall(PetscFree(merge->buf_ri[0])); 4551 PetscCall(PetscFree(merge->buf_ri)); 4552 PetscCall(PetscFree(merge->buf_rj[0])); 4553 PetscCall(PetscFree(merge->buf_rj)); 4554 PetscCall(PetscFree(merge->coi)); 4555 PetscCall(PetscFree(merge->coj)); 4556 PetscCall(PetscFree(merge->owners_co)); 4557 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4558 PetscCall(PetscFree(merge)); 4559 PetscFunctionReturn(0); 4560 } 4561 4562 #include <../src/mat/utils/freespace.h> 4563 #include <petscbt.h> 4564 4565 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4566 { 4567 MPI_Comm comm; 4568 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4569 PetscMPIInt size,rank,taga,*len_s; 4570 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4571 PetscInt proc,m; 4572 PetscInt **buf_ri,**buf_rj; 4573 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4574 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4575 MPI_Request *s_waits,*r_waits; 4576 MPI_Status *status; 4577 const MatScalar *aa,*a_a; 4578 MatScalar **abuf_r,*ba_i; 4579 Mat_Merge_SeqsToMPI *merge; 4580 PetscContainer container; 4581 4582 PetscFunctionBegin; 4583 PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm)); 4584 PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0)); 4585 4586 PetscCallMPI(MPI_Comm_size(comm,&size)); 4587 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4588 4589 PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container)); 4590 PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4591 PetscCall(PetscContainerGetPointer(container,(void**)&merge)); 4592 PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a)); 4593 aa = a_a; 4594 4595 bi = merge->bi; 4596 bj = merge->bj; 4597 buf_ri = merge->buf_ri; 4598 buf_rj = merge->buf_rj; 4599 4600 PetscCall(PetscMalloc1(size,&status)); 4601 owners = merge->rowmap->range; 4602 len_s = merge->len_s; 4603 4604 /* send and recv matrix values */ 4605 /*-----------------------------*/ 4606 
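  /* Post receives for the numerical values of the rows this rank will own in mpimat, then send, for every
     destination rank, the contiguous slice of the local aa[] array covering that destination's row range
     (rows destined for [proc] are stored consecutively in seqmat). The message lengths len_s[] and the
     message sources id_r[] were computed earlier in MatCreateMPIAIJSumSeqAIJSymbolic(). */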
PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga)); 4607 PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits)); 4608 4609 PetscCall(PetscMalloc1(merge->nsend+1,&s_waits)); 4610 for (proc=0,k=0; proc<size; proc++) { 4611 if (!len_s[proc]) continue; 4612 i = owners[proc]; 4613 PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k)); 4614 k++; 4615 } 4616 4617 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status)); 4618 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status)); 4619 PetscCall(PetscFree(status)); 4620 4621 PetscCall(PetscFree(s_waits)); 4622 PetscCall(PetscFree(r_waits)); 4623 4624 /* insert mat values of mpimat */ 4625 /*----------------------------*/ 4626 PetscCall(PetscMalloc1(N,&ba_i)); 4627 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4628 4629 for (k=0; k<merge->nrecv; k++) { 4630 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4631 nrows = *(buf_ri_k[k]); 4632 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4633 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4634 } 4635 4636 /* set values of ba */ 4637 m = merge->rowmap->n; 4638 for (i=0; i<m; i++) { 4639 arow = owners[rank] + i; 4640 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4641 bnzi = bi[i+1] - bi[i]; 4642 PetscCall(PetscArrayzero(ba_i,bnzi)); 4643 4644 /* add local non-zero vals of this proc's seqmat into ba */ 4645 anzi = ai[arow+1] - ai[arow]; 4646 aj = a->j + ai[arow]; 4647 aa = a_a + ai[arow]; 4648 nextaj = 0; 4649 for (j=0; nextaj<anzi; j++) { 4650 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4651 ba_i[j] += aa[nextaj++]; 4652 } 4653 } 4654 4655 /* add received vals into ba */ 4656 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4657 /* i-th row */ 4658 if (i == *nextrow[k]) { 4659 anzi = *(nextai[k]+1) - *nextai[k]; 4660 aj = buf_rj[k] + *(nextai[k]); 4661 aa = abuf_r[k] + *(nextai[k]); 4662 nextaj = 0; 4663 for (j=0; nextaj<anzi; j++) { 4664 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4665 ba_i[j] += aa[nextaj++]; 4666 } 4667 } 4668 nextrow[k]++; nextai[k]++; 4669 } 4670 } 4671 PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES)); 4672 } 4673 PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a)); 4674 PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY)); 4675 PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY)); 4676 4677 PetscCall(PetscFree(abuf_r[0])); 4678 PetscCall(PetscFree(abuf_r)); 4679 PetscCall(PetscFree(ba_i)); 4680 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4681 PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0)); 4682 PetscFunctionReturn(0); 4683 } 4684 4685 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4686 { 4687 Mat B_mpi; 4688 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4689 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4690 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4691 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4692 PetscInt len,proc,*dnz,*onz,bs,cbs; 4693 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4694 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4695 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4696 MPI_Status *status; 4697 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4698 PetscBT lnkbt; 4699 Mat_Merge_SeqsToMPI 
*merge; 4700 PetscContainer container; 4701 4702 PetscFunctionBegin; 4703 PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0)); 4704 4705 /* make sure it is a PETSc comm */ 4706 PetscCall(PetscCommDuplicate(comm,&comm,NULL)); 4707 PetscCallMPI(MPI_Comm_size(comm,&size)); 4708 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4709 4710 PetscCall(PetscNew(&merge)); 4711 PetscCall(PetscMalloc1(size,&status)); 4712 4713 /* determine row ownership */ 4714 /*---------------------------------------------------------*/ 4715 PetscCall(PetscLayoutCreate(comm,&merge->rowmap)); 4716 PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m)); 4717 PetscCall(PetscLayoutSetSize(merge->rowmap,M)); 4718 PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1)); 4719 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4720 PetscCall(PetscMalloc1(size,&len_si)); 4721 PetscCall(PetscMalloc1(size,&merge->len_s)); 4722 4723 m = merge->rowmap->n; 4724 owners = merge->rowmap->range; 4725 4726 /* determine the number of messages to send, their lengths */ 4727 /*---------------------------------------------------------*/ 4728 len_s = merge->len_s; 4729 4730 len = 0; /* length of buf_si[] */ 4731 merge->nsend = 0; 4732 for (proc=0; proc<size; proc++) { 4733 len_si[proc] = 0; 4734 if (proc == rank) { 4735 len_s[proc] = 0; 4736 } else { 4737 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4738 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4739 } 4740 if (len_s[proc]) { 4741 merge->nsend++; 4742 nrows = 0; 4743 for (i=owners[proc]; i<owners[proc+1]; i++) { 4744 if (ai[i+1] > ai[i]) nrows++; 4745 } 4746 len_si[proc] = 2*(nrows+1); 4747 len += len_si[proc]; 4748 } 4749 } 4750 4751 /* determine the number and length of messages to receive for ij-structure */ 4752 /*-------------------------------------------------------------------------*/ 4753 PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv)); 4754 PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri)); 4755 4756 /* post the Irecv of j-structure */ 4757 /*-------------------------------*/ 4758 PetscCall(PetscCommGetNewTag(comm,&tagj)); 4759 PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits)); 4760 4761 /* post the Isend of j-structure */ 4762 /*--------------------------------*/ 4763 PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits)); 4764 4765 for (proc=0, k=0; proc<size; proc++) { 4766 if (!len_s[proc]) continue; 4767 i = owners[proc]; 4768 PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k)); 4769 k++; 4770 } 4771 4772 /* receives and sends of j-structure are complete */ 4773 /*------------------------------------------------*/ 4774 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status)); 4775 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status)); 4776 4777 /* send and recv i-structure */ 4778 /*---------------------------*/ 4779 PetscCall(PetscCommGetNewTag(comm,&tagi)); 4780 PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits)); 4781 4782 PetscCall(PetscMalloc1(len+1,&buf_s)); 4783 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4784 for (proc=0,k=0; proc<size; proc++) { 4785 if (!len_s[proc]) continue; 4786 /* form outgoing message for i-structure: 4787 buf_si[0]: nrows to be sent 4788 [1:nrows]: row index (global) 4789 [nrows+1:2*nrows+1]: i-structure index 4790 */ 4791 
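  /* Note that the row indices packed below are local to the destination's ownership range (i - owners[proc]);
     the receiving rank compares them directly against its own local row numbers in the numeric phase. */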
/*-------------------------------------------*/ 4792 nrows = len_si[proc]/2 - 1; 4793 buf_si_i = buf_si + nrows+1; 4794 buf_si[0] = nrows; 4795 buf_si_i[0] = 0; 4796 nrows = 0; 4797 for (i=owners[proc]; i<owners[proc+1]; i++) { 4798 anzi = ai[i+1] - ai[i]; 4799 if (anzi) { 4800 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4801 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4802 nrows++; 4803 } 4804 } 4805 PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k)); 4806 k++; 4807 buf_si += len_si[proc]; 4808 } 4809 4810 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status)); 4811 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status)); 4812 4813 PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv)); 4814 for (i=0; i<merge->nrecv; i++) { 4815 PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i])); 4816 } 4817 4818 PetscCall(PetscFree(len_si)); 4819 PetscCall(PetscFree(len_ri)); 4820 PetscCall(PetscFree(rj_waits)); 4821 PetscCall(PetscFree2(si_waits,sj_waits)); 4822 PetscCall(PetscFree(ri_waits)); 4823 PetscCall(PetscFree(buf_s)); 4824 PetscCall(PetscFree(status)); 4825 4826 /* compute a local seq matrix in each processor */ 4827 /*----------------------------------------------*/ 4828 /* allocate bi array and free space for accumulating nonzero column info */ 4829 PetscCall(PetscMalloc1(m+1,&bi)); 4830 bi[0] = 0; 4831 4832 /* create and initialize a linked list */ 4833 nlnk = N+1; 4834 PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt)); 4835 4836 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4837 len = ai[owners[rank+1]] - ai[owners[rank]]; 4838 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space)); 4839 4840 current_space = free_space; 4841 4842 /* determine symbolic info for each local row */ 4843 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4844 4845 for (k=0; k<merge->nrecv; k++) { 4846 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4847 nrows = *buf_ri_k[k]; 4848 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4849 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4850 } 4851 4852 MatPreallocateBegin(comm,m,n,dnz,onz); 4853 len = 0; 4854 for (i=0; i<m; i++) { 4855 bnzi = 0; 4856 /* add local non-zero cols of this proc's seqmat into lnk */ 4857 arow = owners[rank] + i; 4858 anzi = ai[arow+1] - ai[arow]; 4859 aj = a->j + ai[arow]; 4860 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4861 bnzi += nlnk; 4862 /* add received col data into lnk */ 4863 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4864 if (i == *nextrow[k]) { /* i-th row */ 4865 anzi = *(nextai[k]+1) - *nextai[k]; 4866 aj = buf_rj[k] + *nextai[k]; 4867 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4868 bnzi += nlnk; 4869 nextrow[k]++; nextai[k]++; 4870 } 4871 } 4872 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4873 4874 /* if free space is not available, make more free space */ 4875 if (current_space->local_remaining<bnzi) { 4876 PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space)); 4877 nspacedouble++; 4878 } 4879 /* copy data into free space, then initialize lnk */ 4880 PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt)); 4881 
PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz)); 4882 4883 current_space->array += bnzi; 4884 current_space->local_used += bnzi; 4885 current_space->local_remaining -= bnzi; 4886 4887 bi[i+1] = bi[i] + bnzi; 4888 } 4889 4890 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4891 4892 PetscCall(PetscMalloc1(bi[m]+1,&bj)); 4893 PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); 4894 PetscCall(PetscLLDestroy(lnk,lnkbt)); 4895 4896 /* create symbolic parallel matrix B_mpi */ 4897 /*---------------------------------------*/ 4898 PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs)); 4899 PetscCall(MatCreate(comm,&B_mpi)); 4900 if (n==PETSC_DECIDE) { 4901 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N)); 4902 } else { 4903 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4904 } 4905 PetscCall(MatSetBlockSizes(B_mpi,bs,cbs)); 4906 PetscCall(MatSetType(B_mpi,MATMPIAIJ)); 4907 PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz)); 4908 MatPreallocateEnd(dnz,onz); 4909 PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 4910 4911 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4912 B_mpi->assembled = PETSC_FALSE; 4913 merge->bi = bi; 4914 merge->bj = bj; 4915 merge->buf_ri = buf_ri; 4916 merge->buf_rj = buf_rj; 4917 merge->coi = NULL; 4918 merge->coj = NULL; 4919 merge->owners_co = NULL; 4920 4921 PetscCall(PetscCommDestroy(&comm)); 4922 4923 /* attach the supporting struct to B_mpi for reuse */ 4924 PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container)); 4925 PetscCall(PetscContainerSetPointer(container,merge)); 4926 PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI)); 4927 PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container)); 4928 PetscCall(PetscContainerDestroy(&container)); 4929 *mpimat = B_mpi; 4930 4931 PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0)); 4932 PetscFunctionReturn(0); 4933 } 4934 4935 /*@C 4936 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4937 matrices from each processor 4938 4939 Collective 4940 4941 Input Parameters: 4942 + comm - the communicators the parallel matrix will live on 4943 . seqmat - the input sequential matrices 4944 . m - number of local rows (or PETSC_DECIDE) 4945 . n - number of local columns (or PETSC_DECIDE) 4946 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4947 4948 Output Parameter: 4949 . mpimat - the parallel matrix generated 4950 4951 Level: advanced 4952 4953 Notes: 4954 The dimensions of the sequential matrix in each processor MUST be the same. 4955 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4956 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
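   A typical call sequence, sketched under the assumption that every rank has assembled a sequential matrix
   seqmat of the same global size (for instance by summing element contributions locally):
.vb
   Mat seqmat,mpimat;

   /* ... assemble seqmat as a MATSEQAIJ of size M x N on every rank ... */
   PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat));
   /* ... change the values (not the nonzero pattern) of seqmat ... */
   PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat));
.ve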
4957 @*/ 4958 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4959 { 4960 PetscMPIInt size; 4961 4962 PetscFunctionBegin; 4963 PetscCallMPI(MPI_Comm_size(comm,&size)); 4964 if (size == 1) { 4965 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4966 if (scall == MAT_INITIAL_MATRIX) { 4967 PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat)); 4968 } else { 4969 PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN)); 4970 } 4971 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4972 PetscFunctionReturn(0); 4973 } 4974 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4975 if (scall == MAT_INITIAL_MATRIX) { 4976 PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat)); 4977 } 4978 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat)); 4979 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4980 PetscFunctionReturn(0); 4981 } 4982 4983 /*@ 4984 MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4985 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4986 with MatGetSize() 4987 4988 Not Collective 4989 4990 Input Parameters: 4991 + A - the matrix 4992 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4993 4994 Output Parameter: 4995 . A_loc - the local sequential matrix generated 4996 4997 Level: developer 4998 4999 Notes: 5000 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 5001 5002 Destroy the matrix with MatDestroy() 5003 5004 .seealso: MatMPIAIJGetLocalMat() 5005 5006 @*/ 5007 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc) 5008 { 5009 PetscBool mpi; 5010 5011 PetscFunctionBegin; 5012 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi)); 5013 if (mpi) { 5014 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc)); 5015 } else { 5016 *A_loc = A; 5017 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5018 } 5019 PetscFunctionReturn(0); 5020 } 5021 5022 /*@ 5023 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5024 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5025 with MatGetSize() 5026 5027 Not Collective 5028 5029 Input Parameters: 5030 + A - the matrix 5031 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5032 5033 Output Parameter: 5034 . A_loc - the local sequential matrix generated 5035 5036 Level: developer 5037 5038 Notes: 5039 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 5040 5041 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5042 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5043 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5044 modify the values of the returned A_loc. 
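   A small sketch of the create-then-reuse pattern described above (names are illustrative):
.vb
   Mat A_loc;

   PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc)); /* build the local [m x N] SeqAIJ matrix */
   /* ... A changes numerically but keeps its nonzero pattern ... */
   PetscCall(MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc));   /* refresh the values in place */
   PetscCall(MatDestroy(&A_loc));
.ve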
5045 5046 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5047 @*/ 5048 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5049 { 5050 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5051 Mat_SeqAIJ *mat,*a,*b; 5052 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5053 const PetscScalar *aa,*ba,*aav,*bav; 5054 PetscScalar *ca,*cam; 5055 PetscMPIInt size; 5056 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5057 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5058 PetscBool match; 5059 5060 PetscFunctionBegin; 5061 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match)); 5062 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5063 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5064 if (size == 1) { 5065 if (scall == MAT_INITIAL_MATRIX) { 5066 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5067 *A_loc = mpimat->A; 5068 } else if (scall == MAT_REUSE_MATRIX) { 5069 PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN)); 5070 } 5071 PetscFunctionReturn(0); 5072 } 5073 5074 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5075 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5076 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5077 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5078 PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav)); 5079 PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav)); 5080 aa = aav; 5081 ba = bav; 5082 if (scall == MAT_INITIAL_MATRIX) { 5083 PetscCall(PetscMalloc1(1+am,&ci)); 5084 ci[0] = 0; 5085 for (i=0; i<am; i++) { 5086 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5087 } 5088 PetscCall(PetscMalloc1(1+ci[am],&cj)); 5089 PetscCall(PetscMalloc1(1+ci[am],&ca)); 5090 k = 0; 5091 for (i=0; i<am; i++) { 5092 ncols_o = bi[i+1] - bi[i]; 5093 ncols_d = ai[i+1] - ai[i]; 5094 /* off-diagonal portion of A */ 5095 for (jo=0; jo<ncols_o; jo++) { 5096 col = cmap[*bj]; 5097 if (col >= cstart) break; 5098 cj[k] = col; bj++; 5099 ca[k++] = *ba++; 5100 } 5101 /* diagonal portion of A */ 5102 for (j=0; j<ncols_d; j++) { 5103 cj[k] = cstart + *aj++; 5104 ca[k++] = *aa++; 5105 } 5106 /* off-diagonal portion of A */ 5107 for (j=jo; j<ncols_o; j++) { 5108 cj[k] = cmap[*bj++]; 5109 ca[k++] = *ba++; 5110 } 5111 } 5112 /* put together the new matrix */ 5113 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc)); 5114 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5115 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5116 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5117 mat->free_a = PETSC_TRUE; 5118 mat->free_ij = PETSC_TRUE; 5119 mat->nonew = 0; 5120 } else if (scall == MAT_REUSE_MATRIX) { 5121 mat =(Mat_SeqAIJ*)(*A_loc)->data; 5122 ci = mat->i; 5123 cj = mat->j; 5124 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam)); 5125 for (i=0; i<am; i++) { 5126 /* off-diagonal portion of A */ 5127 ncols_o = bi[i+1] - bi[i]; 5128 for (jo=0; jo<ncols_o; jo++) { 5129 col = cmap[*bj]; 5130 if (col >= cstart) break; 5131 *cam++ = *ba++; bj++; 5132 } 5133 /* diagonal portion of A */ 5134 ncols_d = ai[i+1] - ai[i]; 5135 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5136 /* off-diagonal portion of A */ 5137 for (j=jo; j<ncols_o; j++) { 5138 *cam++ = *ba++; bj++; 5139 } 5140 } 5141 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam)); 5142 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5143 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav)); 5144 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav)); 5145 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5146 PetscFunctionReturn(0); 5147 } 5148 5149 /*@ 5150 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5151 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5152 5153 Not Collective 5154 5155 Input Parameters: 5156 + A - the matrix 5157 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5158 5159 Output Parameters: 5160 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5161 - A_loc - the local sequential matrix generated 5162 5163 Level: developer 5164 5165 Notes: 5166 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5167 5168 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5169 5170 @*/ 5171 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5172 { 5173 Mat Ao,Ad; 5174 const PetscInt *cmap; 5175 PetscMPIInt size; 5176 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5177 5178 PetscFunctionBegin; 5179 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap)); 5180 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5181 if (size == 1) { 5182 if (scall == MAT_INITIAL_MATRIX) { 5183 PetscCall(PetscObjectReference((PetscObject)Ad)); 5184 *A_loc = Ad; 5185 } else if (scall == MAT_REUSE_MATRIX) { 5186 PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN)); 5187 } 5188 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob)); 5189 PetscFunctionReturn(0); 5190 } 5191 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f)); 5192 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5193 if (f) { 5194 PetscCall((*f)(A,scall,glob,A_loc)); 5195 } else { 5196 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5197 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5198 Mat_SeqAIJ *c; 5199 PetscInt *ai = a->i, *aj = a->j; 5200 PetscInt *bi = b->i, *bj = b->j; 5201 PetscInt *ci,*cj; 5202 const PetscScalar *aa,*ba; 5203 PetscScalar *ca; 5204 PetscInt i,j,am,dn,on; 5205 5206 PetscCall(MatGetLocalSize(Ad,&am,&dn)); 5207 PetscCall(MatGetLocalSize(Ao,NULL,&on)); 5208 PetscCall(MatSeqAIJGetArrayRead(Ad,&aa)); 5209 
PetscCall(MatSeqAIJGetArrayRead(Ao,&ba)); 5210 if (scall == MAT_INITIAL_MATRIX) { 5211 PetscInt k; 5212 PetscCall(PetscMalloc1(1+am,&ci)); 5213 PetscCall(PetscMalloc1(ai[am]+bi[am],&cj)); 5214 PetscCall(PetscMalloc1(ai[am]+bi[am],&ca)); 5215 ci[0] = 0; 5216 for (i=0,k=0; i<am; i++) { 5217 const PetscInt ncols_o = bi[i+1] - bi[i]; 5218 const PetscInt ncols_d = ai[i+1] - ai[i]; 5219 ci[i+1] = ci[i] + ncols_o + ncols_d; 5220 /* diagonal portion of A */ 5221 for (j=0; j<ncols_d; j++,k++) { 5222 cj[k] = *aj++; 5223 ca[k] = *aa++; 5224 } 5225 /* off-diagonal portion of A */ 5226 for (j=0; j<ncols_o; j++,k++) { 5227 cj[k] = dn + *bj++; 5228 ca[k] = *ba++; 5229 } 5230 } 5231 /* put together the new matrix */ 5232 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc)); 5233 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5234 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5235 c = (Mat_SeqAIJ*)(*A_loc)->data; 5236 c->free_a = PETSC_TRUE; 5237 c->free_ij = PETSC_TRUE; 5238 c->nonew = 0; 5239 PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name)); 5240 } else if (scall == MAT_REUSE_MATRIX) { 5241 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca)); 5242 for (i=0; i<am; i++) { 5243 const PetscInt ncols_d = ai[i+1] - ai[i]; 5244 const PetscInt ncols_o = bi[i+1] - bi[i]; 5245 /* diagonal portion of A */ 5246 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5247 /* off-diagonal portion of A */ 5248 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5249 } 5250 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca)); 5251 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5252 PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa)); 5253 PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa)); 5254 if (glob) { 5255 PetscInt cst, *gidx; 5256 5257 PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL)); 5258 PetscCall(PetscMalloc1(dn+on,&gidx)); 5259 for (i=0; i<dn; i++) gidx[i] = cst + i; 5260 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5261 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob)); 5262 } 5263 } 5264 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5265 PetscFunctionReturn(0); 5266 } 5267 5268 /*@C 5269 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5270 5271 Not Collective 5272 5273 Input Parameters: 5274 + A - the matrix 5275 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5276 - row, col - index sets of rows and columns to extract (or NULL) 5277 5278 Output Parameter: 5279 . 
A_loc - the local sequential matrix generated 5280 5281 Level: developer 5282 5283 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5284 5285 @*/ 5286 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5287 { 5288 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5289 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5290 IS isrowa,iscola; 5291 Mat *aloc; 5292 PetscBool match; 5293 5294 PetscFunctionBegin; 5295 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match)); 5296 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5297 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0)); 5298 if (!row) { 5299 start = A->rmap->rstart; end = A->rmap->rend; 5300 PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa)); 5301 } else { 5302 isrowa = *row; 5303 } 5304 if (!col) { 5305 start = A->cmap->rstart; 5306 cmap = a->garray; 5307 nzA = a->A->cmap->n; 5308 nzB = a->B->cmap->n; 5309 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5310 ncols = 0; 5311 for (i=0; i<nzB; i++) { 5312 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5313 else break; 5314 } 5315 imark = i; 5316 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5317 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5318 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola)); 5319 } else { 5320 iscola = *col; 5321 } 5322 if (scall != MAT_INITIAL_MATRIX) { 5323 PetscCall(PetscMalloc1(1,&aloc)); 5324 aloc[0] = *A_loc; 5325 } 5326 PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc)); 5327 if (!col) { /* attach global id of condensed columns */ 5328 PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola)); 5329 } 5330 *A_loc = aloc[0]; 5331 PetscCall(PetscFree(aloc)); 5332 if (!row) { 5333 PetscCall(ISDestroy(&isrowa)); 5334 } 5335 if (!col) { 5336 PetscCall(ISDestroy(&iscola)); 5337 } 5338 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0)); 5339 PetscFunctionReturn(0); 5340 } 5341 5342 /* 5343 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5344 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5345 * on a global size. 
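 * PetscSF broadcasts are used to pull the row lengths, column indices, and numerical values of the requested
 * rows from their owning ranks; the final pair of star forests is attached to P_oth ("diagsf" and "offdiagsf")
 * so that MatGetBrowsOfAcols_MPIXAIJ() can later refresh only the numerical values.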
5346 * */ 5347 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5348 { 5349 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5350 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5351 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5352 PetscMPIInt owner; 5353 PetscSFNode *iremote,*oiremote; 5354 const PetscInt *lrowindices; 5355 PetscSF sf,osf; 5356 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5357 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5358 MPI_Comm comm; 5359 ISLocalToGlobalMapping mapping; 5360 const PetscScalar *pd_a,*po_a; 5361 5362 PetscFunctionBegin; 5363 PetscCall(PetscObjectGetComm((PetscObject)P,&comm)); 5364 /* plocalsize is the number of roots 5365 * nrows is the number of leaves 5366 * */ 5367 PetscCall(MatGetLocalSize(P,&plocalsize,NULL)); 5368 PetscCall(ISGetLocalSize(rows,&nrows)); 5369 PetscCall(PetscCalloc1(nrows,&iremote)); 5370 PetscCall(ISGetIndices(rows,&lrowindices)); 5371 for (i=0;i<nrows;i++) { 5372 /* Find a remote index and an owner for a row 5373 * The row could be local or remote 5374 * */ 5375 owner = 0; 5376 lidx = 0; 5377 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx)); 5378 iremote[i].index = lidx; 5379 iremote[i].rank = owner; 5380 } 5381 /* Create SF to communicate how many nonzero columns for each row */ 5382 PetscCall(PetscSFCreate(comm,&sf)); 5383 /* SF will figure out the number of nonzero colunms for each row, and their 5384 * offsets 5385 * */ 5386 PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5387 PetscCall(PetscSFSetFromOptions(sf)); 5388 PetscCall(PetscSFSetUp(sf)); 5389 5390 PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets)); 5391 PetscCall(PetscCalloc1(2*plocalsize,&nrcols)); 5392 PetscCall(PetscCalloc1(nrows,&pnnz)); 5393 roffsets[0] = 0; 5394 roffsets[1] = 0; 5395 for (i=0;i<plocalsize;i++) { 5396 /* diag */ 5397 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5398 /* off diag */ 5399 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5400 /* compute offsets so that we relative location for each row */ 5401 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5402 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5403 } 5404 PetscCall(PetscCalloc1(2*nrows,&nlcols)); 5405 PetscCall(PetscCalloc1(2*nrows,&loffsets)); 5406 /* 'r' means root, and 'l' means leaf */ 5407 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5408 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5409 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5410 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5411 PetscCall(PetscSFDestroy(&sf)); 5412 PetscCall(PetscFree(roffsets)); 5413 PetscCall(PetscFree(nrcols)); 5414 dntotalcols = 0; 5415 ontotalcols = 0; 5416 ncol = 0; 5417 for (i=0;i<nrows;i++) { 5418 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5419 ncol = PetscMax(pnnz[i],ncol); 5420 /* diag */ 5421 dntotalcols += nlcols[i*2+0]; 5422 /* off diag */ 5423 ontotalcols += nlcols[i*2+1]; 5424 } 5425 /* We do not need to figure the right number of columns 5426 * since all the calculations will be done by going through the raw data 5427 * */ 5428 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth)); 5429 PetscCall(MatSetUp(*P_oth)); 5430 PetscCall(PetscFree(pnnz)); 5431 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5432 /* diag */ 5433 PetscCall(PetscCalloc1(dntotalcols,&iremote)); 5434 /* off diag */ 5435 
PetscCall(PetscCalloc1(ontotalcols,&oiremote)); 5436 /* diag */ 5437 PetscCall(PetscCalloc1(dntotalcols,&ilocal)); 5438 /* off diag */ 5439 PetscCall(PetscCalloc1(ontotalcols,&oilocal)); 5440 dntotalcols = 0; 5441 ontotalcols = 0; 5442 ntotalcols = 0; 5443 for (i=0;i<nrows;i++) { 5444 owner = 0; 5445 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL)); 5446 /* Set iremote for diag matrix */ 5447 for (j=0;j<nlcols[i*2+0];j++) { 5448 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5449 iremote[dntotalcols].rank = owner; 5450 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5451 ilocal[dntotalcols++] = ntotalcols++; 5452 } 5453 /* off diag */ 5454 for (j=0;j<nlcols[i*2+1];j++) { 5455 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5456 oiremote[ontotalcols].rank = owner; 5457 oilocal[ontotalcols++] = ntotalcols++; 5458 } 5459 } 5460 PetscCall(ISRestoreIndices(rows,&lrowindices)); 5461 PetscCall(PetscFree(loffsets)); 5462 PetscCall(PetscFree(nlcols)); 5463 PetscCall(PetscSFCreate(comm,&sf)); 5464 /* P serves as roots and P_oth is leaves 5465 * Diag matrix 5466 * */ 5467 PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5468 PetscCall(PetscSFSetFromOptions(sf)); 5469 PetscCall(PetscSFSetUp(sf)); 5470 5471 PetscCall(PetscSFCreate(comm,&osf)); 5472 /* Off diag */ 5473 PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER)); 5474 PetscCall(PetscSFSetFromOptions(osf)); 5475 PetscCall(PetscSFSetUp(osf)); 5476 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5477 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5478 /* We operate on the matrix internal data for saving memory */ 5479 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5480 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5481 PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL)); 5482 /* Convert to global indices for diag matrix */ 5483 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5484 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5485 /* We want P_oth store global indices */ 5486 PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping)); 5487 /* Use memory scalable approach */ 5488 PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH)); 5489 PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j)); 5490 PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5491 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5492 /* Convert back to local indices */ 5493 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5494 PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5495 nout = 0; 5496 PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j)); 5497 PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout); 5498 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5499 /* Exchange values */ 5500 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5501 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5502 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5503 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5504 /* Stop PETSc from shrinking memory */ 5505 for (i=0;i<nrows;i++) 
p_oth->ilen[i] = p_oth->imax[i]; 5506 PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY)); 5507 PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY)); 5508 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5509 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf)); 5510 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf)); 5511 PetscCall(PetscSFDestroy(&sf)); 5512 PetscCall(PetscSFDestroy(&osf)); 5513 PetscFunctionReturn(0); 5514 } 5515 5516 /* 5517 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5518 * This supports MPIAIJ and MAIJ 5519 * */ 5520 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5521 { 5522 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5523 Mat_SeqAIJ *p_oth; 5524 IS rows,map; 5525 PetscHMapI hamp; 5526 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5527 MPI_Comm comm; 5528 PetscSF sf,osf; 5529 PetscBool has; 5530 5531 PetscFunctionBegin; 5532 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5533 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0)); 5534 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5535 * and then create a submatrix (that often is an overlapping matrix) 5536 * */ 5537 if (reuse == MAT_INITIAL_MATRIX) { 5538 /* Use a hash table to figure out unique keys */ 5539 PetscCall(PetscHMapICreate(&hamp)); 5540 PetscCall(PetscHMapIResize(hamp,a->B->cmap->n)); 5541 PetscCall(PetscCalloc1(a->B->cmap->n,&mapping)); 5542 count = 0; 5543 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5544 for (i=0;i<a->B->cmap->n;i++) { 5545 key = a->garray[i]/dof; 5546 PetscCall(PetscHMapIHas(hamp,key,&has)); 5547 if (!has) { 5548 mapping[i] = count; 5549 PetscCall(PetscHMapISet(hamp,key,count++)); 5550 } else { 5551 /* Current 'i' has the same value the previous step */ 5552 mapping[i] = count-1; 5553 } 5554 } 5555 PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map)); 5556 PetscCall(PetscHMapIGetSize(hamp,&htsize)); 5557 PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count); 5558 PetscCall(PetscCalloc1(htsize,&rowindices)); 5559 off = 0; 5560 PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices)); 5561 PetscCall(PetscHMapIDestroy(&hamp)); 5562 PetscCall(PetscSortInt(htsize,rowindices)); 5563 PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows)); 5564 /* In case, the matrix was already created but users want to recreate the matrix */ 5565 PetscCall(MatDestroy(P_oth)); 5566 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth)); 5567 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map)); 5568 PetscCall(ISDestroy(&map)); 5569 PetscCall(ISDestroy(&rows)); 5570 } else if (reuse == MAT_REUSE_MATRIX) { 5571 /* If matrix was already created, we simply update values using SF objects 5572 * that as attached to the matrix ealier. 
5573 */ 5574 const PetscScalar *pd_a,*po_a; 5575 5576 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf)); 5577 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf)); 5578 PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5579 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5580 /* Update values in place */ 5581 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5582 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5583 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5584 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5585 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5586 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5587 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5588 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5589 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5590 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0)); 5591 PetscFunctionReturn(0); 5592 } 5593 5594 /*@C 5595 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5596 5597 Collective on Mat 5598 5599 Input Parameters: 5600 + A - the first matrix in mpiaij format 5601 . B - the second matrix in mpiaij format 5602 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5603 5604 Output Parameters: 5605 + rowb - On input index sets of rows of B to extract (or NULL), modified on output 5606 . colb - On input index sets of columns of B to extract (or NULL), modified on output 5607 - B_seq - the sequential matrix generated 5608 5609 Level: developer 5610 5611 @*/ 5612 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5613 { 5614 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5615 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5616 IS isrowb,iscolb; 5617 Mat *bseq=NULL; 5618 5619 PetscFunctionBegin; 5620 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5621 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5622 } 5623 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0)); 5624 5625 if (scall == MAT_INITIAL_MATRIX) { 5626 start = A->cmap->rstart; 5627 cmap = a->garray; 5628 nzA = a->A->cmap->n; 5629 nzB = a->B->cmap->n; 5630 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5631 ncols = 0; 5632 for (i=0; i<nzB; i++) { /* row < local row index */ 5633 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5634 else break; 5635 } 5636 imark = i; 5637 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5638 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5639 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb)); 5640 PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb)); 5641 } else { 5642 PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5643 isrowb = *rowb; iscolb = *colb; 5644 PetscCall(PetscMalloc1(1,&bseq)); 5645 bseq[0] = *B_seq; 5646 } 5647 PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq)); 5648 *B_seq = bseq[0]; 5649 PetscCall(PetscFree(bseq)); 5650 if (!rowb) { 5651 PetscCall(ISDestroy(&isrowb)); 5652 } else { 5653 *rowb = isrowb; 5654 } 5655 if (!colb) { 5656 PetscCall(ISDestroy(&iscolb)); 5657 } else 
{ 5658 *colb = iscolb; 5659 } 5660 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0)); 5661 PetscFunctionReturn(0); 5662 } 5663 5664 /* 5665 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5666 of the OFF-DIAGONAL portion of local A 5667 5668 Collective on Mat 5669 5670 Input Parameters: 5671 + A,B - the matrices in mpiaij format 5672 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5673 5674 Output Parameter: 5675 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5676 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5677 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5678 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5679 5680 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5681 for this matrix. This is not desirable.. 5682 5683 Level: developer 5684 5685 */ 5686 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5687 { 5688 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5689 Mat_SeqAIJ *b_oth; 5690 VecScatter ctx; 5691 MPI_Comm comm; 5692 const PetscMPIInt *rprocs,*sprocs; 5693 const PetscInt *srow,*rstarts,*sstarts; 5694 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5695 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5696 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5697 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5698 PetscMPIInt size,tag,rank,nreqs; 5699 5700 PetscFunctionBegin; 5701 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5702 PetscCallMPI(MPI_Comm_size(comm,&size)); 5703 5704 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5705 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5706 } 5707 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0)); 5708 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 5709 5710 if (size == 1) { 5711 startsj_s = NULL; 5712 bufa_ptr = NULL; 5713 *B_oth = NULL; 5714 PetscFunctionReturn(0); 5715 } 5716 5717 ctx = a->Mvctx; 5718 tag = ((PetscObject)ctx)->tag; 5719 5720 PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5721 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5722 PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs)); 5723 PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs)); 5724 PetscCall(PetscMalloc1(nreqs,&reqs)); 5725 rwaits = reqs; 5726 swaits = reqs + nrecvs; 5727 5728 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5729 if (scall == MAT_INITIAL_MATRIX) { 5730 /* i-array */ 5731 /*---------*/ 5732 /* post receives */ 5733 if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5734 for (i=0; i<nrecvs; i++) { 5735 rowlen = rvalues + rstarts[i]*rbs; 5736 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5737 
PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5738 } 5739 5740 /* pack the outgoing message */ 5741 PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj)); 5742 5743 sstartsj[0] = 0; 5744 rstartsj[0] = 0; 5745 len = 0; /* total length of j or a array to be sent */ 5746 if (nsends) { 5747 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5748 PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues)); 5749 } 5750 for (i=0; i<nsends; i++) { 5751 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5752 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5753 for (j=0; j<nrows; j++) { 5754 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5755 for (l=0; l<sbs; l++) { 5756 PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */ 5757 5758 rowlen[j*sbs+l] = ncols; 5759 5760 len += ncols; 5761 PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); 5762 } 5763 k++; 5764 } 5765 PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5766 5767 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5768 } 5769 /* recvs and sends of i-array are completed */ 5770 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5771 PetscCall(PetscFree(svalues)); 5772 5773 /* allocate buffers for sending j and a arrays */ 5774 PetscCall(PetscMalloc1(len+1,&bufj)); 5775 PetscCall(PetscMalloc1(len+1,&bufa)); 5776 5777 /* create i-array of B_oth */ 5778 PetscCall(PetscMalloc1(aBn+2,&b_othi)); 5779 5780 b_othi[0] = 0; 5781 len = 0; /* total length of j or a array to be received */ 5782 k = 0; 5783 for (i=0; i<nrecvs; i++) { 5784 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5785 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5786 for (j=0; j<nrows; j++) { 5787 b_othi[k+1] = b_othi[k] + rowlen[j]; 5788 PetscCall(PetscIntSumError(rowlen[j],len,&len)); 5789 k++; 5790 } 5791 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5792 } 5793 PetscCall(PetscFree(rvalues)); 5794 5795 /* allocate space for j and a arrays of B_oth */ 5796 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj)); 5797 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha)); 5798 5799 /* j-array */ 5800 /*---------*/ 5801 /* post receives of j-array */ 5802 for (i=0; i<nrecvs; i++) { 5803 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5804 PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5805 } 5806 5807 /* pack the outgoing message j-array */ 5808 if (nsends) k = sstarts[0]; 5809 for (i=0; i<nsends; i++) { 5810 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5811 bufJ = bufj+sstartsj[i]; 5812 for (j=0; j<nrows; j++) { 5813 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5814 for (ll=0; ll<sbs; ll++) { 5815 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5816 for (l=0; l<ncols; l++) { 5817 *bufJ++ = cols[l]; 5818 } 5819 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5820 } 5821 } 5822 PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5823 } 5824 5825 /* recvs and sends of j-array are completed */ 5826 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5827 } else if (scall == MAT_REUSE_MATRIX) { 5828 sstartsj = *startsj_s; 5829 rstartsj = *startsj_r; 5830 bufa = *bufa_ptr; 5831 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5832 
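    /* In the reuse case the i/j structure of B_oth and the send buffers already exist; only the numerical
       values (the a-array) are exchanged below. */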
PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha)); 5833 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5834 5835 /* a-array */ 5836 /*---------*/ 5837 /* post receives of a-array */ 5838 for (i=0; i<nrecvs; i++) { 5839 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5840 PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i)); 5841 } 5842 5843 /* pack the outgoing message a-array */ 5844 if (nsends) k = sstarts[0]; 5845 for (i=0; i<nsends; i++) { 5846 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5847 bufA = bufa+sstartsj[i]; 5848 for (j=0; j<nrows; j++) { 5849 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5850 for (ll=0; ll<sbs; ll++) { 5851 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5852 for (l=0; l<ncols; l++) { 5853 *bufA++ = vals[l]; 5854 } 5855 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5856 } 5857 } 5858 PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i)); 5859 } 5860 /* recvs and sends of a-array are completed */ 5861 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5862 PetscCall(PetscFree(reqs)); 5863 5864 if (scall == MAT_INITIAL_MATRIX) { 5865 /* put together the new matrix */ 5866 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth)); 5867 5868 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5869 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5870 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5871 b_oth->free_a = PETSC_TRUE; 5872 b_oth->free_ij = PETSC_TRUE; 5873 b_oth->nonew = 0; 5874 5875 PetscCall(PetscFree(bufj)); 5876 if (!startsj_s || !bufa_ptr) { 5877 PetscCall(PetscFree2(sstartsj,rstartsj)); 5878 PetscCall(PetscFree(bufa_ptr)); 5879 } else { 5880 *startsj_s = sstartsj; 5881 *startsj_r = rstartsj; 5882 *bufa_ptr = bufa; 5883 } 5884 } else if (scall == MAT_REUSE_MATRIX) { 5885 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha)); 5886 } 5887 5888 PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5889 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs)); 5890 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0)); 5891 PetscFunctionReturn(0); 5892 } 5893 5894 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5895 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5896 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5897 #if defined(PETSC_HAVE_MKL_SPARSE) 5898 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5899 #endif 5900 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5901 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5902 #if defined(PETSC_HAVE_ELEMENTAL) 5903 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5904 #endif 5905 #if defined(PETSC_HAVE_SCALAPACK) 5906 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5907 #endif 5908 #if defined(PETSC_HAVE_HYPRE) 5909 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5910 #endif 5911 #if defined(PETSC_HAVE_CUDA) 5912 PETSC_INTERN PetscErrorCode 
MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5913 #endif 5914 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5915 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5916 #endif 5917 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5918 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5919 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5920 5921 /* 5922 Computes (B'*A')' since computing B*A directly is untenable 5923 5924 n p p 5925 [ ] [ ] [ ] 5926 m [ A ] * n [ B ] = m [ C ] 5927 [ ] [ ] [ ] 5928 5929 */ 5930 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5931 { 5932 Mat At,Bt,Ct; 5933 5934 PetscFunctionBegin; 5935 PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At)); 5936 PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt)); 5937 PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct)); 5938 PetscCall(MatDestroy(&At)); 5939 PetscCall(MatDestroy(&Bt)); 5940 PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C)); 5941 PetscCall(MatDestroy(&Ct)); 5942 PetscFunctionReturn(0); 5943 } 5944 5945 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5946 { 5947 PetscBool cisdense; 5948 5949 PetscFunctionBegin; 5950 PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n); 5951 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N)); 5952 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 5953 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 5954 if (!cisdense) { 5955 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 5956 } 5957 PetscCall(MatSetUp(C)); 5958 5959 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5960 PetscFunctionReturn(0); 5961 } 5962 5963 /* ----------------------------------------------------------------*/ 5964 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 5965 { 5966 Mat_Product *product = C->product; 5967 Mat A = product->A,B=product->B; 5968 5969 PetscFunctionBegin; 5970 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 5971 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5972 5973 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 5974 C->ops->productsymbolic = MatProductSymbolic_AB; 5975 PetscFunctionReturn(0); 5976 } 5977 5978 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 5979 { 5980 Mat_Product *product = C->product; 5981 5982 PetscFunctionBegin; 5983 if (product->type == MATPRODUCT_AB) { 5984 PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 5985 } 5986 PetscFunctionReturn(0); 5987 } 5988 5989 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 5990 5991 Input Parameters: 5992 5993 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 5994 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 5995 5996 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 5997 5998 For Set1, j1[] contains column indices of the nonzeros. 
5999 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6000 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6001 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6002 6003 Similar for Set2. 6004 6005 This routine merges the two sets of nonzeros row by row and removes repeats. 6006 6007 Output Parameters: (memory is allocated by the caller) 6008 6009 i[],j[]: the CSR of the merged matrix, which has m rows. 6010 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6011 imap2[]: similar to imap1[], but for Set2. 6012 Note we order nonzeros row-by-row and from left to right. 6013 */ 6014 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[], 6015 const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[], 6016 PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[]) 6017 { 6018 PetscInt r,m; /* Row index of mat */ 6019 PetscCount t,t1,t2,b1,e1,b2,e2; 6020 6021 PetscFunctionBegin; 6022 PetscCall(MatGetLocalSize(mat,&m,NULL)); 6023 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6024 i[0] = 0; 6025 for (r=0; r<m; r++) { /* Do row by row merging */ 6026 b1 = rowBegin1[r]; 6027 e1 = rowEnd1[r]; 6028 b2 = rowBegin2[r]; 6029 e2 = rowEnd2[r]; 6030 while (b1 < e1 && b2 < e2) { 6031 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6032 j[t] = j1[b1]; 6033 imap1[t1] = t; 6034 imap2[t2] = t; 6035 b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6036 b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6037 t1++; t2++; t++; 6038 } else if (j1[b1] < j2[b2]) { 6039 j[t] = j1[b1]; 6040 imap1[t1] = t; 6041 b1 += jmap1[t1+1] - jmap1[t1]; 6042 t1++; t++; 6043 } else { 6044 j[t] = j2[b2]; 6045 imap2[t2] = t; 6046 b2 += jmap2[t2+1] - jmap2[t2]; 6047 t2++; t++; 6048 } 6049 } 6050 /* Merge the remaining in either j1[] or j2[] */ 6051 while (b1 < e1) { 6052 j[t] = j1[b1]; 6053 imap1[t1] = t; 6054 b1 += jmap1[t1+1] - jmap1[t1]; 6055 t1++; t++; 6056 } 6057 while (b2 < e2) { 6058 j[t] = j2[b2]; 6059 imap2[t2] = t; 6060 b2 += jmap2[t2+1] - jmap2[t2]; 6061 t2++; t++; 6062 } 6063 i[r+1] = t; 6064 } 6065 PetscFunctionReturn(0); 6066 } 6067 6068 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6069 6070 Input Parameters: 6071 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6072 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6073 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6074 6075 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6076 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6077 6078 Output Parameters: 6079 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6080 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6081 They contain indices pointing to j[]. 
For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6082 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6083 6084 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6085 Atot: number of entries belonging to the diagonal block. 6086 Annz: number of unique nonzeros belonging to the diagonal block. 6087 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6088 repeats (i.e., same 'i,j' pair). 6089 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6090 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6091 6092 Atot: number of entries belonging to the diagonal block 6093 Annz: number of unique nonzeros belonging to the diagonal block. 6094 6095 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6096 6097 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6098 */ 6099 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[], 6100 PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[], 6101 PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_, 6102 PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_) 6103 { 6104 PetscInt cstart,cend,rstart,rend,row,col; 6105 PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6106 PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6107 PetscCount k,m,p,q,r,s,mid; 6108 PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap; 6109 6110 PetscFunctionBegin; 6111 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6112 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6113 m = rend - rstart; 6114 6115 for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */ 6116 6117 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6118 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6119 */ 6120 while (k<n) { 6121 row = i[k]; 6122 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6123 for (s=k; s<n; s++) if (i[s] != row) break; 6124 for (p=k; p<s; p++) { 6125 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6126 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); 6127 } 6128 PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k)); 6129 PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6130 rowBegin[row-rstart] = k; 6131 rowMid[row-rstart] = mid; 6132 rowEnd[row-rstart] = s; 6133 6134 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6135 Atot += mid - k; 6136 Btot += s - mid; 6137 6138 /* Count unique nonzeros of this diag/offdiag row */ 6139 for (p=k; p<mid;) { 6140 col = j[p]; 6141 do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */ 6142 Annz++; 6143 } 6144 6145 for (p=mid; p<s;) { 6146 col = j[p]; 6147 do {p++;} while (p<s && j[p] == col); 6148 Bnnz++; 6149 } 6150 k = s; 6151 } 6152 6153 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6154 PetscCall(PetscMalloc1(Atot,&Aperm)); 6155 PetscCall(PetscMalloc1(Btot,&Bperm)); 6156 PetscCall(PetscMalloc1(Annz+1,&Ajmap)); 6157 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap)); 6158 6159 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6160 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6161 for (r=0; r<m; r++) { 6162 k = rowBegin[r]; 6163 mid = rowMid[r]; 6164 s = rowEnd[r]; 6165 PetscCall(PetscArraycpy(Aperm+Atot,perm+k, mid-k)); 6166 PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid)); 6167 Atot += mid - k; 6168 Btot += s - mid; 6169 6170 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6171 for (p=k; p<mid;) { 6172 col = j[p]; 6173 q = p; 6174 do {p++;} while (p<mid && j[p] == col); 6175 Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6176 Annz++; 6177 } 6178 6179 for (p=mid; p<s;) { 6180 col = j[p]; 6181 q = p; 6182 do {p++;} while (p<s && j[p] == col); 6183 Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6184 Bnnz++; 6185 } 6186 } 6187 /* Output */ 6188 *Aperm_ = Aperm; 6189 *Annz_ = Annz; 6190 *Atot_ = Atot; 6191 *Ajmap_ = Ajmap; 6192 *Bperm_ = Bperm; 6193 *Bnnz_ = Bnnz; 6194 *Btot_ = Btot; 6195 *Bjmap_ = Bjmap; 6196 PetscFunctionReturn(0); 6197 } 6198 6199 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6200 6201 Input Parameters: 6202 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6203 nnz: number of unique nonzeros in the merged matrix 6204 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6205 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6206 6207 Output Parameter: (memory is allocated by the caller) 6208 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6209 6210 Example: 6211 nnz1 = 4 6212 nnz = 6 6213 imap = [1,3,4,5] 6214 jmap = [0,3,5,6,7] 6215 then, 6216 jmap_new = [0,0,3,3,5,6,7] 6217 */ 6218 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[]) 6219 { 6220 PetscCount k,p; 6221 6222 PetscFunctionBegin; 6223 jmap_new[0] = 0; 6224 p = nnz; /* p loops 
over jmap_new[] backwards */ 6225 for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */ 6226 for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1]; 6227 } 6228 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6229 PetscFunctionReturn(0); 6230 } 6231 6232 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6233 { 6234 MPI_Comm comm; 6235 PetscMPIInt rank,size; 6236 PetscInt m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6237 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6238 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6239 6240 PetscFunctionBegin; 6241 PetscCall(PetscFree(mpiaij->garray)); 6242 PetscCall(VecDestroy(&mpiaij->lvec)); 6243 #if defined(PETSC_USE_CTABLE) 6244 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6245 #else 6246 PetscCall(PetscFree(mpiaij->colmap)); 6247 #endif 6248 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6249 mat->assembled = PETSC_FALSE; 6250 mat->was_assembled = PETSC_FALSE; 6251 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6252 6253 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 6254 PetscCallMPI(MPI_Comm_size(comm,&size)); 6255 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 6256 PetscCall(PetscLayoutSetUp(mat->rmap)); 6257 PetscCall(PetscLayoutSetUp(mat->cmap)); 6258 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6259 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6260 PetscCall(MatGetLocalSize(mat,&m,&n)); 6261 PetscCall(MatGetSize(mat,&M,&N)); 6262 6263 /* ---------------------------------------------------------------------------*/ 6264 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6265 /* entries come first, then local rows, then remote rows. */ 6266 /* ---------------------------------------------------------------------------*/ 6267 PetscCount n1 = coo_n,*perm1; 6268 PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 6269 PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1)); 6270 PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */ 6271 PetscCall(PetscArraycpy(j1,coo_j,n1)); 6272 for (k=0; k<n1; k++) perm1[k] = k; 6273 6274 /* Manipulate indices so that entries with negative row or col indices will have smallest 6275 row indices, local entries will have greater but negative row indices, and remote entries 6276 will have positive row indices. 
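     For example, with rstart=10 and rend=20 on this rank: an entry with a negative row or column index
     gets row PETSC_MIN_INT; a local entry in row 12 becomes 12 - PETSC_MAX_INT, which is negative but
     larger than PETSC_MIN_INT; a remote entry in row 25 keeps row 25. Sorting by row therefore places
     ignored entries first, then local rows, then remote rows.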
6277 */ 6278 for (k=0; k<n1; k++) { 6279 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6280 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6281 else { 6282 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6283 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6284 } 6285 } 6286 6287 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6288 PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1)); 6289 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */ 6290 PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */ 6291 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6292 6293 /* ---------------------------------------------------------------------------*/ 6294 /* Split local rows into diag/offdiag portions */ 6295 /* ---------------------------------------------------------------------------*/ 6296 PetscCount *rowBegin1,*rowMid1,*rowEnd1; 6297 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1; 6298 PetscCount Annz1,Bnnz1,Atot1,Btot1; 6299 6300 PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1)); 6301 PetscCall(PetscMalloc1(n1-rem,&Cperm1)); 6302 PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1)); 6303 6304 /* ---------------------------------------------------------------------------*/ 6305 /* Send remote rows to their owner */ 6306 /* ---------------------------------------------------------------------------*/ 6307 /* Find which rows should be sent to which remote ranks*/ 6308 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6309 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6310 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6311 const PetscInt *ranges; 6312 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6313 6314 PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges)); 6315 PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries)); 6316 for (k=rem; k<n1;) { 6317 PetscMPIInt owner; 6318 PetscInt firstRow,lastRow; 6319 6320 /* Locate a row range */ 6321 firstRow = i1[k]; /* first row of this owner */ 6322 PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner)); 6323 lastRow = ranges[owner+1]-1; /* last row of this owner */ 6324 6325 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6326 PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p)); 6327 6328 /* All entries in [k,p) belong to this remote owner */ 6329 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6330 PetscMPIInt *sendto2; 6331 PetscInt *nentries2; 6332 PetscInt maxNsend2 = (maxNsend <= size/2) ? 
maxNsend*2 : size; 6333 6334 PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2)); 6335 PetscCall(PetscArraycpy(sendto2,sendto,maxNsend)); 6336 PetscCall(PetscArraycpy(nentries2,nentries,maxNsend)); 6337 PetscCall(PetscFree2(sendto,nentries)); 6338 sendto = sendto2; 6339 nentries = nentries2; 6340 maxNsend = maxNsend2; 6341 } 6342 sendto[nsend] = owner; 6343 nentries[nsend] = p - k; 6344 PetscCall(PetscCountCast(p-k,&nentries[nsend])); 6345 nsend++; 6346 k = p; 6347 } 6348 6349 /* Build 1st SF to know offsets on remote to send data */ 6350 PetscSF sf1; 6351 PetscInt nroots = 1,nroots2 = 0; 6352 PetscInt nleaves = nsend,nleaves2 = 0; 6353 PetscInt *offsets; 6354 PetscSFNode *iremote; 6355 6356 PetscCall(PetscSFCreate(comm,&sf1)); 6357 PetscCall(PetscMalloc1(nsend,&iremote)); 6358 PetscCall(PetscMalloc1(nsend,&offsets)); 6359 for (k=0; k<nsend; k++) { 6360 iremote[k].rank = sendto[k]; 6361 iremote[k].index = 0; 6362 nleaves2 += nentries[k]; 6363 PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt"); 6364 } 6365 PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6366 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM)); 6367 PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Should nroots2 overflow, we check offsets[] below */ 6368 PetscCall(PetscSFDestroy(&sf1)); 6369 PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem); 6370 6371 /* Build 2nd SF to send remote COOs to their owner */ 6372 PetscSF sf2; 6373 nroots = nroots2; 6374 nleaves = nleaves2; 6375 PetscCall(PetscSFCreate(comm,&sf2)); 6376 PetscCall(PetscSFSetFromOptions(sf2)); 6377 PetscCall(PetscMalloc1(nleaves,&iremote)); 6378 p = 0; 6379 for (k=0; k<nsend; k++) { 6380 PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt"); 6381 for (q=0; q<nentries[k]; q++,p++) { 6382 iremote[p].rank = sendto[k]; 6383 iremote[p].index = offsets[k] + q; 6384 } 6385 } 6386 PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6387 6388 /* sf2 only sends contiguous leafdata to contiguous rootdata.
We record the permutation which will be used to fill leafdata */ 6389 PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem)); 6390 6391 /* Send the remote COOs to their owner */ 6392 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6393 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6394 PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2)); 6395 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE)); 6396 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE)); 6397 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE)); 6398 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE)); 6399 6400 PetscCall(PetscFree(offsets)); 6401 PetscCall(PetscFree2(sendto,nentries)); 6402 6403 /* ---------------------------------------------------------------*/ 6404 /* Sort received COOs by row along with the permutation array */ 6405 /* ---------------------------------------------------------------*/ 6406 for (k=0; k<n2; k++) perm2[k] = k; 6407 PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2)); 6408 6409 /* ---------------------------------------------------------------*/ 6410 /* Split received COOs into diag/offdiag portions */ 6411 /* ---------------------------------------------------------------*/ 6412 PetscCount *rowBegin2,*rowMid2,*rowEnd2; 6413 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6414 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6415 6416 PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2)); 6417 PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2)); 6418 6419 /* --------------------------------------------------------------------------*/ 6420 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6421 /* --------------------------------------------------------------------------*/ 6422 PetscInt *Ai,*Bi; 6423 PetscInt *Aj,*Bj; 6424 6425 PetscCall(PetscMalloc1(m+1,&Ai)); 6426 PetscCall(PetscMalloc1(m+1,&Bi)); 6427 PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6428 PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj)); 6429 6430 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6431 PetscCall(PetscMalloc1(Annz1,&Aimap1)); 6432 PetscCall(PetscMalloc1(Bnnz1,&Bimap1)); 6433 PetscCall(PetscMalloc1(Annz2,&Aimap2)); 6434 PetscCall(PetscMalloc1(Bnnz2,&Bimap2)); 6435 6436 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj)); 6437 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj)); 6438 6439 /* --------------------------------------------------------------------------*/ 6440 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6441 /* expect nonzeros in A/B most likely have local contributing entries */ 6442 /* --------------------------------------------------------------------------*/ 6443 PetscInt Annz = Ai[m]; 6444 PetscInt Bnnz = Bi[m]; 6445 PetscCount *Ajmap1_new,*Bjmap1_new; 6446 6447 PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new)); 6448 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new)); 6449 6450 PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new)); 6451 
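/* Ajmap1_new[t+1]-Ajmap1_new[t] is now the number of local COO entries contributing to the t-th
   nonzero of the diagonal block; nonzeros that only receive remote contributions get an empty range.
   The same expansion is done next for the off-diagonal block. */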
PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new)); 6452 6453 PetscCall(PetscFree(Aimap1)); 6454 PetscCall(PetscFree(Ajmap1)); 6455 PetscCall(PetscFree(Bimap1)); 6456 PetscCall(PetscFree(Bjmap1)); 6457 PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1)); 6458 PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2)); 6459 PetscCall(PetscFree3(i1,j1,perm1)); 6460 PetscCall(PetscFree3(i2,j2,perm2)); 6461 6462 Ajmap1 = Ajmap1_new; 6463 Bjmap1 = Bjmap1_new; 6464 6465 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6466 if (Annz < Annz1 + Annz2) { 6467 PetscInt *Aj_new; 6468 PetscCall(PetscMalloc1(Annz,&Aj_new)); 6469 PetscCall(PetscArraycpy(Aj_new,Aj,Annz)); 6470 PetscCall(PetscFree(Aj)); 6471 Aj = Aj_new; 6472 } 6473 6474 if (Bnnz < Bnnz1 + Bnnz2) { 6475 PetscInt *Bj_new; 6476 PetscCall(PetscMalloc1(Bnnz,&Bj_new)); 6477 PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz)); 6478 PetscCall(PetscFree(Bj)); 6479 Bj = Bj_new; 6480 } 6481 6482 /* --------------------------------------------------------------------------------*/ 6483 /* Create new submatrices for on-process and off-process coupling */ 6484 /* --------------------------------------------------------------------------------*/ 6485 PetscScalar *Aa,*Ba; 6486 MatType rtype; 6487 Mat_SeqAIJ *a,*b; 6488 PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */ 6489 PetscCall(PetscCalloc1(Bnnz,&Ba)); 6490 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6491 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6492 PetscCall(MatDestroy(&mpiaij->A)); 6493 PetscCall(MatDestroy(&mpiaij->B)); 6494 PetscCall(MatGetRootType_Private(mat,&rtype)); 6495 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A)); 6496 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B)); 6497 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6498 6499 a = (Mat_SeqAIJ*)mpiaij->A->data; 6500 b = (Mat_SeqAIJ*)mpiaij->B->data; 6501 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6502 a->free_a = b->free_a = PETSC_TRUE; 6503 a->free_ij = b->free_ij = PETSC_TRUE; 6504 6505 /* conversion must happen AFTER multiply setup */ 6506 PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A)); 6507 PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B)); 6508 PetscCall(VecDestroy(&mpiaij->lvec)); 6509 PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL)); 6510 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec)); 6511 6512 mpiaij->coo_n = coo_n; 6513 mpiaij->coo_sf = sf2; 6514 mpiaij->sendlen = nleaves; 6515 mpiaij->recvlen = nroots; 6516 6517 mpiaij->Annz = Annz; 6518 mpiaij->Bnnz = Bnnz; 6519 6520 mpiaij->Annz2 = Annz2; 6521 mpiaij->Bnnz2 = Bnnz2; 6522 6523 mpiaij->Atot1 = Atot1; 6524 mpiaij->Atot2 = Atot2; 6525 mpiaij->Btot1 = Btot1; 6526 mpiaij->Btot2 = Btot2; 6527 6528 mpiaij->Ajmap1 = Ajmap1; 6529 mpiaij->Aperm1 = Aperm1; 6530 6531 mpiaij->Bjmap1 = Bjmap1; 6532 mpiaij->Bperm1 = Bperm1; 6533 6534 mpiaij->Aimap2 = Aimap2; 6535 mpiaij->Ajmap2 = Ajmap2; 6536 mpiaij->Aperm2 = Aperm2; 6537 6538 mpiaij->Bimap2 = Bimap2; 6539 mpiaij->Bjmap2 = Bjmap2; 6540 mpiaij->Bperm2 = Bperm2; 6541 6542 mpiaij->Cperm1 = Cperm1; 6543 6544 /* Allocate in preallocation. 
If not used, it has zero cost on host */ 6545 PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf)); 6546 PetscFunctionReturn(0); 6547 } 6548 6549 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode) 6550 { 6551 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6552 Mat A = mpiaij->A,B = mpiaij->B; 6553 PetscCount Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2; 6554 PetscScalar *Aa,*Ba; 6555 PetscScalar *sendbuf = mpiaij->sendbuf; 6556 PetscScalar *recvbuf = mpiaij->recvbuf; 6557 const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2; 6558 const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2; 6559 const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2; 6560 const PetscCount *Cperm1 = mpiaij->Cperm1; 6561 6562 PetscFunctionBegin; 6563 PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */ 6564 PetscCall(MatSeqAIJGetArray(B,&Ba)); 6565 6566 /* Pack entries to be sent to remote */ 6567 for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6568 6569 /* Send remote entries to their owner and overlap the communication with local computation */ 6570 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE)); 6571 /* Add local entries to A and B */ 6572 for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6573 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stablility */ 6574 for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]]; 6575 Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum; 6576 } 6577 for (PetscCount i=0; i<Bnnz; i++) { 6578 PetscScalar sum = 0.0; 6579 for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]]; 6580 Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum; 6581 } 6582 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE)); 6583 6584 /* Add received remote entries to A and B */ 6585 for (PetscCount i=0; i<Annz2; i++) { 6586 for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6587 } 6588 for (PetscCount i=0; i<Bnnz2; i++) { 6589 for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6590 } 6591 PetscCall(MatSeqAIJRestoreArray(A,&Aa)); 6592 PetscCall(MatSeqAIJRestoreArray(B,&Ba)); 6593 PetscFunctionReturn(0); 6594 } 6595 6596 /* ----------------------------------------------------------------*/ 6597 6598 /*MC 6599 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6600 6601 Options Database Keys: 6602 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6603 6604 Level: beginner 6605 6606 Notes: 6607 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6608 in this case the values associated with the rows and columns one passes in are set to zero 6609 in the matrix 6610 6611 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6612 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6613 6614 .seealso: `MatCreateAIJ()` 6615 M*/ 6616 6617 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6618 { 6619 Mat_MPIAIJ *b; 6620 PetscMPIInt size; 6621 6622 PetscFunctionBegin; 6623 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 6624 6625 PetscCall(PetscNewLog(B,&b)); 6626 B->data = (void*)b; 6627 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 6628 B->assembled = PETSC_FALSE; 6629 B->insertmode = NOT_SET_VALUES; 6630 b->size = size; 6631 6632 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 6633 6634 /* build cache for off array entries formed */ 6635 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 6636 6637 b->donotstash = PETSC_FALSE; 6638 b->colmap = NULL; 6639 b->garray = NULL; 6640 b->roworiented = PETSC_TRUE; 6641 6642 /* stuff used for matrix vector multiply */ 6643 b->lvec = NULL; 6644 b->Mvctx = NULL; 6645 6646 /* stuff for MatGetRow() */ 6647 b->rowindices = NULL; 6648 b->rowvalues = NULL; 6649 b->getrowactive = PETSC_FALSE; 6650 6651 /* flexible pointer used in CUSPARSE classes */ 6652 b->spptr = NULL; 6653 6654 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6655 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ)); 6656 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ)); 6657 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ)); 6658 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ)); 6659 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ)); 6660 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6661 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ)); 6662 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM)); 6663 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL)); 6664 #if defined(PETSC_HAVE_CUDA) 6665 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6666 #endif 6667 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6668 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos)); 6669 #endif 6670 #if defined(PETSC_HAVE_MKL_SPARSE) 6671 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL)); 6672 #endif 6673 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL)); 6674 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ)); 6675 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ)); 6676 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense)); 6677 #if defined(PETSC_HAVE_ELEMENTAL) 6678 
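  /* Each conversion below is registered as a composed function so that MatConvert() can find it by
     name at run time (it queries "MatConvert_<from>_<to>_C" on the matrix) */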
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental)); 6679 #endif 6680 #if defined(PETSC_HAVE_SCALAPACK) 6681 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK)); 6682 #endif 6683 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS)); 6684 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL)); 6685 #if defined(PETSC_HAVE_HYPRE) 6686 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE)); 6687 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6688 #endif 6689 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ)); 6690 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ)); 6691 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ)); 6692 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ)); 6693 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ)); 6694 PetscFunctionReturn(0); 6695 } 6696 6697 /*@C 6698 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6699 and "off-diagonal" part of the matrix in CSR format. 6700 6701 Collective 6702 6703 Input Parameters: 6704 + comm - MPI communicator 6705 . m - number of local rows (Cannot be PETSC_DECIDE) 6706 . n - This value should be the same as the local size used in creating the 6707 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6708 calculated if N is given) For square matrices n is almost always m. 6709 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6710 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6711 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6712 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6713 . a - matrix values 6714 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6715 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6716 - oa - matrix values 6717 6718 Output Parameter: 6719 . mat - the matrix 6720 6721 Level: advanced 6722 6723 Notes: 6724 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6725 must free the arrays once the matrix has been destroyed and not before. 6726 6727 The i and j indices are 0 based 6728 6729 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6730 6731 This sets local rows and cannot be used to set off-processor values. 6732 6733 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6734 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6735 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 6736 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6737 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6738 communication if it is known that only local entries will be set. 6739 6740 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6741 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6742 @*/ 6743 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6744 { 6745 Mat_MPIAIJ *maij; 6746 6747 PetscFunctionBegin; 6748 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6749 PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6750 PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6751 PetscCall(MatCreate(comm,mat)); 6752 PetscCall(MatSetSizes(*mat,m,n,M,N)); 6753 PetscCall(MatSetType(*mat,MATMPIAIJ)); 6754 maij = (Mat_MPIAIJ*) (*mat)->data; 6755 6756 (*mat)->preallocated = PETSC_TRUE; 6757 6758 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6759 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6760 6761 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A)); 6762 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B)); 6763 6764 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 6765 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 6766 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 6767 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 6768 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 6769 PetscFunctionReturn(0); 6770 } 6771 6772 typedef struct { 6773 Mat *mp; /* intermediate products */ 6774 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6775 PetscInt cp; /* number of intermediate products */ 6776 6777 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6778 PetscInt *startsj_s,*startsj_r; 6779 PetscScalar *bufa; 6780 Mat P_oth; 6781 6782 /* may take advantage of merging product->B */ 6783 Mat Bloc; /* B-local by merging diag and off-diag */ 6784 6785 /* cusparse does not have support to split between symbolic and numeric phases. 6786 When api_user is true, we don't need to update the numerical values 6787 of the temporary storage */ 6788 PetscBool reusesym; 6789 6790 /* support for COO values insertion */ 6791 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6792 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6793 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6794 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 6795 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6796 PetscMemType mtype; 6797 6798 /* customization */ 6799 PetscBool abmerge; 6800 PetscBool P_oth_bind; 6801 } MatMatMPIAIJBACKEND; 6802 6803 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6804 { 6805 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6806 PetscInt i; 6807 6808 PetscFunctionBegin; 6809 PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r)); 6810 PetscCall(PetscFree(mmdata->bufa)); 6811 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v)); 6812 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w)); 6813 PetscCall(MatDestroy(&mmdata->P_oth)); 6814 PetscCall(MatDestroy(&mmdata->Bloc)); 6815 PetscCall(PetscSFDestroy(&mmdata->sf)); 6816 for (i = 0; i < mmdata->cp; i++) { 6817 PetscCall(MatDestroy(&mmdata->mp[i])); 6818 } 6819 PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp)); 6820 PetscCall(PetscFree(mmdata->own[0])); 6821 PetscCall(PetscFree(mmdata->own)); 6822 PetscCall(PetscFree(mmdata->off[0])); 6823 PetscCall(PetscFree(mmdata->off)); 6824 PetscCall(PetscFree(mmdata)); 6825 PetscFunctionReturn(0); 6826 } 6827 6828 /* Copy selected n entries with indices in idx[] of A to v[]. 6829 If idx is NULL, copy the whole data array of A to v[] 6830 */ 6831 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6832 { 6833 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6834 6835 PetscFunctionBegin; 6836 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f)); 6837 if (f) { 6838 PetscCall((*f)(A,n,idx,v)); 6839 } else { 6840 const PetscScalar *vv; 6841 6842 PetscCall(MatSeqAIJGetArrayRead(A,&vv)); 6843 if (n && idx) { 6844 PetscScalar *w = v; 6845 const PetscInt *oi = idx; 6846 PetscInt j; 6847 6848 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6849 } else { 6850 PetscCall(PetscArraycpy(v,vv,n)); 6851 } 6852 PetscCall(MatSeqAIJRestoreArrayRead(A,&vv)); 6853 } 6854 PetscFunctionReturn(0); 6855 } 6856 6857 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6858 { 6859 MatMatMPIAIJBACKEND *mmdata; 6860 PetscInt i,n_d,n_o; 6861 6862 PetscFunctionBegin; 6863 MatCheckProduct(C,1); 6864 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6865 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6866 if (!mmdata->reusesym) { /* update temporary matrices */ 6867 if (mmdata->P_oth) { 6868 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 6869 } 6870 if (mmdata->Bloc) { 6871 PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc)); 6872 } 6873 } 6874 mmdata->reusesym = PETSC_FALSE; 6875 6876 for (i = 0; i < mmdata->cp; i++) { 6877 PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6878 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 6879 } 6880 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6881 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6882 6883 if (mmdata->mptmp[i]) continue; 6884 if (noff) { 6885 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6886 6887 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o)); 6888 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d)); 
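        /* coo_w stages values whose destination row lives on another rank (gathered to the owning
           rank via mmdata->sf below), while coo_v stages locally owned values in the order expected
           by MatSetValuesCOO() */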
6889 n_o += noff; 6890 n_d += nown; 6891 } else { 6892 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6893 6894 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d)); 6895 n_d += mm->nz; 6896 } 6897 } 6898 if (mmdata->hasoffproc) { /* offprocess insertion */ 6899 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6900 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6901 } 6902 PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES)); 6903 PetscFunctionReturn(0); 6904 } 6905 6906 /* Support for Pt * A, A * P, or Pt * A * P */ 6907 #define MAX_NUMBER_INTERMEDIATE 4 6908 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6909 { 6910 Mat_Product *product = C->product; 6911 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6912 Mat_MPIAIJ *a,*p; 6913 MatMatMPIAIJBACKEND *mmdata; 6914 ISLocalToGlobalMapping P_oth_l2g = NULL; 6915 IS glob = NULL; 6916 const char *prefix; 6917 char pprefix[256]; 6918 const PetscInt *globidx,*P_oth_idx; 6919 PetscInt i,j,cp,m,n,M,N,*coo_i,*coo_j; 6920 PetscCount ncoo,ncoo_d,ncoo_o,ncoo_oown; 6921 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 6922 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6923 /* a base offset; type-2: sparse with a local to global map table */ 6924 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6925 6926 MatProductType ptype; 6927 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6928 PetscMPIInt size; 6929 6930 PetscFunctionBegin; 6931 MatCheckProduct(C,1); 6932 PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6933 ptype = product->type; 6934 if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 6935 ptype = MATPRODUCT_AB; 6936 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 6937 } 6938 switch (ptype) { 6939 case MATPRODUCT_AB: 6940 A = product->A; 6941 P = product->B; 6942 m = A->rmap->n; 6943 n = P->cmap->n; 6944 M = A->rmap->N; 6945 N = P->cmap->N; 6946 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6947 break; 6948 case MATPRODUCT_AtB: 6949 P = product->A; 6950 A = product->B; 6951 m = P->cmap->n; 6952 n = A->cmap->n; 6953 M = P->cmap->N; 6954 N = A->cmap->N; 6955 hasoffproc = PETSC_TRUE; 6956 break; 6957 case MATPRODUCT_PtAP: 6958 A = product->A; 6959 P = product->B; 6960 m = P->cmap->n; 6961 n = P->cmap->n; 6962 M = P->cmap->N; 6963 N = P->cmap->N; 6964 hasoffproc = PETSC_TRUE; 6965 break; 6966 default: 6967 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6968 } 6969 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size)); 6970 if (size == 1) hasoffproc = PETSC_FALSE; 6971 6972 /* defaults */ 6973 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6974 mp[i] = NULL; 6975 mptmp[i] = PETSC_FALSE; 6976 rmapt[i] = -1; 6977 cmapt[i] = -1; 6978 rmapa[i] = NULL; 6979 cmapa[i] = NULL; 6980 } 6981 6982 /* customization */ 6983 PetscCall(PetscNew(&mmdata)); 6984 mmdata->reusesym = product->api_user; 6985 if (ptype == MATPRODUCT_AB) { 6986 if (product->api_user) { 6987 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat"); 6988 
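      /* api_user means the product was requested through MatMatMult(), so the backend options are
         spelled -matmatmult_backend_*; the MatProduct path in the else branch uses
         -mat_product_algorithm_backend_* instead */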
PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 6989 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6990 PetscOptionsEnd(); 6991 } else { 6992 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat"); 6993 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 6994 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6995 PetscOptionsEnd(); 6996 } 6997 } else if (ptype == MATPRODUCT_PtAP) { 6998 if (product->api_user) { 6999 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat"); 7000 PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7001 PetscOptionsEnd(); 7002 } else { 7003 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 7004 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7005 PetscOptionsEnd(); 7006 } 7007 } 7008 a = (Mat_MPIAIJ*)A->data; 7009 p = (Mat_MPIAIJ*)P->data; 7010 PetscCall(MatSetSizes(C,m,n,M,N)); 7011 PetscCall(PetscLayoutSetUp(C->rmap)); 7012 PetscCall(PetscLayoutSetUp(C->cmap)); 7013 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 7014 PetscCall(MatGetOptionsPrefix(C,&prefix)); 7015 7016 cp = 0; 7017 switch (ptype) { 7018 case MATPRODUCT_AB: /* A * P */ 7019 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7020 7021 /* A_diag * P_local (merged or not) */ 7022 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7023 /* P is product->B */ 7024 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7025 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7026 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7027 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7028 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7029 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7030 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7031 mp[cp]->product->api_user = product->api_user; 7032 PetscCall(MatProductSetFromOptions(mp[cp])); 7033 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7034 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7035 PetscCall(ISGetIndices(glob,&globidx)); 7036 rmapt[cp] = 1; 7037 cmapt[cp] = 2; 7038 cmapa[cp] = globidx; 7039 mptmp[cp] = PETSC_FALSE; 7040 cp++; 7041 } else { /* A_diag * P_diag and A_diag * P_off */ 7042 PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp])); 7043 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7044 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7045 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7046 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7047 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7048 mp[cp]->product->api_user = product->api_user; 7049 
PetscCall(MatProductSetFromOptions(mp[cp])); 7050 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7051 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7052 rmapt[cp] = 1; 7053 cmapt[cp] = 1; 7054 mptmp[cp] = PETSC_FALSE; 7055 cp++; 7056 PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp])); 7057 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7058 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7059 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7060 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7061 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7062 mp[cp]->product->api_user = product->api_user; 7063 PetscCall(MatProductSetFromOptions(mp[cp])); 7064 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7065 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7066 rmapt[cp] = 1; 7067 cmapt[cp] = 2; 7068 cmapa[cp] = p->garray; 7069 mptmp[cp] = PETSC_FALSE; 7070 cp++; 7071 } 7072 7073 /* A_off * P_other */ 7074 if (mmdata->P_oth) { 7075 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */ 7076 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7077 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7078 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7079 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7080 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7081 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7082 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7083 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7084 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7085 mp[cp]->product->api_user = product->api_user; 7086 PetscCall(MatProductSetFromOptions(mp[cp])); 7087 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7088 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7089 rmapt[cp] = 1; 7090 cmapt[cp] = 2; 7091 cmapa[cp] = P_oth_idx; 7092 mptmp[cp] = PETSC_FALSE; 7093 cp++; 7094 } 7095 break; 7096 7097 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7098 /* A is product->B */ 7099 PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7100 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7101 PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp])); 7102 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7103 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7104 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7105 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7106 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7107 mp[cp]->product->api_user = product->api_user; 7108 PetscCall(MatProductSetFromOptions(mp[cp])); 7109 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7110 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7111 PetscCall(ISGetIndices(glob,&globidx)); 7112 rmapt[cp] = 2; 7113 rmapa[cp] = globidx; 7114 cmapt[cp] = 2; 7115 cmapa[cp] = globidx; 7116 mptmp[cp] = PETSC_FALSE; 7117 cp++; 
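      /* when A == P a single local product Bloc^T * Bloc suffices, and both its rows and columns are
         mapped back to global indices through the merged local-to-global index set glob */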
    } else {
      PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob,&globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
    PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
    PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp],product->fill));
    PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
    PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob,&globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE;
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);

  PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i] = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp = cp;
  C->product->data = mmdata;
  C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt mr = mp[cp]->rmap->n;
      const PetscInt rs = C->rmap->rstart;
      const PetscInt re = C->rmap->rend;
      const PetscInt *ii = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i+1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz; /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].

    off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally
    so off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF msf;
    PetscInt ncoo2,*coo_i2,*coo_j2;

    PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
      PetscInt *idxoff = mmdata->off[cp];
      PetscInt *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii = mm->i;
        PetscInt *coi = coo_i + ncoo_o;
        PetscInt *coj = coo_j + ncoo_o;
        const PetscInt mr = mp[cp]->rmap->n;
        const PetscInt rs = C->rmap->rstart;
        const PetscInt re = C->rmap->rend;
        const PetscInt cs = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt gr = rmap[i];
          const PetscInt nz = ii[i+1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i+1]; j++) {
              *coi++ = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
    PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i,coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
    PetscInt *coi = coo_i + ncoo_d;
    PetscInt *coj = coo_j + ncoo_d;
    const PetscInt *jj = mm->j;
    const PetscInt *ii = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt mr = mp[cp]->rmap->n;
    const PetscInt rs = C->rmap->rstart;
    const PetscInt re = C->rmap->rend;
    const PetscInt cs = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj,jj,mm->nz));
      } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else { /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i+1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) {
    PetscCall(ISRestoreIndices(glob,&globidx));
  }
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) {
    PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
  }
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
  PetscCall(PetscFree2(coo_i,coo_j));
  PetscFunctionReturn(0);
}

PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
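  /* match:  set below when A and B have the same (device) matrix type, so the backend path applies;
     usecpu: set from the options handled below when the user explicitly requests the CPU fallback */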
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat,1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) {
    PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
  }
  if (match) { /* we can always fall back to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}

/*
   Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
#undef PetscCall
#define PetscCall(...) do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
      return; \
    } \
  } while (0)

#undef SETERRQ
#define SETERRQ(comm,ierr,...) do { \
    *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
    return; \
  } while (0)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat mat = *mmat;
  PetscInt m = *mm, n = *mn;
  InsertMode addv = *maddv;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macro */
    Mat A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar *aa;
    PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar *ba;
    /* The variable below is only needed for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases
     * because we cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A,&aa));
    PetscCall(MatSeqAIJGetArray(B,&ba));
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
      PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {
        row = im[i] - rstart;
        lastcol1 = -1;
        rp1 = aj + ai[row];
        ap1 = aa + ai[row];
        rmax1 = aimax[row];
        nrow1 = ailen[row];
        low1 = 0;
        high1 = nrow1;
        lastcol2 = -1;
        rp2 = bj + bi[row];
        ap2 = ba + bi[row];
        rmax2 = bimax[row];
        nrow2 = bilen[row];
        low2 = 0;
        high2 = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          } else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
              }
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B = aij->B;
                b = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2 = bj + bi[row];
                ap2 = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2 = 0;
                high2 = nrow2;
                bm = aij->B->rmap->n;
                ba = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A,&aa));
    PetscCall(MatSeqAIJRestoreArray(B,&ba));
  }
  PetscFunctionReturnVoid();
}
/* Undefining these here since they were redefined from their original definitions above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ
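
/*
   Illustration only (not part of the PETSc sources): a minimal sketch of user code that drives the
   MatProduct machinery implemented above through the public API.  It assembles a small tridiagonal
   AIJ matrix A and forms C = A*A with MatMatMult().  On a device build, -mat_type aijcusparse (or
   aijkokkos) selects the MPIAIJBACKEND product path defined in this file, and
   -matmatmult_backend_cpu requests the CPU fallback handled in MatProductSetFromOptions_MPIAIJBACKEND();
   otherwise the standard MPIAIJ product is used.  The matrix size and values are arbitrary placeholders.

   #include <petscmat.h>

   int main(int argc, char **argv)
   {
     Mat      A, C;
     PetscInt i, rstart, rend, N = 100;

     PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N));
     PetscCall(MatSetType(A, MATAIJ));
     PetscCall(MatSetFromOptions(A));
     PetscCall(MatSeqAIJSetPreallocation(A, 3, NULL));
     PetscCall(MatMPIAIJSetPreallocation(A, 3, NULL, 2, NULL));
     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     for (i = rstart; i < rend; i++) {
       PetscCall(MatSetValue(A, i, i, 2.0, INSERT_VALUES));
       if (i > 0)   PetscCall(MatSetValue(A, i, i-1, -1.0, INSERT_VALUES));
       if (i < N-1) PetscCall(MatSetValue(A, i, i+1, -1.0, INSERT_VALUES));
     }
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatMatMult(A, A, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C));
     PetscCall(MatDestroy(&C));
     PetscCall(MatDestroy(&A));
     PetscCall(PetscFinalize());
     return 0;
   }
*/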