#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B));
  PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B));
  PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B));
  PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically
   switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/
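
/*
   Illustrative sketch (not taken from this file): the manual pages above recommend calling both
   preallocation routines so the same code runs unchanged on one or many MPI ranks. The sizes M, N
   and the per-row nonzero counts (5 diagonal / 2 off-diagonal) below are placeholder values.

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N));
     PetscCall(MatSetType(A,MATAIJ));
     PetscCall(MatSeqAIJSetPreallocation(A,5,NULL));
     PetscCall(MatMPIAIJSetPreallocation(A,5,NULL,2,NULL));
     PetscCall(MatSetFromOptions(A));
*/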

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A,flg));
  if (a->B) PetscCall(MatBindToCPU(a->B,flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
   */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag,flg));

  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
    PetscCall(MatSetBlockSizes(mat->B,rbs,1));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
    PetscFunctionReturn(0);
  }
  PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
  cnt = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y,&cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A,D,is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y,D,is));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data;
  PetscInt i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
  PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
  for (i=0; i<nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data;
  PetscInt i,m,n,*garray = aij->garray;
  Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscCalloc1(n,&work));
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  IS sis,gis;
  const PetscInt *isis,*igis;
  PetscInt n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
  PetscCall(MatFindNonzeroRows(a->B,&gis));
  PetscCall(ISGetSize(gis,&ngis));
  PetscCall(ISGetSize(sis,&nsis));
  PetscCall(ISGetIndices(sis,&isis));
  PetscCall(ISGetIndices(gis,&igis));

  PetscCall(PetscMalloc1(ngis+nsis,&iis));
  PetscCall(PetscArraycpy(iis,igis,ngis));
  PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n,iis));
  PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
  for (i=0; i<n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));

  PetscCall(ISRestoreIndices(sis,&isis));
  PetscCall(ISRestoreIndices(gis,&igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table access cost; without it, it is not scalable
  (each process stores an integer array of order N), but access is fast.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
  for (i=0; i<n; i++) {
    PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
  }
#else
  PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
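
/*
   Worked example (hypothetical values, not taken from this file): if the off-diagonal block B on a
   process has garray = {4, 9, 17}, the routine above records colmap[4] = 1, colmap[9] = 2 and
   colmap[17] = 3 (or the equivalent PetscTable entries with keys shifted by one when
   PETSC_USE_CTABLE is defined). A later lookup of global column 9 therefore yields local column
   2-1 = 1 of B, while a stored value of 0 (i.e. col = -1 after the decrement) means that global
   column is not present in B on this process.
*/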

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether PetscLogFlops() will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1)); \
    PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1)); \
    PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscInt l,*garray = mat->garray,diag;
  PetscScalar *aa,*ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A,&diag,NULL));
  row = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row],v,l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }

  /* diagonal part */
  if (a->i[row+1]-a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A,&aa));
    PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
  }

  /* right of diagonal part */
  if (b->i[row+1]-b->i[row]-l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value = 0.0;
  PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscBool ignorezeroentries = a->ignorezeroentries;
  Mat B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar *aa,*ba;
  PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A,&aa));
  PetscCall(MatSeqAIJGetArray(B,&ba));
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      lastcol1 = -1;
      rp1 = aj + ai[row];
      ap1 = aa + ai[row];
      rmax1 = aimax[row];
      nrow1 = ailen[row];
      low1 = 0;
      high1 = nrow1;
      lastcol2 = -1;
      rp2 = bj + bi[row];
      ap2 = ba + bi[row];
      rmax2 = bimax[row];
      nrow2 = bilen[row];
      low2 = 0;
      high2 = nrow2;

      for (j=0; j<n; j++) {
        if (v) value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          if (mat->was_assembled) {
            if (!aij->colmap) {
              PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
            }
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2 = bj + bi[row];
              ap2 = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2 = 0;
              high2 = nrow2;
              bm = aij->B->rmap->n;
              ba = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat,"Skipping insertion of a new nonzero location in the off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
              } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, ba might have been freed due to reallocation above, but we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B,&ba));
  PetscFunctionReturn(0);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat A = aij->A; /* diagonal part of the matrix */
  Mat B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt *ailen = a->ilen,*aj = a->j;
  PetscInt *bilen = b->ilen,*bj = b->j;
  PetscInt am = aij->A->rmap->n,j;
  PetscInt diag_so_far = 0,dnz;
  PetscInt offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat A = aij->A; /* diagonal part of the matrix */
  Mat B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt *ailen = a->ilen,*aj = a->j;
  PetscInt *bilen = b->ilen,*bj = b->j;
  PetscInt am = aij->A->rmap->n,j;
  PetscInt *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
        } else {
          if (!aij->colmap) {
            PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt n;
  PetscInt i,j,rstart,ncols,flg;
  PetscInt *row,*col;
  PetscBool other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A,mode));
  PetscCall(MatAssemblyEnd(aij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  }
  PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B,mode));
  PetscCall(MatAssemblyEnd(aij->B,mode));

  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt *lrows;
  PetscInt r, len;
  PetscBool cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar *bb;

    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt nnwA, nnwB;
    PetscBool nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  PetscMPIInt n = A->rmap->n;
  PetscInt i,j,r,m,len = 0;
  PetscInt *lrows,*owners = A->rmap->range;
  PetscMPIInt p = 0;
  PetscSFNode *rrows;
  PetscSF sf;
  const PetscScalar *xx;
  PetscScalar *bb,*mask,*aij_a;
  Vec xmask,lmask;
  Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt *aj, *ii,*ridx;
  PetscScalar *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off-diagonal part of matrix */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask,&bb));
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layouts don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
  ii = aij->i;
  for (i=0; i<len; i++) {
    PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m = aij->compressedrow.nrows;
    ii = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscInt nt;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx,&nt));
  PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A,xx,yy));
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
  PetscFunctionReturn(0);
}
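
/*
   Illustrative sketch (not part of this file's API): MatMult_MPIAIJ above computes, on each process,
   y_local = A_d*x_local + A_o*x_ghost, where A_d = a->A, A_o = a->B, and x_ghost = a->lvec is gathered
   from other ranks through a->Mvctx. A typical user-level call sequence that exercises it is

     Vec x,y;
     PetscCall(MatCreateVecs(A,&x,&y));
     PetscCall(VecSet(x,1.0));
     PetscCall(MatMult(A,x,y));
*/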

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS Me,Notme;
  PetscInt M,N,first,last,*notme,i;
  PetscBool lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
  PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat,&M,&N));
  PetscCall(MatGetOwnershipRange(Amat,&first,&last));
  PetscCall(PetscMalloc1(N-last+first,&notme));
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
  PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
  PetscCall(MatDestroyMatrices(1,&Aoffs));
  PetscCall(MatDestroyMatrices(1,&Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A,v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A,aa));
  PetscCall(MatScale(a->B,aa));
  PetscFunctionReturn(0);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;
  const PetscScalar *aa,*ba;
  PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt *rowlens;
  PetscInt *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m,&rowlens));
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz,&colidxs));
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
  PetscCall(PetscMalloc1(nz,&matvals));
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      matvals[cnt++] = aa[ja];
    for (; jb<B->i[i+1]; jb++)
      matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba));
  PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat,viewer));
  PetscFunctionReturn(0);
}
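
/*
   Reading aid derived from the code above (not a formal format specification): the binary stream
   written by MatView_MPIAIJ_Binary consists of header[4] = {MAT_FILE_CLASSID, M, N, global nz},
   then the per-row nonzero counts of all rows in global row order, then all global column indices
   (per row: off-diagonal entries left of the diagonal block, the diagonal block, then the remaining
   off-diagonal entries), then the nonzero values in the same order.
*/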

#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt rank = aij->rank,size = aij->size;
  PetscBool isdraw,iascii,isbinary;
  PetscViewer sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      PetscCall(PetscMalloc1(size,&nz));
      PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg/size;
      PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscInt *inodes=NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
                                                     rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
                                                     rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
      PetscCall(MatView(aij->A,viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
    PetscCall(MatView(aij->A,viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
    PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
      PetscCall(PetscObjectReference((PetscObject)AA[0]));
      A  = AA[0];
      Av = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
      }
      PetscCall(MatView_SeqAIJ(Av,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscBool iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
  if (iascii || isdraw || isbinary || issocket) {
    PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Vec bb1 = NULL;
  PetscBool hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1438 PetscCall(VecDuplicate(bb,&bb1)); 1439 } 1440 1441 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1442 if (flag & SOR_ZERO_INITIAL_GUESS) { 1443 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1444 its--; 1445 } 1446 1447 while (its--) { 1448 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1449 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1450 1451 /* update rhs: bb1 = bb - B*x */ 1452 PetscCall(VecScale(mat->lvec,-1.0)); 1453 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1454 1455 /* local sweep */ 1456 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 1457 } 1458 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1459 if (flag & SOR_ZERO_INITIAL_GUESS) { 1460 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1461 its--; 1462 } 1463 while (its--) { 1464 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1465 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1466 1467 /* update rhs: bb1 = bb - B*x */ 1468 PetscCall(VecScale(mat->lvec,-1.0)); 1469 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1470 1471 /* local sweep */ 1472 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 1473 } 1474 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1475 if (flag & SOR_ZERO_INITIAL_GUESS) { 1476 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1477 its--; 1478 } 1479 while (its--) { 1480 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1481 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1482 1483 /* update rhs: bb1 = bb - B*x */ 1484 PetscCall(VecScale(mat->lvec,-1.0)); 1485 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1486 1487 /* local sweep */ 1488 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 1489 } 1490 } else if (flag & SOR_EISENSTAT) { 1491 Vec xx1; 1492 1493 PetscCall(VecDuplicate(bb,&xx1)); 1494 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx)); 1495 1496 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1497 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1498 if (!mat->diag) { 1499 PetscCall(MatCreateVecs(matin,&mat->diag,NULL)); 1500 PetscCall(MatGetDiagonal(matin,mat->diag)); 1501 } 1502 PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop)); 1503 if (hasop) { 1504 PetscCall(MatMultDiagonalBlock(matin,xx,bb1)); 1505 } else { 1506 PetscCall(VecPointwiseMult(bb1,mat->diag,xx)); 1507 } 1508 PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb)); 1509 1510 PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1)); 1511 1512 /* local sweep */ 1513 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1)); 1514 PetscCall(VecAXPY(xx,1.0,xx1)); 1515 PetscCall(VecDestroy(&xx1)); 1516 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1517 1518 PetscCall(VecDestroy(&bb1)); 1519 1520 matin->factorerrortype = 
mat->A->factorerrortype; 1521 PetscFunctionReturn(0); 1522 } 1523 1524 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1525 { 1526 Mat aA,aB,Aperm; 1527 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1528 PetscScalar *aa,*ba; 1529 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1530 PetscSF rowsf,sf; 1531 IS parcolp = NULL; 1532 PetscBool done; 1533 1534 PetscFunctionBegin; 1535 PetscCall(MatGetLocalSize(A,&m,&n)); 1536 PetscCall(ISGetIndices(rowp,&rwant)); 1537 PetscCall(ISGetIndices(colp,&cwant)); 1538 PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest)); 1539 1540 /* Invert row permutation to find out where my rows should go */ 1541 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf)); 1542 PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant)); 1543 PetscCall(PetscSFSetFromOptions(rowsf)); 1544 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1545 PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1546 PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1547 1548 /* Invert column permutation to find out where my columns should go */ 1549 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1550 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant)); 1551 PetscCall(PetscSFSetFromOptions(sf)); 1552 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1553 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1554 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1555 PetscCall(PetscSFDestroy(&sf)); 1556 1557 PetscCall(ISRestoreIndices(rowp,&rwant)); 1558 PetscCall(ISRestoreIndices(colp,&cwant)); 1559 PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols)); 1560 1561 /* Find out where my gcols should go */ 1562 PetscCall(MatGetSize(aB,NULL,&ng)); 1563 PetscCall(PetscMalloc1(ng,&gcdest)); 1564 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1565 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols)); 1566 PetscCall(PetscSFSetFromOptions(sf)); 1567 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1568 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1569 PetscCall(PetscSFDestroy(&sf)); 1570 1571 PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz)); 1572 PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1573 PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1574 for (i=0; i<m; i++) { 1575 PetscInt row = rdest[i]; 1576 PetscMPIInt rowner; 1577 PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner)); 1578 for (j=ai[i]; j<ai[i+1]; j++) { 1579 PetscInt col = cdest[aj[j]]; 1580 PetscMPIInt cowner; 1581 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */ 1582 if (rowner == cowner) dnnz[i]++; 1583 else onnz[i]++; 1584 } 1585 for (j=bi[i]; j<bi[i+1]; j++) { 1586 PetscInt col = gcdest[bj[j]]; 1587 PetscMPIInt cowner; 1588 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); 1589 if (rowner == cowner) dnnz[i]++; 1590 else onnz[i]++; 1591 } 1592 } 1593 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1594 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1595 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1596 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1597 PetscCall(PetscSFDestroy(&rowsf)); 1598 1599 
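  /* At this point tdnnz[]/tonnz[] hold, for each locally owned row of the permuted matrix, the
     number of nonzeros that land in its diagonal and off-diagonal blocks.  Passing these counts
     to MatCreateAIJ() below gives an exact preallocation, so the MatSetValues() loop that follows
     can insert (and stash for other ranks) all permuted entries without additional mallocs. */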
PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm)); 1600 PetscCall(MatSeqAIJGetArray(aA,&aa)); 1601 PetscCall(MatSeqAIJGetArray(aB,&ba)); 1602 for (i=0; i<m; i++) { 1603 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1604 PetscInt j0,rowlen; 1605 rowlen = ai[i+1] - ai[i]; 1606 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1607 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1608 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES)); 1609 } 1610 rowlen = bi[i+1] - bi[i]; 1611 for (j0=j=0; j<rowlen; j0=j) { 1612 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1613 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES)); 1614 } 1615 } 1616 PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY)); 1617 PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY)); 1618 PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1619 PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1620 PetscCall(MatSeqAIJRestoreArray(aA,&aa)); 1621 PetscCall(MatSeqAIJRestoreArray(aB,&ba)); 1622 PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz)); 1623 PetscCall(PetscFree3(work,rdest,cdest)); 1624 PetscCall(PetscFree(gcdest)); 1625 if (parcolp) PetscCall(ISDestroy(&colp)); 1626 *B = Aperm; 1627 PetscFunctionReturn(0); 1628 } 1629 1630 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1631 { 1632 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1633 1634 PetscFunctionBegin; 1635 PetscCall(MatGetSize(aij->B,NULL,nghosts)); 1636 if (ghosts) *ghosts = aij->garray; 1637 PetscFunctionReturn(0); 1638 } 1639 1640 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1641 { 1642 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1643 Mat A = mat->A,B = mat->B; 1644 PetscLogDouble isend[5],irecv[5]; 1645 1646 PetscFunctionBegin; 1647 info->block_size = 1.0; 1648 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1649 1650 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1651 isend[3] = info->memory; isend[4] = info->mallocs; 1652 1653 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1654 1655 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1656 isend[3] += info->memory; isend[4] += info->mallocs; 1657 if (flag == MAT_LOCAL) { 1658 info->nz_used = isend[0]; 1659 info->nz_allocated = isend[1]; 1660 info->nz_unneeded = isend[2]; 1661 info->memory = isend[3]; 1662 info->mallocs = isend[4]; 1663 } else if (flag == MAT_GLOBAL_MAX) { 1664 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 1665 1666 info->nz_used = irecv[0]; 1667 info->nz_allocated = irecv[1]; 1668 info->nz_unneeded = irecv[2]; 1669 info->memory = irecv[3]; 1670 info->mallocs = irecv[4]; 1671 } else if (flag == MAT_GLOBAL_SUM) { 1672 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 1673 1674 info->nz_used = irecv[0]; 1675 info->nz_allocated = irecv[1]; 1676 info->nz_unneeded = irecv[2]; 1677 info->memory = irecv[3]; 1678 info->mallocs = irecv[4]; 1679 } 1680 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1681 info->fill_ratio_needed = 0; 1682 info->factor_mallocs = 0; 1683 PetscFunctionReturn(0); 1684 } 1685 1686 PetscErrorCode MatSetOption_MPIAIJ(Mat 
A,MatOption op,PetscBool flg) 1687 { 1688 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1689 1690 PetscFunctionBegin; 1691 switch (op) { 1692 case MAT_NEW_NONZERO_LOCATIONS: 1693 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1694 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1695 case MAT_KEEP_NONZERO_PATTERN: 1696 case MAT_NEW_NONZERO_LOCATION_ERR: 1697 case MAT_USE_INODES: 1698 case MAT_IGNORE_ZERO_ENTRIES: 1699 case MAT_FORM_EXPLICIT_TRANSPOSE: 1700 MatCheckPreallocated(A,1); 1701 PetscCall(MatSetOption(a->A,op,flg)); 1702 PetscCall(MatSetOption(a->B,op,flg)); 1703 break; 1704 case MAT_ROW_ORIENTED: 1705 MatCheckPreallocated(A,1); 1706 a->roworiented = flg; 1707 1708 PetscCall(MatSetOption(a->A,op,flg)); 1709 PetscCall(MatSetOption(a->B,op,flg)); 1710 break; 1711 case MAT_FORCE_DIAGONAL_ENTRIES: 1712 case MAT_SORTED_FULL: 1713 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1714 break; 1715 case MAT_IGNORE_OFF_PROC_ENTRIES: 1716 a->donotstash = flg; 1717 break; 1718 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1719 case MAT_SPD: 1720 case MAT_SYMMETRIC: 1721 case MAT_STRUCTURALLY_SYMMETRIC: 1722 case MAT_HERMITIAN: 1723 case MAT_SYMMETRY_ETERNAL: 1724 break; 1725 case MAT_SUBMAT_SINGLEIS: 1726 A->submat_singleis = flg; 1727 break; 1728 case MAT_STRUCTURE_ONLY: 1729 /* The option is handled directly by MatSetOption() */ 1730 break; 1731 default: 1732 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1733 } 1734 PetscFunctionReturn(0); 1735 } 1736 1737 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1738 { 1739 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1740 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1741 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1742 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1743 PetscInt *cmap,*idx_p; 1744 1745 PetscFunctionBegin; 1746 PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1747 mat->getrowactive = PETSC_TRUE; 1748 1749 if (!mat->rowvalues && (idx || v)) { 1750 /* 1751 allocate enough space to hold information from the longest row. 
1752 */ 1753 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1754 PetscInt max = 1,tmp; 1755 for (i=0; i<matin->rmap->n; i++) { 1756 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1757 if (max < tmp) max = tmp; 1758 } 1759 PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices)); 1760 } 1761 1762 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1763 lrow = row - rstart; 1764 1765 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1766 if (!v) {pvA = NULL; pvB = NULL;} 1767 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1768 PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 1769 PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1770 nztot = nzA + nzB; 1771 1772 cmap = mat->garray; 1773 if (v || idx) { 1774 if (nztot) { 1775 /* Sort by increasing column numbers, assuming A and B already sorted */ 1776 PetscInt imark = -1; 1777 if (v) { 1778 *v = v_p = mat->rowvalues; 1779 for (i=0; i<nzB; i++) { 1780 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1781 else break; 1782 } 1783 imark = i; 1784 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1785 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1786 } 1787 if (idx) { 1788 *idx = idx_p = mat->rowindices; 1789 if (imark > -1) { 1790 for (i=0; i<imark; i++) { 1791 idx_p[i] = cmap[cworkB[i]]; 1792 } 1793 } else { 1794 for (i=0; i<nzB; i++) { 1795 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1796 else break; 1797 } 1798 imark = i; 1799 } 1800 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1801 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1802 } 1803 } else { 1804 if (idx) *idx = NULL; 1805 if (v) *v = NULL; 1806 } 1807 } 1808 *nz = nztot; 1809 PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 1810 PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 1811 PetscFunctionReturn(0); 1812 } 1813 1814 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1815 { 1816 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1817 1818 PetscFunctionBegin; 1819 PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1820 aij->getrowactive = PETSC_FALSE; 1821 PetscFunctionReturn(0); 1822 } 1823 1824 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1825 { 1826 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1827 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1828 PetscInt i,j,cstart = mat->cmap->rstart; 1829 PetscReal sum = 0.0; 1830 const MatScalar *v,*amata,*bmata; 1831 1832 PetscFunctionBegin; 1833 if (aij->size == 1) { 1834 PetscCall(MatNorm(aij->A,type,norm)); 1835 } else { 1836 PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata)); 1837 PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata)); 1838 if (type == NORM_FROBENIUS) { 1839 v = amata; 1840 for (i=0; i<amat->nz; i++) { 1841 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1842 } 1843 v = bmata; 1844 for (i=0; i<bmat->nz; i++) { 1845 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1846 } 1847 PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1848 *norm = PetscSqrtReal(*norm); 1849 PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz)); 1850 } else if (type == NORM_1) { /* max column norm */ 1851 PetscReal *tmp,*tmp2; 1852 PetscInt *jj,*garray = aij->garray; 1853 PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp)); 1854 PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2)); 1855 
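      /* 1-norm = max_j sum_i |a_ij| (largest absolute column sum).  Each process accumulates the
         absolute values of its local entries into a dense work array of length cmap->N indexed by
         global column: diagonal-block columns are shifted by cstart, off-diagonal-block columns are
         mapped through garray[].  A global MPIU_SUM reduction then completes the column sums before
         the maximum is taken; note this uses O(N) temporary storage on every process. */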
*norm = 0.0; 1856 v = amata; jj = amat->j; 1857 for (j=0; j<amat->nz; j++) { 1858 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1859 } 1860 v = bmata; jj = bmat->j; 1861 for (j=0; j<bmat->nz; j++) { 1862 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1863 } 1864 PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1865 for (j=0; j<mat->cmap->N; j++) { 1866 if (tmp2[j] > *norm) *norm = tmp2[j]; 1867 } 1868 PetscCall(PetscFree(tmp)); 1869 PetscCall(PetscFree(tmp2)); 1870 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1871 } else if (type == NORM_INFINITY) { /* max row norm */ 1872 PetscReal ntemp = 0.0; 1873 for (j=0; j<aij->A->rmap->n; j++) { 1874 v = amata + amat->i[j]; 1875 sum = 0.0; 1876 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1877 sum += PetscAbsScalar(*v); v++; 1878 } 1879 v = bmata + bmat->i[j]; 1880 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1881 sum += PetscAbsScalar(*v); v++; 1882 } 1883 if (sum > ntemp) ntemp = sum; 1884 } 1885 PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 1886 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1887 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1888 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata)); 1889 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata)); 1890 } 1891 PetscFunctionReturn(0); 1892 } 1893 1894 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1895 { 1896 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1897 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1898 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1899 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1900 Mat B,A_diag,*B_diag; 1901 const MatScalar *pbv,*bv; 1902 1903 PetscFunctionBegin; 1904 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1905 ai = Aloc->i; aj = Aloc->j; 1906 bi = Bloc->i; bj = Bloc->j; 1907 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1908 PetscInt *d_nnz,*g_nnz,*o_nnz; 1909 PetscSFNode *oloc; 1910 PETSC_UNUSED PetscSF sf; 1911 1912 PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc)); 1913 /* compute d_nnz for preallocation */ 1914 PetscCall(PetscArrayzero(d_nnz,na)); 1915 for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++; 1916 /* compute local off-diagonal contributions */ 1917 PetscCall(PetscArrayzero(g_nnz,nb)); 1918 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1919 /* map those to global */ 1920 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1921 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray)); 1922 PetscCall(PetscSFSetFromOptions(sf)); 1923 PetscCall(PetscArrayzero(o_nnz,na)); 1924 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1925 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1926 PetscCall(PetscSFDestroy(&sf)); 1927 1928 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 1929 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 1930 PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs))); 1931 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 1932 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 1933 PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc)); 1934 } else { 1935 B = *matout; 1936 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 1937 } 1938 1939 b = (Mat_MPIAIJ*)B->data; 1940 A_diag = a->A; 1941 B_diag = &b->A; 
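  /* The transpose is assembled in two stages: the diagonal block A_diag is transposed purely
     locally into b->A (reusing the structure preallocated above), while the entries of the
     off-diagonal block are re-inserted below with MatSetValues() using global indices (a->garray
     supplies the global columns, which become rows of B), so the assembly stash routes them to
     whichever ranks own the corresponding rows of the transpose. */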
1942 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1943 A_diag_ncol = A_diag->cmap->N; 1944 B_diag_ilen = sub_B_diag->ilen; 1945 B_diag_i = sub_B_diag->i; 1946 1947 /* Set ilen for diagonal of B */ 1948 for (i=0; i<A_diag_ncol; i++) { 1949 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1950 } 1951 1952 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1953 very quickly (=without using MatSetValues), because all writes are local. */ 1954 PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag)); 1955 1956 /* copy over the B part */ 1957 PetscCall(PetscMalloc1(bi[mb],&cols)); 1958 PetscCall(MatSeqAIJGetArrayRead(a->B,&bv)); 1959 pbv = bv; 1960 row = A->rmap->rstart; 1961 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1962 cols_tmp = cols; 1963 for (i=0; i<mb; i++) { 1964 ncol = bi[i+1]-bi[i]; 1965 PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES)); 1966 row++; 1967 pbv += ncol; cols_tmp += ncol; 1968 } 1969 PetscCall(PetscFree(cols)); 1970 PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv)); 1971 1972 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1973 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1974 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1975 *matout = B; 1976 } else { 1977 PetscCall(MatHeaderMerge(A,&B)); 1978 } 1979 PetscFunctionReturn(0); 1980 } 1981 1982 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1983 { 1984 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1985 Mat a = aij->A,b = aij->B; 1986 PetscInt s1,s2,s3; 1987 1988 PetscFunctionBegin; 1989 PetscCall(MatGetLocalSize(mat,&s2,&s3)); 1990 if (rr) { 1991 PetscCall(VecGetLocalSize(rr,&s1)); 1992 PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1993 /* Overlap communication with computation. */ 1994 PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1995 } 1996 if (ll) { 1997 PetscCall(VecGetLocalSize(ll,&s1)); 1998 PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1999 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 2000 } 2001 /* scale the diagonal block */ 2002 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 2003 2004 if (rr) { 2005 /* Do a scatter end and then right scale the off-diagonal block */ 2006 PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2007 PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec)); 2008 } 2009 PetscFunctionReturn(0); 2010 } 2011 2012 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2013 { 2014 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2015 2016 PetscFunctionBegin; 2017 PetscCall(MatSetUnfactored(a->A)); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2022 { 2023 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2024 Mat a,b,c,d; 2025 PetscBool flg; 2026 2027 PetscFunctionBegin; 2028 a = matA->A; b = matA->B; 2029 c = matB->A; d = matB->B; 2030 2031 PetscCall(MatEqual(a,c,&flg)); 2032 if (flg) { 2033 PetscCall(MatEqual(b,d,&flg)); 2034 } 2035 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 2036 PetscFunctionReturn(0); 2037 } 2038 2039 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2040 { 2041 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2042 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2043 2044 PetscFunctionBegin; 2045 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2046 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2047 /* because of the column compression in the off-processor part of the matrix a->B, 2048 the number of columns in a->B and b->B may be different, hence we cannot call 2049 the MatCopy() directly on the two parts. If need be, we can provide a more 2050 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2051 then copying the submatrices */ 2052 PetscCall(MatCopy_Basic(A,B,str)); 2053 } else { 2054 PetscCall(MatCopy(a->A,b->A,str)); 2055 PetscCall(MatCopy(a->B,b->B,str)); 2056 } 2057 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2058 PetscFunctionReturn(0); 2059 } 2060 2061 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2062 { 2063 PetscFunctionBegin; 2064 PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 /* 2069 Computes the number of nonzeros per row needed for preallocation when X and Y 2070 have different nonzero structure. 2071 */ 2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2073 { 2074 PetscInt i,j,k,nzx,nzy; 2075 2076 PetscFunctionBegin; 2077 /* Set the number of nonzeros in the new matrix */ 2078 for (i=0; i<m; i++) { 2079 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2080 nzx = xi[i+1] - xi[i]; 2081 nzy = yi[i+1] - yi[i]; 2082 nnz[i] = 0; 2083 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2084 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2085 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2086 nnz[i]++; 2087 } 2088 for (; k<nzy; k++) nnz[i]++; 2089 } 2090 PetscFunctionReturn(0); 2091 } 2092 2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2095 { 2096 PetscInt m = Y->rmap->N; 2097 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2098 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2099 2100 PetscFunctionBegin; 2101 PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 2102 PetscFunctionReturn(0); 2103 } 2104 2105 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2106 { 2107 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2108 2109 PetscFunctionBegin; 2110 if (str == SAME_NONZERO_PATTERN) { 2111 PetscCall(MatAXPY(yy->A,a,xx->A,str)); 2112 PetscCall(MatAXPY(yy->B,a,xx->B,str)); 2113 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2114 PetscCall(MatAXPY_Basic(Y,a,X,str)); 2115 } else { 2116 Mat B; 2117 PetscInt *nnz_d,*nnz_o; 2118 2119 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 2120 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 2121 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 2122 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2123 PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap)); 2124 PetscCall(MatSetType(B,((PetscObject)Y)->type_name)); 2125 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d)); 2126 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 2127 PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o)); 2128 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2129 PetscCall(MatHeaderMerge(Y,&B)); 2130 
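    /* For a general (different) nonzero pattern, a new matrix B with the union sparsity pattern of
       Y and a*X was preallocated above: nnz_d comes from merging the sorted local column lists of
       the two diagonal blocks, nnz_o from merging the off-diagonal blocks after translating their
       compressed column indices through the respective garray[] maps.  For example, a row with
       Y columns {0,3,7} and X columns {3,5} merges to {0,3,5,7}, giving a count of 4.
       MatHeaderMerge() then replaces the internals of Y with those of B, so the caller's handle Y
       now holds Y + a*X with the merged pattern. */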
PetscCall(PetscFree(nnz_d)); 2131 PetscCall(PetscFree(nnz_o)); 2132 } 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2137 2138 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2139 { 2140 PetscFunctionBegin; 2141 if (PetscDefined(USE_COMPLEX)) { 2142 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2143 2144 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2145 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2146 } 2147 PetscFunctionReturn(0); 2148 } 2149 2150 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2151 { 2152 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2153 2154 PetscFunctionBegin; 2155 PetscCall(MatRealPart(a->A)); 2156 PetscCall(MatRealPart(a->B)); 2157 PetscFunctionReturn(0); 2158 } 2159 2160 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2161 { 2162 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2163 2164 PetscFunctionBegin; 2165 PetscCall(MatImaginaryPart(a->A)); 2166 PetscCall(MatImaginaryPart(a->B)); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2171 { 2172 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2173 PetscInt i,*idxb = NULL,m = A->rmap->n; 2174 PetscScalar *va,*vv; 2175 Vec vB,vA; 2176 const PetscScalar *vb; 2177 2178 PetscFunctionBegin; 2179 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 2180 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 2181 2182 PetscCall(VecGetArrayWrite(vA,&va)); 2183 if (idx) { 2184 for (i=0; i<m; i++) { 2185 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2186 } 2187 } 2188 2189 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 2190 PetscCall(PetscMalloc1(m,&idxb)); 2191 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 2192 2193 PetscCall(VecGetArrayWrite(v,&vv)); 2194 PetscCall(VecGetArrayRead(vB,&vb)); 2195 for (i=0; i<m; i++) { 2196 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2197 vv[i] = vb[i]; 2198 if (idx) idx[i] = a->garray[idxb[i]]; 2199 } else { 2200 vv[i] = va[i]; 2201 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2202 idx[i] = a->garray[idxb[i]]; 2203 } 2204 } 2205 PetscCall(VecRestoreArrayWrite(vA,&vv)); 2206 PetscCall(VecRestoreArrayWrite(vA,&va)); 2207 PetscCall(VecRestoreArrayRead(vB,&vb)); 2208 PetscCall(PetscFree(idxb)); 2209 PetscCall(VecDestroy(&vA)); 2210 PetscCall(VecDestroy(&vB)); 2211 PetscFunctionReturn(0); 2212 } 2213 2214 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2215 { 2216 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2217 PetscInt m = A->rmap->n,n = A->cmap->n; 2218 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2219 PetscInt *cmap = mat->garray; 2220 PetscInt *diagIdx, *offdiagIdx; 2221 Vec diagV, offdiagV; 2222 PetscScalar *a, *diagA, *offdiagA; 2223 const PetscScalar *ba,*bav; 2224 PetscInt r,j,col,ncols,*bi,*bj; 2225 Mat B = mat->B; 2226 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2227 2228 PetscFunctionBegin; 2229 /* When a process holds entire A and other processes have no entry */ 2230 if (A->cmap->N == n) { 2231 PetscCall(VecGetArrayWrite(v,&diagA)); 2232 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2233 PetscCall(MatGetRowMinAbs(mat->A,diagV,idx)); 2234 PetscCall(VecDestroy(&diagV)); 2235 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2236 PetscFunctionReturn(0); 2237 } else if (n == 0) { 2238 if (m) { 2239 PetscCall(VecGetArrayWrite(v,&a)); 2240 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2241 PetscCall(VecRestoreArrayWrite(v,&a)); 2242 } 2243 PetscFunctionReturn(0); 2244 } 2245 2246 
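  /* General case: the row-wise minimum absolute value is computed separately for the diagonal
     block (MatGetRowMinAbs() on mat->A below) and for the off-diagonal block.  Because B stores
     only the explicitly nonzero, column-compressed off-diagonal entries, any row that is not dense
     in B implicitly contains a 0.0; the scan below therefore first locates the first "hole" in the
     compressed column map to obtain that implicit zero's global column index, and only then
     compares against the stored entries.  The diagonal and off-diagonal partial results are merged
     at the end, preferring the smaller global column index when the values tie. */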
PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2247 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2248 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2249 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2250 2251 /* Get offdiagIdx[] for implicit 0.0 */ 2252 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2253 ba = bav; 2254 bi = b->i; 2255 bj = b->j; 2256 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2257 for (r = 0; r < m; r++) { 2258 ncols = bi[r+1] - bi[r]; 2259 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2260 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2261 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2262 offdiagA[r] = 0.0; 2263 2264 /* Find first hole in the cmap */ 2265 for (j=0; j<ncols; j++) { 2266 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2267 if (col > j && j < cstart) { 2268 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2269 break; 2270 } else if (col > j + n && j >= cstart) { 2271 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2272 break; 2273 } 2274 } 2275 if (j == ncols && ncols < A->cmap->N - n) { 2276 /* a hole is outside compressed Bcols */ 2277 if (ncols == 0) { 2278 if (cstart) { 2279 offdiagIdx[r] = 0; 2280 } else offdiagIdx[r] = cend; 2281 } else { /* ncols > 0 */ 2282 offdiagIdx[r] = cmap[ncols-1] + 1; 2283 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2284 } 2285 } 2286 } 2287 2288 for (j=0; j<ncols; j++) { 2289 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2290 ba++; bj++; 2291 } 2292 } 2293 2294 PetscCall(VecGetArrayWrite(v, &a)); 2295 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2296 for (r = 0; r < m; ++r) { 2297 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2298 a[r] = diagA[r]; 2299 if (idx) idx[r] = cstart + diagIdx[r]; 2300 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2301 a[r] = diagA[r]; 2302 if (idx) { 2303 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2304 idx[r] = cstart + diagIdx[r]; 2305 } else idx[r] = offdiagIdx[r]; 2306 } 2307 } else { 2308 a[r] = offdiagA[r]; 2309 if (idx) idx[r] = offdiagIdx[r]; 2310 } 2311 } 2312 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2313 PetscCall(VecRestoreArrayWrite(v, &a)); 2314 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2315 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2316 PetscCall(VecDestroy(&diagV)); 2317 PetscCall(VecDestroy(&offdiagV)); 2318 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2319 PetscFunctionReturn(0); 2320 } 2321 2322 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2323 { 2324 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2325 PetscInt m = A->rmap->n,n = A->cmap->n; 2326 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2327 PetscInt *cmap = mat->garray; 2328 PetscInt *diagIdx, *offdiagIdx; 2329 Vec diagV, offdiagV; 2330 PetscScalar *a, *diagA, *offdiagA; 2331 const PetscScalar *ba,*bav; 2332 PetscInt r,j,col,ncols,*bi,*bj; 2333 Mat B = mat->B; 2334 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2335 2336 PetscFunctionBegin; 2337 /* When a process holds entire A and other processes have no entry */ 2338 if (A->cmap->N == n) { 2339 PetscCall(VecGetArrayWrite(v,&diagA)); 2340 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2341 PetscCall(MatGetRowMin(mat->A,diagV,idx)); 2342 PetscCall(VecDestroy(&diagV)); 2343 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2344 PetscFunctionReturn(0); 2345 } else if 
(n == 0) { 2346 if (m) { 2347 PetscCall(VecGetArrayWrite(v,&a)); 2348 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2349 PetscCall(VecRestoreArrayWrite(v,&a)); 2350 } 2351 PetscFunctionReturn(0); 2352 } 2353 2354 PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx)); 2355 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2356 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2357 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2358 2359 /* Get offdiagIdx[] for implicit 0.0 */ 2360 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2361 ba = bav; 2362 bi = b->i; 2363 bj = b->j; 2364 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2365 for (r = 0; r < m; r++) { 2366 ncols = bi[r+1] - bi[r]; 2367 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2368 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2369 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2370 offdiagA[r] = 0.0; 2371 2372 /* Find first hole in the cmap */ 2373 for (j=0; j<ncols; j++) { 2374 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2375 if (col > j && j < cstart) { 2376 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2377 break; 2378 } else if (col > j + n && j >= cstart) { 2379 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2380 break; 2381 } 2382 } 2383 if (j == ncols && ncols < A->cmap->N - n) { 2384 /* a hole is outside compressed Bcols */ 2385 if (ncols == 0) { 2386 if (cstart) { 2387 offdiagIdx[r] = 0; 2388 } else offdiagIdx[r] = cend; 2389 } else { /* ncols > 0 */ 2390 offdiagIdx[r] = cmap[ncols-1] + 1; 2391 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2392 } 2393 } 2394 } 2395 2396 for (j=0; j<ncols; j++) { 2397 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2398 ba++; bj++; 2399 } 2400 } 2401 2402 PetscCall(VecGetArrayWrite(v, &a)); 2403 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2404 for (r = 0; r < m; ++r) { 2405 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2406 a[r] = diagA[r]; 2407 if (idx) idx[r] = cstart + diagIdx[r]; 2408 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2409 a[r] = diagA[r]; 2410 if (idx) { 2411 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2412 idx[r] = cstart + diagIdx[r]; 2413 } else idx[r] = offdiagIdx[r]; 2414 } 2415 } else { 2416 a[r] = offdiagA[r]; 2417 if (idx) idx[r] = offdiagIdx[r]; 2418 } 2419 } 2420 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2421 PetscCall(VecRestoreArrayWrite(v, &a)); 2422 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2423 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2424 PetscCall(VecDestroy(&diagV)); 2425 PetscCall(VecDestroy(&offdiagV)); 2426 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2431 { 2432 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2433 PetscInt m = A->rmap->n,n = A->cmap->n; 2434 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2435 PetscInt *cmap = mat->garray; 2436 PetscInt *diagIdx, *offdiagIdx; 2437 Vec diagV, offdiagV; 2438 PetscScalar *a, *diagA, *offdiagA; 2439 const PetscScalar *ba,*bav; 2440 PetscInt r,j,col,ncols,*bi,*bj; 2441 Mat B = mat->B; 2442 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2443 2444 PetscFunctionBegin; 2445 /* When a process holds entire A and other processes have no entry */ 2446 if (A->cmap->N == n) { 2447 PetscCall(VecGetArrayWrite(v,&diagA)); 2448 
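    /* This rank owns every column, so the off-diagonal block is empty and the row maxima of the
       diagonal block are already the global answer.  Wrap the output vector's array in a temporary
       sequential Vec so that MatGetRowMax() on mat->A writes the result in place. */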
PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2449 PetscCall(MatGetRowMax(mat->A,diagV,idx)); 2450 PetscCall(VecDestroy(&diagV)); 2451 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2452 PetscFunctionReturn(0); 2453 } else if (n == 0) { 2454 if (m) { 2455 PetscCall(VecGetArrayWrite(v,&a)); 2456 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2457 PetscCall(VecRestoreArrayWrite(v,&a)); 2458 } 2459 PetscFunctionReturn(0); 2460 } 2461 2462 PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2463 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2464 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2465 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2466 2467 /* Get offdiagIdx[] for implicit 0.0 */ 2468 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2469 ba = bav; 2470 bi = b->i; 2471 bj = b->j; 2472 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2473 for (r = 0; r < m; r++) { 2474 ncols = bi[r+1] - bi[r]; 2475 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2476 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2477 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2478 offdiagA[r] = 0.0; 2479 2480 /* Find first hole in the cmap */ 2481 for (j=0; j<ncols; j++) { 2482 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2483 if (col > j && j < cstart) { 2484 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2485 break; 2486 } else if (col > j + n && j >= cstart) { 2487 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2488 break; 2489 } 2490 } 2491 if (j == ncols && ncols < A->cmap->N - n) { 2492 /* a hole is outside compressed Bcols */ 2493 if (ncols == 0) { 2494 if (cstart) { 2495 offdiagIdx[r] = 0; 2496 } else offdiagIdx[r] = cend; 2497 } else { /* ncols > 0 */ 2498 offdiagIdx[r] = cmap[ncols-1] + 1; 2499 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2500 } 2501 } 2502 } 2503 2504 for (j=0; j<ncols; j++) { 2505 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2506 ba++; bj++; 2507 } 2508 } 2509 2510 PetscCall(VecGetArrayWrite(v, &a)); 2511 PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA)); 2512 for (r = 0; r < m; ++r) { 2513 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2514 a[r] = diagA[r]; 2515 if (idx) idx[r] = cstart + diagIdx[r]; 2516 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2517 a[r] = diagA[r]; 2518 if (idx) { 2519 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2520 idx[r] = cstart + diagIdx[r]; 2521 } else idx[r] = offdiagIdx[r]; 2522 } 2523 } else { 2524 a[r] = offdiagA[r]; 2525 if (idx) idx[r] = offdiagIdx[r]; 2526 } 2527 } 2528 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2529 PetscCall(VecRestoreArrayWrite(v, &a)); 2530 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2531 PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA)); 2532 PetscCall(VecDestroy(&diagV)); 2533 PetscCall(VecDestroy(&offdiagV)); 2534 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2535 PetscFunctionReturn(0); 2536 } 2537 2538 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2539 { 2540 Mat *dummy; 2541 2542 PetscFunctionBegin; 2543 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy)); 2544 *newmat = *dummy; 2545 PetscCall(PetscFree(dummy)); 2546 PetscFunctionReturn(0); 2547 } 2548 2549 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2550 { 2551 Mat_MPIAIJ *a = (Mat_MPIAIJ*) 
A->data; 2552 2553 PetscFunctionBegin; 2554 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2555 A->factorerrortype = a->A->factorerrortype; 2556 PetscFunctionReturn(0); 2557 } 2558 2559 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2560 { 2561 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2562 2563 PetscFunctionBegin; 2564 PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2565 PetscCall(MatSetRandom(aij->A,rctx)); 2566 if (x->assembled) { 2567 PetscCall(MatSetRandom(aij->B,rctx)); 2568 } else { 2569 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx)); 2570 } 2571 PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY)); 2572 PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY)); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2577 { 2578 PetscFunctionBegin; 2579 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2580 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2581 PetscFunctionReturn(0); 2582 } 2583 2584 /*@ 2585 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2586 2587 Collective on Mat 2588 2589 Input Parameters: 2590 + A - the matrix 2591 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2592 2593 Level: advanced 2594 2595 @*/ 2596 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2597 { 2598 PetscFunctionBegin; 2599 PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc)); 2600 PetscFunctionReturn(0); 2601 } 2602 2603 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2604 { 2605 PetscBool sc = PETSC_FALSE,flg; 2606 2607 PetscFunctionBegin; 2608 PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options"); 2609 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2610 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg)); 2611 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc)); 2612 PetscOptionsHeadEnd(); 2613 PetscFunctionReturn(0); 2614 } 2615 2616 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2617 { 2618 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2619 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2620 2621 PetscFunctionBegin; 2622 if (!Y->preallocated) { 2623 PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL)); 2624 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
*/ 2625 PetscInt nonew = aij->nonew; 2626 PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL)); 2627 aij->nonew = nonew; 2628 } 2629 PetscCall(MatShift_Basic(Y,a)); 2630 PetscFunctionReturn(0); 2631 } 2632 2633 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2634 { 2635 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2636 2637 PetscFunctionBegin; 2638 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2639 PetscCall(MatMissingDiagonal(a->A,missing,d)); 2640 if (d) { 2641 PetscInt rstart; 2642 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 2643 *d += rstart; 2644 2645 } 2646 PetscFunctionReturn(0); 2647 } 2648 2649 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2650 { 2651 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2652 2653 PetscFunctionBegin; 2654 PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag)); 2655 PetscFunctionReturn(0); 2656 } 2657 2658 /* -------------------------------------------------------------------*/ 2659 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2660 MatGetRow_MPIAIJ, 2661 MatRestoreRow_MPIAIJ, 2662 MatMult_MPIAIJ, 2663 /* 4*/ MatMultAdd_MPIAIJ, 2664 MatMultTranspose_MPIAIJ, 2665 MatMultTransposeAdd_MPIAIJ, 2666 NULL, 2667 NULL, 2668 NULL, 2669 /*10*/ NULL, 2670 NULL, 2671 NULL, 2672 MatSOR_MPIAIJ, 2673 MatTranspose_MPIAIJ, 2674 /*15*/ MatGetInfo_MPIAIJ, 2675 MatEqual_MPIAIJ, 2676 MatGetDiagonal_MPIAIJ, 2677 MatDiagonalScale_MPIAIJ, 2678 MatNorm_MPIAIJ, 2679 /*20*/ MatAssemblyBegin_MPIAIJ, 2680 MatAssemblyEnd_MPIAIJ, 2681 MatSetOption_MPIAIJ, 2682 MatZeroEntries_MPIAIJ, 2683 /*24*/ MatZeroRows_MPIAIJ, 2684 NULL, 2685 NULL, 2686 NULL, 2687 NULL, 2688 /*29*/ MatSetUp_MPIAIJ, 2689 NULL, 2690 NULL, 2691 MatGetDiagonalBlock_MPIAIJ, 2692 NULL, 2693 /*34*/ MatDuplicate_MPIAIJ, 2694 NULL, 2695 NULL, 2696 NULL, 2697 NULL, 2698 /*39*/ MatAXPY_MPIAIJ, 2699 MatCreateSubMatrices_MPIAIJ, 2700 MatIncreaseOverlap_MPIAIJ, 2701 MatGetValues_MPIAIJ, 2702 MatCopy_MPIAIJ, 2703 /*44*/ MatGetRowMax_MPIAIJ, 2704 MatScale_MPIAIJ, 2705 MatShift_MPIAIJ, 2706 MatDiagonalSet_MPIAIJ, 2707 MatZeroRowsColumns_MPIAIJ, 2708 /*49*/ MatSetRandom_MPIAIJ, 2709 MatGetRowIJ_MPIAIJ, 2710 MatRestoreRowIJ_MPIAIJ, 2711 NULL, 2712 NULL, 2713 /*54*/ MatFDColoringCreate_MPIXAIJ, 2714 NULL, 2715 MatSetUnfactored_MPIAIJ, 2716 MatPermute_MPIAIJ, 2717 NULL, 2718 /*59*/ MatCreateSubMatrix_MPIAIJ, 2719 MatDestroy_MPIAIJ, 2720 MatView_MPIAIJ, 2721 NULL, 2722 NULL, 2723 /*64*/ NULL, 2724 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2725 NULL, 2726 NULL, 2727 NULL, 2728 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2729 MatGetRowMinAbs_MPIAIJ, 2730 NULL, 2731 NULL, 2732 NULL, 2733 NULL, 2734 /*75*/ MatFDColoringApply_AIJ, 2735 MatSetFromOptions_MPIAIJ, 2736 NULL, 2737 NULL, 2738 MatFindZeroDiagonals_MPIAIJ, 2739 /*80*/ NULL, 2740 NULL, 2741 NULL, 2742 /*83*/ MatLoad_MPIAIJ, 2743 MatIsSymmetric_MPIAIJ, 2744 NULL, 2745 NULL, 2746 NULL, 2747 NULL, 2748 /*89*/ NULL, 2749 NULL, 2750 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2751 NULL, 2752 NULL, 2753 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2754 NULL, 2755 NULL, 2756 NULL, 2757 MatBindToCPU_MPIAIJ, 2758 /*99*/ MatProductSetFromOptions_MPIAIJ, 2759 NULL, 2760 NULL, 2761 MatConjugate_MPIAIJ, 2762 NULL, 2763 /*104*/MatSetValuesRow_MPIAIJ, 2764 MatRealPart_MPIAIJ, 2765 MatImaginaryPart_MPIAIJ, 2766 NULL, 2767 NULL, 2768 /*109*/NULL, 2769 NULL, 2770 MatGetRowMin_MPIAIJ, 2771 NULL, 2772 MatMissingDiagonal_MPIAIJ, 2773 
/*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2774 NULL, 2775 MatGetGhosts_MPIAIJ, 2776 NULL, 2777 NULL, 2778 /*119*/MatMultDiagonalBlock_MPIAIJ, 2779 NULL, 2780 NULL, 2781 NULL, 2782 MatGetMultiProcBlock_MPIAIJ, 2783 /*124*/MatFindNonzeroRows_MPIAIJ, 2784 MatGetColumnReductions_MPIAIJ, 2785 MatInvertBlockDiagonal_MPIAIJ, 2786 MatInvertVariableBlockDiagonal_MPIAIJ, 2787 MatCreateSubMatricesMPI_MPIAIJ, 2788 /*129*/NULL, 2789 NULL, 2790 NULL, 2791 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2792 NULL, 2793 /*134*/NULL, 2794 NULL, 2795 NULL, 2796 NULL, 2797 NULL, 2798 /*139*/MatSetBlockSizes_MPIAIJ, 2799 NULL, 2800 NULL, 2801 MatFDColoringSetUp_MPIXAIJ, 2802 MatFindOffBlockDiagonalEntries_MPIAIJ, 2803 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2804 /*145*/NULL, 2805 NULL, 2806 NULL, 2807 MatCreateGraph_Simple_AIJ, 2808 MatFilter_AIJ 2809 }; 2810 2811 /* ----------------------------------------------------------------------------------------*/ 2812 2813 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2814 { 2815 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2816 2817 PetscFunctionBegin; 2818 PetscCall(MatStoreValues(aij->A)); 2819 PetscCall(MatStoreValues(aij->B)); 2820 PetscFunctionReturn(0); 2821 } 2822 2823 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2824 { 2825 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2826 2827 PetscFunctionBegin; 2828 PetscCall(MatRetrieveValues(aij->A)); 2829 PetscCall(MatRetrieveValues(aij->B)); 2830 PetscFunctionReturn(0); 2831 } 2832 2833 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2834 { 2835 Mat_MPIAIJ *b; 2836 PetscMPIInt size; 2837 2838 PetscFunctionBegin; 2839 PetscCall(PetscLayoutSetUp(B->rmap)); 2840 PetscCall(PetscLayoutSetUp(B->cmap)); 2841 b = (Mat_MPIAIJ*)B->data; 2842 2843 #if defined(PETSC_USE_CTABLE) 2844 PetscCall(PetscTableDestroy(&b->colmap)); 2845 #else 2846 PetscCall(PetscFree(b->colmap)); 2847 #endif 2848 PetscCall(PetscFree(b->garray)); 2849 PetscCall(VecDestroy(&b->lvec)); 2850 PetscCall(VecScatterDestroy(&b->Mvctx)); 2851 2852 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2853 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 2854 PetscCall(MatDestroy(&b->B)); 2855 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 2856 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0)); 2857 PetscCall(MatSetBlockSizesFromMats(b->B,B,B)); 2858 PetscCall(MatSetType(b->B,MATSEQAIJ)); 2859 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 2860 2861 if (!B->preallocated) { 2862 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 2863 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 2864 PetscCall(MatSetBlockSizesFromMats(b->A,B,B)); 2865 PetscCall(MatSetType(b->A,MATSEQAIJ)); 2866 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 2867 } 2868 2869 PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz)); 2870 PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz)); 2871 B->preallocated = PETSC_TRUE; 2872 B->was_assembled = PETSC_FALSE; 2873 B->assembled = PETSC_FALSE; 2874 PetscFunctionReturn(0); 2875 } 2876 2877 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2878 { 2879 Mat_MPIAIJ *b; 2880 2881 PetscFunctionBegin; 2882 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2883 PetscCall(PetscLayoutSetUp(B->rmap)); 2884 PetscCall(PetscLayoutSetUp(B->cmap)); 2885 b = (Mat_MPIAIJ*)B->data; 2886 2887 #if defined(PETSC_USE_CTABLE) 2888 PetscCall(PetscTableDestroy(&b->colmap)); 2889 #else 2890 PetscCall(PetscFree(b->colmap)); 2891 #endif 2892 PetscCall(PetscFree(b->garray)); 2893 PetscCall(VecDestroy(&b->lvec)); 2894 PetscCall(VecScatterDestroy(&b->Mvctx)); 2895 2896 PetscCall(MatResetPreallocation(b->A)); 2897 PetscCall(MatResetPreallocation(b->B)); 2898 B->preallocated = PETSC_TRUE; 2899 B->was_assembled = PETSC_FALSE; 2900 B->assembled = PETSC_FALSE; 2901 PetscFunctionReturn(0); 2902 } 2903 2904 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2905 { 2906 Mat mat; 2907 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2908 2909 PetscFunctionBegin; 2910 *newmat = NULL; 2911 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 2912 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 2913 PetscCall(MatSetBlockSizesFromMats(mat,matin,matin)); 2914 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 2915 a = (Mat_MPIAIJ*)mat->data; 2916 2917 mat->factortype = matin->factortype; 2918 mat->assembled = matin->assembled; 2919 mat->insertmode = NOT_SET_VALUES; 2920 mat->preallocated = matin->preallocated; 2921 2922 a->size = oldmat->size; 2923 a->rank = oldmat->rank; 2924 a->donotstash = oldmat->donotstash; 2925 a->roworiented = oldmat->roworiented; 2926 a->rowindices = NULL; 2927 a->rowvalues = NULL; 2928 a->getrowactive = PETSC_FALSE; 2929 2930 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 2931 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 2932 2933 if (oldmat->colmap) { 2934 #if defined(PETSC_USE_CTABLE) 2935 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 2936 #else 2937 PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap)); 2938 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt))); 2939 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N)); 2940 #endif 2941 } else a->colmap = NULL; 2942 if (oldmat->garray) { 2943 PetscInt len; 2944 len = oldmat->B->cmap->n; 2945 PetscCall(PetscMalloc1(len+1,&a->garray)); 2946 PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 2947 if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 2948 } else a->garray = NULL; 2949 2950 /* It may happen MatDuplicate is called with a non-assembled matrix 2951 In fact, MatDuplicate only requires the matrix to be preallocated 2952 This may happen inside a 
DMCreateMatrix_Shell */ 2953 if (oldmat->lvec) { 2954 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 2955 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 2956 } 2957 if (oldmat->Mvctx) { 2958 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 2959 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 2960 } 2961 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2962 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2963 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 2964 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 2965 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 2966 *newmat = mat; 2967 PetscFunctionReturn(0); 2968 } 2969 2970 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2971 { 2972 PetscBool isbinary, ishdf5; 2973 2974 PetscFunctionBegin; 2975 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2976 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2977 /* force binary viewer to load .info file if it has not yet done so */ 2978 PetscCall(PetscViewerSetUp(viewer)); 2979 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 2980 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5)); 2981 if (isbinary) { 2982 PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer)); 2983 } else if (ishdf5) { 2984 #if defined(PETSC_HAVE_HDF5) 2985 PetscCall(MatLoad_AIJ_HDF5(newMat,viewer)); 2986 #else 2987 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2988 #endif 2989 } else { 2990 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2991 } 2992 PetscFunctionReturn(0); 2993 } 2994 2995 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 2996 { 2997 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 2998 PetscInt *rowidxs,*colidxs; 2999 PetscScalar *matvals; 3000 3001 PetscFunctionBegin; 3002 PetscCall(PetscViewerSetUp(viewer)); 3003 3004 /* read in matrix header */ 3005 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3006 PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3007 M = header[1]; N = header[2]; nz = header[3]; 3008 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3009 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3010 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3011 3012 /* set block sizes from the viewer's .info file */ 3013 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3014 /* set global sizes if not set already */ 3015 if (mat->rmap->N < 0) mat->rmap->N = M; 3016 if (mat->cmap->N < 0) mat->cmap->N = N; 3017 PetscCall(PetscLayoutSetUp(mat->rmap)); 3018 PetscCall(PetscLayoutSetUp(mat->cmap)); 3019 3020 /* check if the matrix sizes are correct */ 3021 PetscCall(MatGetSize(mat,&rows,&cols)); 3022 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT 
", %" PetscInt_FMT ")",M,N,rows,cols); 3023 3024 /* read in row lengths and build row indices */ 3025 PetscCall(MatGetLocalSize(mat,&m,NULL)); 3026 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3027 PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT)); 3028 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3029 PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer))); 3030 PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3031 /* read in column indices and matrix values */ 3032 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals)); 3033 PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 3034 PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 3035 /* store matrix indices and values */ 3036 PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals)); 3037 PetscCall(PetscFree(rowidxs)); 3038 PetscCall(PetscFree2(colidxs,matvals)); 3039 PetscFunctionReturn(0); 3040 } 3041 3042 /* Not scalable because of ISAllGather() unless getting all columns. */ 3043 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3044 { 3045 IS iscol_local; 3046 PetscBool isstride; 3047 PetscMPIInt lisstride=0,gisstride; 3048 3049 PetscFunctionBegin; 3050 /* check if we are grabbing all columns*/ 3051 PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride)); 3052 3053 if (isstride) { 3054 PetscInt start,len,mstart,mlen; 3055 PetscCall(ISStrideGetInfo(iscol,&start,NULL)); 3056 PetscCall(ISGetLocalSize(iscol,&len)); 3057 PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen)); 3058 if (mstart == start && mlen-mstart == len) lisstride = 1; 3059 } 3060 3061 PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat))); 3062 if (gisstride) { 3063 PetscInt N; 3064 PetscCall(MatGetSize(mat,NULL,&N)); 3065 PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local)); 3066 PetscCall(ISSetIdentity(iscol_local)); 3067 PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3068 } else { 3069 PetscInt cbs; 3070 PetscCall(ISGetBlockSize(iscol,&cbs)); 3071 PetscCall(ISAllGather(iscol,&iscol_local)); 3072 PetscCall(ISSetBlockSize(iscol_local,cbs)); 3073 } 3074 3075 *isseq = iscol_local; 3076 PetscFunctionReturn(0); 3077 } 3078 3079 /* 3080 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3081 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3082 3083 Input Parameters: 3084 mat - matrix 3085 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3086 i.e., mat->rstart <= isrow[i] < mat->rend 3087 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3088 i.e., mat->cstart <= iscol[i] < mat->cend 3089 Output Parameter: 3090 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3091 iscol_o - sequential column index set for retrieving mat->B 3092 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3093 */ 3094 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3095 { 3096 Vec x,cmap; 3097 const PetscInt *is_idx; 3098 
PetscScalar *xarray,*cmaparray; 3099 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3100 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3101 Mat B=a->B; 3102 Vec lvec=a->lvec,lcmap; 3103 PetscInt i,cstart,cend,Bn=B->cmap->N; 3104 MPI_Comm comm; 3105 VecScatter Mvctx=a->Mvctx; 3106 3107 PetscFunctionBegin; 3108 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3109 PetscCall(ISGetLocalSize(iscol,&ncols)); 3110 3111 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3112 PetscCall(MatCreateVecs(mat,&x,NULL)); 3113 PetscCall(VecSet(x,-1.0)); 3114 PetscCall(VecDuplicate(x,&cmap)); 3115 PetscCall(VecSet(cmap,-1.0)); 3116 3117 /* Get start indices */ 3118 PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm)); 3119 isstart -= ncols; 3120 PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend)); 3121 3122 PetscCall(ISGetIndices(iscol,&is_idx)); 3123 PetscCall(VecGetArray(x,&xarray)); 3124 PetscCall(VecGetArray(cmap,&cmaparray)); 3125 PetscCall(PetscMalloc1(ncols,&idx)); 3126 for (i=0; i<ncols; i++) { 3127 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3128 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3129 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3130 } 3131 PetscCall(VecRestoreArray(x,&xarray)); 3132 PetscCall(VecRestoreArray(cmap,&cmaparray)); 3133 PetscCall(ISRestoreIndices(iscol,&is_idx)); 3134 3135 /* Get iscol_d */ 3136 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d)); 3137 PetscCall(ISGetBlockSize(iscol,&i)); 3138 PetscCall(ISSetBlockSize(*iscol_d,i)); 3139 3140 /* Get isrow_d */ 3141 PetscCall(ISGetLocalSize(isrow,&m)); 3142 rstart = mat->rmap->rstart; 3143 PetscCall(PetscMalloc1(m,&idx)); 3144 PetscCall(ISGetIndices(isrow,&is_idx)); 3145 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3146 PetscCall(ISRestoreIndices(isrow,&is_idx)); 3147 3148 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d)); 3149 PetscCall(ISGetBlockSize(isrow,&i)); 3150 PetscCall(ISSetBlockSize(*isrow_d,i)); 3151 3152 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3153 PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 3154 PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 3155 3156 PetscCall(VecDuplicate(lvec,&lcmap)); 3157 3158 PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 3159 PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 3160 3161 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3162 /* off-process column indices */ 3163 count = 0; 3164 PetscCall(PetscMalloc1(Bn,&idx)); 3165 PetscCall(PetscMalloc1(Bn,&cmap1)); 3166 3167 PetscCall(VecGetArray(lvec,&xarray)); 3168 PetscCall(VecGetArray(lcmap,&cmaparray)); 3169 for (i=0; i<Bn; i++) { 3170 if (PetscRealPart(xarray[i]) > -1.0) { 3171 idx[count] = i; /* local column index in off-diagonal part B */ 3172 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3173 count++; 3174 } 3175 } 3176 PetscCall(VecRestoreArray(lvec,&xarray)); 3177 PetscCall(VecRestoreArray(lcmap,&cmaparray)); 3178 3179 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o)); 3180 /* cannot ensure iscol_o has same blocksize as iscol! 
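     (iscol_o keeps only those entries of iscol that fall in the off-diagonal block B, so a block of iscol may be split between the diagonal and off-diagonal parts)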
*/ 3181 3182 PetscCall(PetscFree(idx)); 3183 *garray = cmap1; 3184 3185 PetscCall(VecDestroy(&x)); 3186 PetscCall(VecDestroy(&cmap)); 3187 PetscCall(VecDestroy(&lcmap)); 3188 PetscFunctionReturn(0); 3189 } 3190 3191 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3192 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3193 { 3194 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3195 Mat M = NULL; 3196 MPI_Comm comm; 3197 IS iscol_d,isrow_d,iscol_o; 3198 Mat Asub = NULL,Bsub = NULL; 3199 PetscInt n; 3200 3201 PetscFunctionBegin; 3202 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3203 3204 if (call == MAT_REUSE_MATRIX) { 3205 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3206 PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d)); 3207 PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3208 3209 PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d)); 3210 PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3211 3212 PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o)); 3213 PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3214 3215 /* Update diagonal and off-diagonal portions of submat */ 3216 asub = (Mat_MPIAIJ*)(*submat)->data; 3217 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A)); 3218 PetscCall(ISGetLocalSize(iscol_o,&n)); 3219 if (n) { 3220 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B)); 3221 } 3222 PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY)); 3223 PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY)); 3224 3225 } else { /* call == MAT_INITIAL_MATRIX) */ 3226 const PetscInt *garray; 3227 PetscInt BsubN; 3228 3229 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3230 PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray)); 3231 3232 /* Create local submatrices Asub and Bsub */ 3233 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub)); 3234 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub)); 3235 3236 /* Create submatrix M */ 3237 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M)); 3238 3239 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3240 asub = (Mat_MPIAIJ*)M->data; 3241 3242 PetscCall(ISGetLocalSize(iscol_o,&BsubN)); 3243 n = asub->B->cmap->N; 3244 if (BsubN > n) { 3245 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3246 const PetscInt *idx; 3247 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3248 PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN)); 3249 3250 PetscCall(PetscMalloc1(n,&idx_new)); 3251 j = 0; 3252 PetscCall(ISGetIndices(iscol_o,&idx)); 3253 for (i=0; i<n; i++) { 3254 if (j >= BsubN) break; 3255 while (subgarray[i] > garray[j]) j++; 3256 3257 if (subgarray[i] == garray[j]) { 3258 idx_new[i] = idx[j++]; 3259 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]); 3260 } 3261 PetscCall(ISRestoreIndices(iscol_o,&idx)); 3262 3263 PetscCall(ISDestroy(&iscol_o)); 3264 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o)); 3265 3266 } else if (BsubN < n) { 3267 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N); 3268 } 3269 3270 PetscCall(PetscFree(garray)); 3271 *submat = M; 3272 3273 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3274 PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d)); 3275 PetscCall(ISDestroy(&isrow_d)); 3276 3277 PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d)); 3278 PetscCall(ISDestroy(&iscol_d)); 3279 3280 PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o)); 3281 PetscCall(ISDestroy(&iscol_o)); 3282 } 3283 PetscFunctionReturn(0); 3284 } 3285 3286 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3287 { 3288 IS iscol_local=NULL,isrow_d; 3289 PetscInt csize; 3290 PetscInt n,i,j,start,end; 3291 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3292 MPI_Comm comm; 3293 3294 PetscFunctionBegin; 3295 /* If isrow has same processor distribution as mat, 3296 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3297 if (call == MAT_REUSE_MATRIX) { 3298 PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d)); 3299 if (isrow_d) { 3300 sameRowDist = PETSC_TRUE; 3301 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3302 } else { 3303 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local)); 3304 if (iscol_local) { 3305 sameRowDist = PETSC_TRUE; 3306 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3307 } 3308 } 3309 } else { 3310 /* Check if isrow has same processor distribution as mat */ 3311 sameDist[0] = PETSC_FALSE; 3312 PetscCall(ISGetLocalSize(isrow,&n)); 3313 if (!n) { 3314 sameDist[0] = PETSC_TRUE; 3315 } 
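  /* A nonempty isrow has the same distribution as mat only if every local index falls inside this process's row ownership range, checked below via ISGetMinMax() */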
else { 3316 PetscCall(ISGetMinMax(isrow,&i,&j)); 3317 PetscCall(MatGetOwnershipRange(mat,&start,&end)); 3318 if (i >= start && j < end) { 3319 sameDist[0] = PETSC_TRUE; 3320 } 3321 } 3322 3323 /* Check if iscol has same processor distribution as mat */ 3324 sameDist[1] = PETSC_FALSE; 3325 PetscCall(ISGetLocalSize(iscol,&n)); 3326 if (!n) { 3327 sameDist[1] = PETSC_TRUE; 3328 } else { 3329 PetscCall(ISGetMinMax(iscol,&i,&j)); 3330 PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end)); 3331 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3332 } 3333 3334 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3335 PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm)); 3336 sameRowDist = tsameDist[0]; 3337 } 3338 3339 if (sameRowDist) { 3340 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3341 /* isrow and iscol have same processor distribution as mat */ 3342 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat)); 3343 PetscFunctionReturn(0); 3344 } else { /* sameRowDist */ 3345 /* isrow has same processor distribution as mat */ 3346 if (call == MAT_INITIAL_MATRIX) { 3347 PetscBool sorted; 3348 PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 3349 PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */ 3350 PetscCall(ISGetSize(iscol,&i)); 3351 PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i); 3352 3353 PetscCall(ISSorted(iscol_local,&sorted)); 3354 if (sorted) { 3355 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3356 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat)); 3357 PetscFunctionReturn(0); 3358 } 3359 } else { /* call == MAT_REUSE_MATRIX */ 3360 IS iscol_sub; 3361 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3362 if (iscol_sub) { 3363 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat)); 3364 PetscFunctionReturn(0); 3365 } 3366 } 3367 } 3368 } 3369 3370 /* General case: iscol -> iscol_local which has global size of iscol */ 3371 if (call == MAT_REUSE_MATRIX) { 3372 PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local)); 3373 PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3374 } else { 3375 if (!iscol_local) { 3376 PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 3377 } 3378 } 3379 3380 PetscCall(ISGetLocalSize(iscol,&csize)); 3381 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat)); 3382 3383 if (call == MAT_INITIAL_MATRIX) { 3384 PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local)); 3385 PetscCall(ISDestroy(&iscol_local)); 3386 } 3387 PetscFunctionReturn(0); 3388 } 3389 3390 /*@C 3391 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3392 and "off-diagonal" part of the matrix in CSR format. 3393 3394 Collective 3395 3396 Input Parameters: 3397 + comm - MPI communicator 3398 . A - "diagonal" portion of matrix 3399 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3400 - garray - global index of B columns 3401 3402 Output Parameter: 3403 . 
mat - the matrix, with input A as its local diagonal matrix 3404 Level: advanced 3405 3406 Notes: 3407 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3408 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3409 3410 .seealso: `MatCreateMPIAIJWithSplitArrays()` 3411 @*/ 3412 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3413 { 3414 Mat_MPIAIJ *maij; 3415 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3416 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3417 const PetscScalar *oa; 3418 Mat Bnew; 3419 PetscInt m,n,N; 3420 MatType mpi_mat_type; 3421 3422 PetscFunctionBegin; 3423 PetscCall(MatCreate(comm,mat)); 3424 PetscCall(MatGetSize(A,&m,&n)); 3425 PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N); 3426 PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs); 3427 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3428 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3429 3430 /* Get global columns of mat */ 3431 PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm)); 3432 3433 PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N)); 3434 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */ 3435 PetscCall(MatGetMPIMatType_Private(A,&mpi_mat_type)); 3436 PetscCall(MatSetType(*mat,mpi_mat_type)); 3437 3438 PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs)); 3439 maij = (Mat_MPIAIJ*)(*mat)->data; 3440 3441 (*mat)->preallocated = PETSC_TRUE; 3442 3443 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3444 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3445 3446 /* Set A as diagonal portion of *mat */ 3447 maij->A = A; 3448 3449 nz = oi[m]; 3450 for (i=0; i<nz; i++) { 3451 col = oj[i]; 3452 oj[i] = garray[col]; 3453 } 3454 3455 /* Set Bnew as off-diagonal portion of *mat */ 3456 PetscCall(MatSeqAIJGetArrayRead(B,&oa)); 3457 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew)); 3458 PetscCall(MatSeqAIJRestoreArrayRead(B,&oa)); 3459 bnew = (Mat_SeqAIJ*)Bnew->data; 3460 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3461 maij->B = Bnew; 3462 3463 PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N); 3464 3465 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3466 b->free_a = PETSC_FALSE; 3467 b->free_ij = PETSC_FALSE; 3468 PetscCall(MatDestroy(&B)); 3469 3470 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3471 bnew->free_a = PETSC_TRUE; 3472 bnew->free_ij = PETSC_TRUE; 3473 3474 /* condense columns of maij->B */ 3475 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 3476 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 3477 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 3478 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 3479 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3480 PetscFunctionReturn(0); 3481 } 3482 3483 extern PetscErrorCode 
MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3484 3485 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3486 { 3487 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3488 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3489 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3490 Mat M,Msub,B=a->B; 3491 MatScalar *aa; 3492 Mat_SeqAIJ *aij; 3493 PetscInt *garray = a->garray,*colsub,Ncols; 3494 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3495 IS iscol_sub,iscmap; 3496 const PetscInt *is_idx,*cmap; 3497 PetscBool allcolumns=PETSC_FALSE; 3498 MPI_Comm comm; 3499 3500 PetscFunctionBegin; 3501 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3502 if (call == MAT_REUSE_MATRIX) { 3503 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3504 PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3505 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3506 3507 PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap)); 3508 PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3509 3510 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub)); 3511 PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3512 3513 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub)); 3514 3515 } else { /* call == MAT_INITIAL_MATRIX) */ 3516 PetscBool flg; 3517 3518 PetscCall(ISGetLocalSize(iscol,&n)); 3519 PetscCall(ISGetSize(iscol,&Ncols)); 3520 3521 /* (1) iscol -> nonscalable iscol_local */ 3522 /* Check for special case: each processor gets entire matrix columns */ 3523 PetscCall(ISIdentity(iscol_local,&flg)); 3524 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3525 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3526 if (allcolumns) { 3527 iscol_sub = iscol_local; 3528 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3529 PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap)); 3530 3531 } else { 3532 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3533 PetscInt *idx,*cmap1,k; 3534 PetscCall(PetscMalloc1(Ncols,&idx)); 3535 PetscCall(PetscMalloc1(Ncols,&cmap1)); 3536 PetscCall(ISGetIndices(iscol_local,&is_idx)); 3537 count = 0; 3538 k = 0; 3539 for (i=0; i<Ncols; i++) { 3540 j = is_idx[i]; 3541 if (j >= cstart && j < cend) { 3542 /* diagonal part of mat */ 3543 idx[count] = j; 3544 cmap1[count++] = i; /* column index in submat */ 3545 } else if (Bn) { 3546 /* off-diagonal part of mat */ 3547 if (j == garray[k]) { 3548 idx[count] = j; 3549 cmap1[count++] = i; /* column index in submat */ 3550 } else if (j > garray[k]) { 3551 while (j > garray[k] && k < Bn-1) k++; 3552 if (j == garray[k]) { 3553 idx[count] = j; 3554 cmap1[count++] = i; /* column index in submat */ 3555 } 3556 } 3557 } 3558 } 3559 PetscCall(ISRestoreIndices(iscol_local,&is_idx)); 3560 3561 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub)); 3562 PetscCall(ISGetBlockSize(iscol,&cbs)); 3563 PetscCall(ISSetBlockSize(iscol_sub,cbs)); 3564 3565 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap)); 3566 } 3567 3568 /* (3) Create sequential Msub */ 3569 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub)); 3570 } 3571 3572 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3573 aij = (Mat_SeqAIJ*)(Msub)->data; 3574 ii = aij->i; 3575 PetscCall(ISGetIndices(iscmap,&cmap)); 3576 3577 /* 3578 m - number of local rows 3579 Ncols - number of columns (same on all processors) 3580 rstart - first row in new global matrix generated 3581 */ 3582 PetscCall(MatGetSize(Msub,&m,NULL)); 3583 3584 if (call == MAT_INITIAL_MATRIX) { 3585 /* (4) Create parallel newmat */ 3586 PetscMPIInt rank,size; 3587 PetscInt csize; 3588 3589 PetscCallMPI(MPI_Comm_size(comm,&size)); 3590 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3591 3592 /* 3593 Determine the number of non-zeros in the diagonal and off-diagonal 3594 portions of the matrix in order to do correct preallocation 3595 */ 3596 3597 /* first get start and end of "diagonal" columns */ 3598 PetscCall(ISGetLocalSize(iscol,&csize)); 3599 if (csize == PETSC_DECIDE) { 3600 PetscCall(ISGetSize(isrow,&mglobal)); 3601 if (mglobal == Ncols) { /* square matrix */ 3602 nlocal = m; 3603 } else { 3604 nlocal = Ncols/size + ((Ncols % size) > rank); 3605 } 3606 } else { 3607 nlocal = csize; 3608 } 3609 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3610 rstart = rend - nlocal; 3611 PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols); 3612 3613 /* next, compute all the lengths */ 3614 jj = aij->j; 3615 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3616 olens = dlens + m; 3617 for (i=0; i<m; i++) { 3618 jend = ii[i+1] - ii[i]; 3619 olen = 0; 3620 dlen = 0; 3621 for (j=0; j<jend; j++) { 3622 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3623 else dlen++; 3624 jj++; 3625 } 3626 olens[i] = olen; 3627 dlens[i] = dlen; 3628 } 3629 3630 PetscCall(ISGetBlockSize(isrow,&bs)); 3631 PetscCall(ISGetBlockSize(iscol,&cbs)); 3632 3633 PetscCall(MatCreate(comm,&M)); 3634 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols)); 3635 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3636 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3637 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3638 
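  /* dlens and olens were only needed for preallocation; olens points into the same allocation as dlens, so the single PetscFree() below releases both */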
PetscCall(PetscFree(dlens)); 3639 3640 } else { /* call == MAT_REUSE_MATRIX */ 3641 M = *newmat; 3642 PetscCall(MatGetLocalSize(M,&i,NULL)); 3643 PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3644 PetscCall(MatZeroEntries(M)); 3645 /* 3646 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3647 rather than the slower MatSetValues(). 3648 */ 3649 M->was_assembled = PETSC_TRUE; 3650 M->assembled = PETSC_FALSE; 3651 } 3652 3653 /* (5) Set values of Msub to *newmat */ 3654 PetscCall(PetscMalloc1(count,&colsub)); 3655 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 3656 3657 jj = aij->j; 3658 PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa)); 3659 for (i=0; i<m; i++) { 3660 row = rstart + i; 3661 nz = ii[i+1] - ii[i]; 3662 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3663 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES)); 3664 jj += nz; aa += nz; 3665 } 3666 PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa)); 3667 PetscCall(ISRestoreIndices(iscmap,&cmap)); 3668 3669 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3670 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3671 3672 PetscCall(PetscFree(colsub)); 3673 3674 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3675 if (call == MAT_INITIAL_MATRIX) { 3676 *newmat = M; 3677 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub)); 3678 PetscCall(MatDestroy(&Msub)); 3679 3680 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub)); 3681 PetscCall(ISDestroy(&iscol_sub)); 3682 3683 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap)); 3684 PetscCall(ISDestroy(&iscmap)); 3685 3686 if (iscol_local) { 3687 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local)); 3688 PetscCall(ISDestroy(&iscol_local)); 3689 } 3690 } 3691 PetscFunctionReturn(0); 3692 } 3693 3694 /* 3695 Not great since it makes two copies of the submatrix, first an SeqAIJ 3696 in local and then by concatenating the local matrices the end result. 3697 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3698 3699 Note: This requires a sequential iscol with all indices. 
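   (in MatCreateSubMatrix_MPIAIJ() this sequential iscol is obtained from ISGetSeqIS_Private(), i.e., an ISAllGather() of the parallel column IS)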
3700 */ 3701 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3702 { 3703 PetscMPIInt rank,size; 3704 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3705 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3706 Mat M,Mreuse; 3707 MatScalar *aa,*vwork; 3708 MPI_Comm comm; 3709 Mat_SeqAIJ *aij; 3710 PetscBool colflag,allcolumns=PETSC_FALSE; 3711 3712 PetscFunctionBegin; 3713 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3714 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3715 PetscCallMPI(MPI_Comm_size(comm,&size)); 3716 3717 /* Check for special case: each processor gets entire matrix columns */ 3718 PetscCall(ISIdentity(iscol,&colflag)); 3719 PetscCall(ISGetLocalSize(iscol,&n)); 3720 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3721 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3722 3723 if (call == MAT_REUSE_MATRIX) { 3724 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 3725 PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3726 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse)); 3727 } else { 3728 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse)); 3729 } 3730 3731 /* 3732 m - number of local rows 3733 n - number of columns (same on all processors) 3734 rstart - first row in new global matrix generated 3735 */ 3736 PetscCall(MatGetSize(Mreuse,&m,&n)); 3737 PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs)); 3738 if (call == MAT_INITIAL_MATRIX) { 3739 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3740 ii = aij->i; 3741 jj = aij->j; 3742 3743 /* 3744 Determine the number of non-zeros in the diagonal and off-diagonal 3745 portions of the matrix in order to do correct preallocation 3746 */ 3747 3748 /* first get start and end of "diagonal" columns */ 3749 if (csize == PETSC_DECIDE) { 3750 PetscCall(ISGetSize(isrow,&mglobal)); 3751 if (mglobal == n) { /* square matrix */ 3752 nlocal = m; 3753 } else { 3754 nlocal = n/size + ((n % size) > rank); 3755 } 3756 } else { 3757 nlocal = csize; 3758 } 3759 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3760 rstart = rend - nlocal; 3761 PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 3762 3763 /* next, compute all the lengths */ 3764 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3765 olens = dlens + m; 3766 for (i=0; i<m; i++) { 3767 jend = ii[i+1] - ii[i]; 3768 olen = 0; 3769 dlen = 0; 3770 for (j=0; j<jend; j++) { 3771 if (*jj < rstart || *jj >= rend) olen++; 3772 else dlen++; 3773 jj++; 3774 } 3775 olens[i] = olen; 3776 dlens[i] = dlen; 3777 } 3778 PetscCall(MatCreate(comm,&M)); 3779 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n)); 3780 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3781 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3782 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3783 PetscCall(PetscFree(dlens)); 3784 } else { 3785 PetscInt ml,nl; 3786 3787 M = *newmat; 3788 PetscCall(MatGetLocalSize(M,&ml,&nl)); 3789 PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3790 PetscCall(MatZeroEntries(M)); 3791 /* 3792 The next two lines are needed so we may call 
MatSetValues_MPIAIJ() below directly, 3793 rather than the slower MatSetValues(). 3794 */ 3795 M->was_assembled = PETSC_TRUE; 3796 M->assembled = PETSC_FALSE; 3797 } 3798 PetscCall(MatGetOwnershipRange(M,&rstart,&rend)); 3799 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3800 ii = aij->i; 3801 jj = aij->j; 3802 3803 /* trigger copy to CPU if needed */ 3804 PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa)); 3805 for (i=0; i<m; i++) { 3806 row = rstart + i; 3807 nz = ii[i+1] - ii[i]; 3808 cwork = jj; jj += nz; 3809 vwork = aa; aa += nz; 3810 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES)); 3811 } 3812 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa)); 3813 3814 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3815 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3816 *newmat = M; 3817 3818 /* save submatrix used in processor for next request */ 3819 if (call == MAT_INITIAL_MATRIX) { 3820 PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse)); 3821 PetscCall(MatDestroy(&Mreuse)); 3822 } 3823 PetscFunctionReturn(0); 3824 } 3825 3826 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3827 { 3828 PetscInt m,cstart, cend,j,nnz,i,d; 3829 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3830 const PetscInt *JJ; 3831 PetscBool nooffprocentries; 3832 3833 PetscFunctionBegin; 3834 PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]); 3835 3836 PetscCall(PetscLayoutSetUp(B->rmap)); 3837 PetscCall(PetscLayoutSetUp(B->cmap)); 3838 m = B->rmap->n; 3839 cstart = B->cmap->rstart; 3840 cend = B->cmap->rend; 3841 rstart = B->rmap->rstart; 3842 3843 PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz)); 3844 3845 if (PetscDefined(USE_DEBUG)) { 3846 for (i=0; i<m; i++) { 3847 nnz = Ii[i+1]- Ii[i]; 3848 JJ = J + Ii[i]; 3849 PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz); 3850 PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]); 3851 PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N); 3852 } 3853 } 3854 3855 for (i=0; i<m; i++) { 3856 nnz = Ii[i+1]- Ii[i]; 3857 JJ = J + Ii[i]; 3858 nnz_max = PetscMax(nnz_max,nnz); 3859 d = 0; 3860 for (j=0; j<nnz; j++) { 3861 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3862 } 3863 d_nnz[i] = d; 3864 o_nnz[i] = nnz - d; 3865 } 3866 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 3867 PetscCall(PetscFree2(d_nnz,o_nnz)); 3868 3869 for (i=0; i<m; i++) { 3870 ii = i + rstart; 3871 PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES)); 3872 } 3873 nooffprocentries = B->nooffprocentries; 3874 B->nooffprocentries = PETSC_TRUE; 3875 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 3876 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3877 B->nooffprocentries = nooffprocentries; 3878 3879 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3880 PetscFunctionReturn(0); 3881 } 3882 3883 /*@ 3884 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3885 (the default parallel PETSc format). 
3886 3887 Collective 3888 3889 Input Parameters: 3890 + B - the matrix 3891 . i - the indices into j for the start of each local row (starts with zero) 3892 . j - the column indices for each local row (starts with zero) 3893 - v - optional values in the matrix 3894 3895 Level: developer 3896 3897 Notes: 3898 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3899 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3900 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3901 3902 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3903 3904 The format which is used for the sparse matrix input, is equivalent to a 3905 row-major ordering.. i.e for the following matrix, the input data expected is 3906 as shown 3907 3908 $ 1 0 0 3909 $ 2 0 3 P0 3910 $ ------- 3911 $ 4 5 6 P1 3912 $ 3913 $ Process0 [P0]: rows_owned=[0,1] 3914 $ i = {0,1,3} [size = nrow+1 = 2+1] 3915 $ j = {0,0,2} [size = 3] 3916 $ v = {1,2,3} [size = 3] 3917 $ 3918 $ Process1 [P1]: rows_owned=[2] 3919 $ i = {0,3} [size = nrow+1 = 1+1] 3920 $ j = {0,1,2} [size = 3] 3921 $ v = {4,5,6} [size = 3] 3922 3923 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 3924 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 3925 @*/ 3926 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3927 { 3928 PetscFunctionBegin; 3929 PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v)); 3930 PetscFunctionReturn(0); 3931 } 3932 3933 /*@C 3934 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3935 (the default parallel PETSc format). For good matrix assembly performance 3936 the user should preallocate the matrix storage by setting the parameters 3937 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3938 performance can be increased by more than a factor of 50. 3939 3940 Collective 3941 3942 Input Parameters: 3943 + B - the matrix 3944 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3945 (same value is used for all local rows) 3946 . d_nnz - array containing the number of nonzeros in the various rows of the 3947 DIAGONAL portion of the local submatrix (possibly different for each row) 3948 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3949 The size of this array is equal to the number of local rows, i.e 'm'. 3950 For matrices that will be factored, you must leave room for (and set) 3951 the diagonal entry even if it is zero. 3952 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3953 submatrix (same value is used for all local rows). 3954 - o_nnz - array containing the number of nonzeros in the various rows of the 3955 OFF-DIAGONAL portion of the local submatrix (possibly different for 3956 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3957 structure. The size of this array is equal to the number 3958 of local rows, i.e 'm'. 3959 3960 If the *_nnz parameter is given then the *_nz parameter is ignored 3961 3962 The AIJ format (also called the Yale sparse matrix format or 3963 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3964 storage. 
The stored row and column indices begin with zero.
3965 See Users-Manual: ch_mat for details.
3966
3967 The parallel matrix is partitioned such that the first m0 rows belong to
3968 process 0, the next m1 rows belong to process 1, the next m2 rows belong
3969 to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3970
3971 The DIAGONAL portion of the local submatrix of a processor can be defined
3972 as the submatrix which is obtained by extracting the part corresponding to
3973 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3974 first row that belongs to the processor, r2 is the last row belonging to
3975 this processor, and c1-c2 is the range of indices of the local part of a
3976 vector suitable for applying the matrix to. This is an m x n matrix. In the
3977 common case of a square matrix, the row and column ranges are the same and
3978 the DIAGONAL part is also square. The remaining portion of the local
3979 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3980
3981 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3982
3983 You can call MatGetInfo() to get information on how effective the preallocation was;
3984 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3985 You can also run with the option -info and look for messages with the string
3986 malloc in them to see if additional memory allocation was needed.
3987
3988 Example usage:
3989
3990 Consider the following 8x8 matrix with 34 non-zero values, that is
3991 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3992 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3993 as follows:
3994
3995 .vb
3996             1  2  0  |  0  3  0  |  0  4
3997    Proc0    0  5  6  |  7  0  0  |  8  0
3998             9  0 10  | 11  0  0  | 12  0
3999    -------------------------------------
4000            13  0 14  | 15 16 17  |  0  0
4001    Proc1    0 18  0  | 19 20 21  |  0  0
4002             0  0  0  | 22 23  0  | 24  0
4003    -------------------------------------
4004    Proc2   25 26 27  |  0  0 28  | 29  0
4005            30  0  0  | 31 32 33  |  0 34
4006 .ve
4007
4008 This can be represented as a collection of submatrices as:
4009
4010 .vb
4011       A B C
4012       D E F
4013       G H I
4014 .ve
4015
4016 Where the submatrices A,B,C are owned by proc0, D,E,F are
4017 owned by proc1, G,H,I are owned by proc2.
4018
4019 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4020 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4021 The 'M','N' parameters are 8,8, and have the same values on all procs.
4022
4023 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4024 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4025 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4026 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4027 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4028 matrix, and [DF] as another SeqAIJ matrix.
4029
4030 When d_nz, o_nz parameters are specified, d_nz storage elements are
4031 allocated for every row of the local diagonal submatrix, and o_nz
4032 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4033 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4034 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4035 In this case, the values of d_nz,o_nz are:
4036 .vb
4037      proc0 : dnz = 2, o_nz = 2
4038      proc1 : dnz = 3, o_nz = 2
4039      proc2 : dnz = 1, o_nz = 4
4040 .ve
4041 We are allocating m*(d_nz+o_nz) storage locations for every proc.
This
4042 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4043 for proc2, i.e., we are using 12+15+10=37 storage locations to store
4044 34 values.
4045
4046 When d_nnz, o_nnz parameters are specified, the storage is specified
4047 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4048 In the above case the values for d_nnz,o_nnz are:
4049 .vb
4050      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4051      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4052      proc2: d_nnz = [1,1] and o_nnz = [4,4]
4053 .ve
4054 Here the space allocated is the sum of all the above values, i.e., 34, and
4055 hence pre-allocation is perfect.
4056
4057 Level: intermediate
4058
4059 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4060 `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4061 @*/
4062 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4063 {
4064 PetscFunctionBegin;
4065 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4066 PetscValidType(B,1);
4067 PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
4068 PetscFunctionReturn(0);
4069 }
4070
4071 /*@
4072 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4073 CSR format.
4074
4075 Collective
4076
4077 Input Parameters:
4078 + comm - MPI communicator
4079 . m - number of local rows (cannot be PETSC_DECIDE)
4080 . n - This value should be the same as the local size used in creating the
4081 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4082 calculated if N is given). For square matrices n is almost always m.
4083 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4084 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4085 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4086 . j - column indices
4087 - a - matrix values
4088
4089 Output Parameter:
4090 . mat - the matrix
4091
4092 Level: intermediate
4093
4094 Notes:
4095 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4096 thus you CANNOT change the matrix entries by changing the values of a[] after you have
4097 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4098
4099 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4100
4101 The format used for the sparse matrix input is equivalent to a
4102 row-major ordering,
i.e., for the following matrix, the input data expected is
4103 as shown.
4104
4105 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4106
4107 $        1 0 0
4108 $        2 0 3     P0
4109 $        -------
4110 $        4 5 6     P1
4111 $
4112 $     Process0 [P0]: rows_owned=[0,1]
4113 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4114 $        j =  {0,0,2}  [size = 3]
4115 $        v =  {1,2,3}  [size = 3]
4116 $
4117 $     Process1 [P1]: rows_owned=[2]
4118 $        i =  {0,3}    [size = nrow+1  = 1+1]
4119 $        j =  {0,1,2}  [size = 3]
4120 $        v =  {4,5,6}  [size = 3]
4121
4122 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4123 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4124 @*/
4125 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4126 {
4127 PetscFunctionBegin;
4128 PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4129 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4130 PetscCall(MatCreate(comm,mat));
4131 PetscCall(MatSetSizes(*mat,m,n,M,N));
4132 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4133 PetscCall(MatSetType(*mat,MATMPIAIJ));
4134 PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
4135 PetscFunctionReturn(0);
4136 }
4137
4138 /*@
4139 MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard
4140 CSR format. Only the numerical values are updated; the other arrays must be identical.
4141
4142 Collective
4143
4144 Input Parameters:
4145 + mat - the matrix
4146 . m - number of local rows (cannot be PETSC_DECIDE)
4147 . n - This value should be the same as the local size used in creating the
4148 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4149 calculated if N is given). For square matrices n is almost always m.
4150 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4151 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4152 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4153 .
J - column indices 4154 - v - matrix values 4155 4156 Level: intermediate 4157 4158 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4159 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4160 @*/ 4161 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4162 { 4163 PetscInt cstart,nnz,i,j; 4164 PetscInt *ld; 4165 PetscBool nooffprocentries; 4166 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4167 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4168 PetscScalar *ad,*ao; 4169 const PetscInt *Adi = Ad->i; 4170 PetscInt ldi,Iii,md; 4171 4172 PetscFunctionBegin; 4173 PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4174 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4175 PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4176 PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4177 4178 PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4179 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4180 cstart = mat->cmap->rstart; 4181 if (!Aij->ld) { 4182 /* count number of entries below block diagonal */ 4183 PetscCall(PetscCalloc1(m,&ld)); 4184 Aij->ld = ld; 4185 for (i=0; i<m; i++) { 4186 nnz = Ii[i+1]- Ii[i]; 4187 j = 0; 4188 while (J[j] < cstart && j < nnz) {j++;} 4189 J += nnz; 4190 ld[i] = j; 4191 } 4192 } else { 4193 ld = Aij->ld; 4194 } 4195 4196 for (i=0; i<m; i++) { 4197 nnz = Ii[i+1]- Ii[i]; 4198 Iii = Ii[i]; 4199 ldi = ld[i]; 4200 md = Adi[i+1]-Adi[i]; 4201 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4202 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4203 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4204 ad += md; 4205 ao += nnz - md; 4206 } 4207 nooffprocentries = mat->nooffprocentries; 4208 mat->nooffprocentries = PETSC_TRUE; 4209 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4210 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4211 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4212 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4213 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4214 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4215 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4216 mat->nooffprocentries = nooffprocentries; 4217 PetscFunctionReturn(0); 4218 } 4219 4220 /*@C 4221 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4222 (the default parallel PETSc format). For good matrix assembly performance 4223 the user should preallocate the matrix storage by setting the parameters 4224 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4225 performance can be increased by more than a factor of 50. 4226 4227 Collective 4228 4229 Input Parameters: 4230 + comm - MPI communicator 4231 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4232 This value should be the same as the local size used in creating the 4233 y vector for the matrix-vector product y = Ax. 4234 . n - This value should be the same as the local size used in creating the 4235 x vector for the matrix-vector product y = Ax. 
(or PETSC_DECIDE to have
4236 it calculated if N is given). For square matrices n is almost always m.
4237 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4238 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4239 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4240 (same value is used for all local rows)
4241 . d_nnz - array containing the number of nonzeros in the various rows of the
4242 DIAGONAL portion of the local submatrix (possibly different for each row)
4243 or NULL, if d_nz is used to specify the nonzero structure.
4244 The size of this array is equal to the number of local rows, i.e., 'm'.
4245 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4246 submatrix (same value is used for all local rows).
4247 - o_nnz - array containing the number of nonzeros in the various rows of the
4248 OFF-DIAGONAL portion of the local submatrix (possibly different for
4249 each row) or NULL, if o_nz is used to specify the nonzero
4250 structure. The size of this array is equal to the number
4251 of local rows, i.e., 'm'.
4252
4253 Output Parameter:
4254 . A - the matrix
4255
4256 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4257 MatXXXXSetPreallocation() paradigm instead of this routine directly.
4258 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4259
4260 Notes:
4261 If the *_nnz parameter is given then the *_nz parameter is ignored.
4262
4263 The m,n,M,N parameters specify the size of the matrix, and its partitioning across
4264 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4265 storage requirements for this matrix.
4266
4267 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4268 processor then it must be used on all processors that share the object for
4269 that argument.
4270
4271 The user MUST specify either the local or global matrix dimensions
4272 (possibly both).
4273
4274 The parallel matrix is partitioned across processors such that the
4275 first m0 rows belong to process 0, the next m1 rows belong to
4276 process 1, the next m2 rows belong to process 2 etc., where
4277 m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4278 values corresponding to an [m x N] submatrix.
4279
4280 The columns are logically partitioned with the n0 columns belonging
4281 to the 0th partition, the next n1 columns belonging to the next
4282 partition etc., where n0,n1,n2... are the input parameter 'n'.
4283
4284 The DIAGONAL portion of the local submatrix on any given processor
4285 is the submatrix corresponding to the rows and columns m,n
4286 corresponding to the given processor, i.e., the diagonal matrix on
4287 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4288 etc. The remaining portion of the local submatrix [m x (N-n)]
4289 constitutes the OFF-DIAGONAL portion. The example below better
4290 illustrates this concept.
4291
4292 For a square global matrix we define each processor's diagonal portion
4293 to be its local rows and the corresponding columns (a square submatrix);
4294 each processor's off-diagonal portion encompasses the remainder of the
4295 local matrix (a rectangular submatrix).
4296
4297 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4298
4299 When calling this routine with a single process communicator, a matrix of
4300 type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this
4301 type of communicator, use the construction mechanism
4302 .vb
4303   MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4304 .ve
4305
4311 By default, this format uses inodes (identical nodes) when possible.
4312 We search for consecutive rows with the same nonzero structure, thereby
4313 reusing matrix information to achieve increased efficiency.
4314
4315 Options Database Keys:
4316 + -mat_no_inode - Do not use inodes
4317 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4318 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4319 See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
4320 Entry (i,j) is the size of the message (in bytes) that rank i sends to rank j in one MatMult() call.
4321
4322 Example usage:
4323
4324 Consider the following 8x8 matrix with 34 non-zero values, that is
4325 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4326 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4327 as follows:
4328
4329 .vb
4330             1  2  0  |  0  3  0  |  0  4
4331    Proc0    0  5  6  |  7  0  0  |  8  0
4332             9  0 10  | 11  0  0  | 12  0
4333    -------------------------------------
4334            13  0 14  | 15 16 17  |  0  0
4335    Proc1    0 18  0  | 19 20 21  |  0  0
4336             0  0  0  | 22 23  0  | 24  0
4337    -------------------------------------
4338    Proc2   25 26 27  |  0  0 28  | 29  0
4339            30  0  0  | 31 32 33  |  0 34
4340 .ve
4341
4342 This can be represented as a collection of submatrices as:
4343
4344 .vb
4345       A B C
4346       D E F
4347       G H I
4348 .ve
4349
4350 Where the submatrices A,B,C are owned by proc0, D,E,F are
4351 owned by proc1, G,H,I are owned by proc2.
4352
4353 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4354 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4355 The 'M','N' parameters are 8,8, and have the same values on all procs.
4356
4357 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4358 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4359 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4360 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4361 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4362 matrix, and [DF] as another SeqAIJ matrix.
4363
4364 When d_nz, o_nz parameters are specified, d_nz storage elements are
4365 allocated for every row of the local diagonal submatrix, and o_nz
4366 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4367 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4368 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4369 In this case, the values of d_nz,o_nz are
4370 .vb
4371      proc0 : dnz = 2, o_nz = 2
4372      proc1 : dnz = 3, o_nz = 2
4373      proc2 : dnz = 1, o_nz = 4
4374 .ve
4375 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4376 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4377 for proc2, i.e., we are using 12+15+10=37 storage locations to store
4378 34 values.
4379
4380 When d_nnz, o_nnz parameters are specified, the storage is specified
4381 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4382    In the above case the values for d_nnz,o_nnz are
4383 .vb
4384      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4385      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4386      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4387 .ve
4388    Here the space allocated is the sum of all the above values, i.e. 34, and
4389    hence the preallocation is exact.
4390
4391    Level: intermediate
4392
4393 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4394           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4395 @*/
4396 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4397 {
4398   PetscMPIInt size;
4399
4400   PetscFunctionBegin;
4401   PetscCall(MatCreate(comm,A));
4402   PetscCall(MatSetSizes(*A,m,n,M,N));
4403   PetscCallMPI(MPI_Comm_size(comm,&size));
4404   if (size > 1) {
4405     PetscCall(MatSetType(*A,MATMPIAIJ));
4406     PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
4407   } else {
4408     PetscCall(MatSetType(*A,MATSEQAIJ));
4409     PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
4410   }
4411   PetscFunctionReturn(0);
4412 }
4413
4414 /*@C
4415      MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4416
4417    Not Collective
4418
4419    Input Parameter:
4420 .  A - The MPIAIJ matrix
4421
4422    Output Parameters:
4423 +  Ad - The local diagonal block as a SeqAIJ matrix
4424 .  Ao - The local off-diagonal block as a SeqAIJ matrix
4425 -  colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4426
4427    Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4428    in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4429    the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4430    local column numbers to global column numbers in the original matrix.
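
   A minimal access sketch (the variable names are illustrative only):
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;

     PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap));
     /* column j of Ao corresponds to global column colmap[j] of A */
.ve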
4431 4432 Level: intermediate 4433 4434 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4435 @*/ 4436 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4437 { 4438 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4439 PetscBool flg; 4440 4441 PetscFunctionBegin; 4442 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg)); 4443 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4444 if (Ad) *Ad = a->A; 4445 if (Ao) *Ao = a->B; 4446 if (colmap) *colmap = a->garray; 4447 PetscFunctionReturn(0); 4448 } 4449 4450 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4451 { 4452 PetscInt m,N,i,rstart,nnz,Ii; 4453 PetscInt *indx; 4454 PetscScalar *values; 4455 MatType rootType; 4456 4457 PetscFunctionBegin; 4458 PetscCall(MatGetSize(inmat,&m,&N)); 4459 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4460 PetscInt *dnz,*onz,sum,bs,cbs; 4461 4462 if (n == PETSC_DECIDE) { 4463 PetscCall(PetscSplitOwnership(comm,&n,&N)); 4464 } 4465 /* Check sum(n) = N */ 4466 PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm)); 4467 PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 4468 4469 PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm)); 4470 rstart -= m; 4471 4472 MatPreallocateBegin(comm,m,n,dnz,onz); 4473 for (i=0; i<m; i++) { 4474 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4475 PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz)); 4476 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4477 } 4478 4479 PetscCall(MatCreate(comm,outmat)); 4480 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4481 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 4482 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 4483 PetscCall(MatGetRootType_Private(inmat,&rootType)); 4484 PetscCall(MatSetType(*outmat,rootType)); 4485 PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz)); 4486 PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz)); 4487 MatPreallocateEnd(dnz,onz); 4488 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 4489 } 4490 4491 /* numeric phase */ 4492 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 4493 for (i=0; i<m; i++) { 4494 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4495 Ii = i + rstart; 4496 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 4497 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4498 } 4499 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 4500 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 4501 PetscFunctionReturn(0); 4502 } 4503 4504 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4505 { 4506 PetscMPIInt rank; 4507 PetscInt m,N,i,rstart,nnz; 4508 size_t len; 4509 const PetscInt *indx; 4510 PetscViewer out; 4511 char *name; 4512 Mat B; 4513 const PetscScalar *values; 4514 4515 PetscFunctionBegin; 4516 PetscCall(MatGetLocalSize(A,&m,NULL)); 4517 PetscCall(MatGetSize(A,NULL,&N)); 4518 /* Should this be the type of the diagonal block of A? 
*/ 4519 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 4520 PetscCall(MatSetSizes(B,m,N,m,N)); 4521 PetscCall(MatSetBlockSizesFromMats(B,A,A)); 4522 PetscCall(MatSetType(B,MATSEQAIJ)); 4523 PetscCall(MatSeqAIJSetPreallocation(B,0,NULL)); 4524 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 4525 for (i=0; i<m; i++) { 4526 PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values)); 4527 PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES)); 4528 PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values)); 4529 } 4530 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 4531 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 4532 4533 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 4534 PetscCall(PetscStrlen(outfile,&len)); 4535 PetscCall(PetscMalloc1(len+6,&name)); 4536 PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank)); 4537 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out)); 4538 PetscCall(PetscFree(name)); 4539 PetscCall(MatView(B,out)); 4540 PetscCall(PetscViewerDestroy(&out)); 4541 PetscCall(MatDestroy(&B)); 4542 PetscFunctionReturn(0); 4543 } 4544 4545 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4546 { 4547 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4548 4549 PetscFunctionBegin; 4550 if (!merge) PetscFunctionReturn(0); 4551 PetscCall(PetscFree(merge->id_r)); 4552 PetscCall(PetscFree(merge->len_s)); 4553 PetscCall(PetscFree(merge->len_r)); 4554 PetscCall(PetscFree(merge->bi)); 4555 PetscCall(PetscFree(merge->bj)); 4556 PetscCall(PetscFree(merge->buf_ri[0])); 4557 PetscCall(PetscFree(merge->buf_ri)); 4558 PetscCall(PetscFree(merge->buf_rj[0])); 4559 PetscCall(PetscFree(merge->buf_rj)); 4560 PetscCall(PetscFree(merge->coi)); 4561 PetscCall(PetscFree(merge->coj)); 4562 PetscCall(PetscFree(merge->owners_co)); 4563 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4564 PetscCall(PetscFree(merge)); 4565 PetscFunctionReturn(0); 4566 } 4567 4568 #include <../src/mat/utils/freespace.h> 4569 #include <petscbt.h> 4570 4571 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4572 { 4573 MPI_Comm comm; 4574 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4575 PetscMPIInt size,rank,taga,*len_s; 4576 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4577 PetscInt proc,m; 4578 PetscInt **buf_ri,**buf_rj; 4579 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4580 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4581 MPI_Request *s_waits,*r_waits; 4582 MPI_Status *status; 4583 const MatScalar *aa,*a_a; 4584 MatScalar **abuf_r,*ba_i; 4585 Mat_Merge_SeqsToMPI *merge; 4586 PetscContainer container; 4587 4588 PetscFunctionBegin; 4589 PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm)); 4590 PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0)); 4591 4592 PetscCallMPI(MPI_Comm_size(comm,&size)); 4593 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4594 4595 PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container)); 4596 PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4597 PetscCall(PetscContainerGetPointer(container,(void**)&merge)); 4598 PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a)); 4599 aa = a_a; 4600 4601 bi = merge->bi; 4602 bj = merge->bj; 4603 buf_ri = merge->buf_ri; 4604 buf_rj = merge->buf_rj; 4605 4606 PetscCall(PetscMalloc1(size,&status)); 4607 owners = merge->rowmap->range; 4608 len_s = merge->len_s; 4609 4610 /* send and recv matrix values */ 4611 /*-----------------------------*/ 4612 
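  /* Each rank posts nonblocking receives for the value blocks it expects (one per sending rank, with
     lengths merge->len_r[] recorded in the symbolic phase), then sends the contiguous slice of its own
     SeqAIJ values that belongs to each destination rank; the row/column structure was already exchanged
     by MatCreateMPIAIJSumSeqAIJSymbolic(). */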
PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga)); 4613 PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits)); 4614 4615 PetscCall(PetscMalloc1(merge->nsend+1,&s_waits)); 4616 for (proc=0,k=0; proc<size; proc++) { 4617 if (!len_s[proc]) continue; 4618 i = owners[proc]; 4619 PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k)); 4620 k++; 4621 } 4622 4623 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status)); 4624 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status)); 4625 PetscCall(PetscFree(status)); 4626 4627 PetscCall(PetscFree(s_waits)); 4628 PetscCall(PetscFree(r_waits)); 4629 4630 /* insert mat values of mpimat */ 4631 /*----------------------------*/ 4632 PetscCall(PetscMalloc1(N,&ba_i)); 4633 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4634 4635 for (k=0; k<merge->nrecv; k++) { 4636 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4637 nrows = *(buf_ri_k[k]); 4638 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4639 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4640 } 4641 4642 /* set values of ba */ 4643 m = merge->rowmap->n; 4644 for (i=0; i<m; i++) { 4645 arow = owners[rank] + i; 4646 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4647 bnzi = bi[i+1] - bi[i]; 4648 PetscCall(PetscArrayzero(ba_i,bnzi)); 4649 4650 /* add local non-zero vals of this proc's seqmat into ba */ 4651 anzi = ai[arow+1] - ai[arow]; 4652 aj = a->j + ai[arow]; 4653 aa = a_a + ai[arow]; 4654 nextaj = 0; 4655 for (j=0; nextaj<anzi; j++) { 4656 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4657 ba_i[j] += aa[nextaj++]; 4658 } 4659 } 4660 4661 /* add received vals into ba */ 4662 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4663 /* i-th row */ 4664 if (i == *nextrow[k]) { 4665 anzi = *(nextai[k]+1) - *nextai[k]; 4666 aj = buf_rj[k] + *(nextai[k]); 4667 aa = abuf_r[k] + *(nextai[k]); 4668 nextaj = 0; 4669 for (j=0; nextaj<anzi; j++) { 4670 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4671 ba_i[j] += aa[nextaj++]; 4672 } 4673 } 4674 nextrow[k]++; nextai[k]++; 4675 } 4676 } 4677 PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES)); 4678 } 4679 PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a)); 4680 PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY)); 4681 PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY)); 4682 4683 PetscCall(PetscFree(abuf_r[0])); 4684 PetscCall(PetscFree(abuf_r)); 4685 PetscCall(PetscFree(ba_i)); 4686 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4687 PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0)); 4688 PetscFunctionReturn(0); 4689 } 4690 4691 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4692 { 4693 Mat B_mpi; 4694 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4695 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4696 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4697 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4698 PetscInt len,proc,*dnz,*onz,bs,cbs; 4699 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4700 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4701 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4702 MPI_Status *status; 4703 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4704 PetscBT lnkbt; 4705 Mat_Merge_SeqsToMPI 
*merge; 4706 PetscContainer container; 4707 4708 PetscFunctionBegin; 4709 PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0)); 4710 4711 /* make sure it is a PETSc comm */ 4712 PetscCall(PetscCommDuplicate(comm,&comm,NULL)); 4713 PetscCallMPI(MPI_Comm_size(comm,&size)); 4714 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4715 4716 PetscCall(PetscNew(&merge)); 4717 PetscCall(PetscMalloc1(size,&status)); 4718 4719 /* determine row ownership */ 4720 /*---------------------------------------------------------*/ 4721 PetscCall(PetscLayoutCreate(comm,&merge->rowmap)); 4722 PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m)); 4723 PetscCall(PetscLayoutSetSize(merge->rowmap,M)); 4724 PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1)); 4725 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4726 PetscCall(PetscMalloc1(size,&len_si)); 4727 PetscCall(PetscMalloc1(size,&merge->len_s)); 4728 4729 m = merge->rowmap->n; 4730 owners = merge->rowmap->range; 4731 4732 /* determine the number of messages to send, their lengths */ 4733 /*---------------------------------------------------------*/ 4734 len_s = merge->len_s; 4735 4736 len = 0; /* length of buf_si[] */ 4737 merge->nsend = 0; 4738 for (proc=0; proc<size; proc++) { 4739 len_si[proc] = 0; 4740 if (proc == rank) { 4741 len_s[proc] = 0; 4742 } else { 4743 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4744 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4745 } 4746 if (len_s[proc]) { 4747 merge->nsend++; 4748 nrows = 0; 4749 for (i=owners[proc]; i<owners[proc+1]; i++) { 4750 if (ai[i+1] > ai[i]) nrows++; 4751 } 4752 len_si[proc] = 2*(nrows+1); 4753 len += len_si[proc]; 4754 } 4755 } 4756 4757 /* determine the number and length of messages to receive for ij-structure */ 4758 /*-------------------------------------------------------------------------*/ 4759 PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv)); 4760 PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri)); 4761 4762 /* post the Irecv of j-structure */ 4763 /*-------------------------------*/ 4764 PetscCall(PetscCommGetNewTag(comm,&tagj)); 4765 PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits)); 4766 4767 /* post the Isend of j-structure */ 4768 /*--------------------------------*/ 4769 PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits)); 4770 4771 for (proc=0, k=0; proc<size; proc++) { 4772 if (!len_s[proc]) continue; 4773 i = owners[proc]; 4774 PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k)); 4775 k++; 4776 } 4777 4778 /* receives and sends of j-structure are complete */ 4779 /*------------------------------------------------*/ 4780 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status)); 4781 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status)); 4782 4783 /* send and recv i-structure */ 4784 /*---------------------------*/ 4785 PetscCall(PetscCommGetNewTag(comm,&tagi)); 4786 PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits)); 4787 4788 PetscCall(PetscMalloc1(len+1,&buf_s)); 4789 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4790 for (proc=0,k=0; proc<size; proc++) { 4791 if (!len_s[proc]) continue; 4792 /* form outgoing message for i-structure: 4793 buf_si[0]: nrows to be sent 4794 [1:nrows]: row index (global) 4795 [nrows+1:2*nrows+1]: i-structure index 4796 */ 4797 
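    /* For example, if two nonempty rows are sent to [proc], stored as i-owners[proc] = 3 and 7 with
       4 and 2 nonzeros respectively, then buf_si = [2, 3, 7, 0, 4, 6] and len_si[proc] = 6. */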
/*-------------------------------------------*/ 4798 nrows = len_si[proc]/2 - 1; 4799 buf_si_i = buf_si + nrows+1; 4800 buf_si[0] = nrows; 4801 buf_si_i[0] = 0; 4802 nrows = 0; 4803 for (i=owners[proc]; i<owners[proc+1]; i++) { 4804 anzi = ai[i+1] - ai[i]; 4805 if (anzi) { 4806 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4807 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4808 nrows++; 4809 } 4810 } 4811 PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k)); 4812 k++; 4813 buf_si += len_si[proc]; 4814 } 4815 4816 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status)); 4817 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status)); 4818 4819 PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv)); 4820 for (i=0; i<merge->nrecv; i++) { 4821 PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i])); 4822 } 4823 4824 PetscCall(PetscFree(len_si)); 4825 PetscCall(PetscFree(len_ri)); 4826 PetscCall(PetscFree(rj_waits)); 4827 PetscCall(PetscFree2(si_waits,sj_waits)); 4828 PetscCall(PetscFree(ri_waits)); 4829 PetscCall(PetscFree(buf_s)); 4830 PetscCall(PetscFree(status)); 4831 4832 /* compute a local seq matrix in each processor */ 4833 /*----------------------------------------------*/ 4834 /* allocate bi array and free space for accumulating nonzero column info */ 4835 PetscCall(PetscMalloc1(m+1,&bi)); 4836 bi[0] = 0; 4837 4838 /* create and initialize a linked list */ 4839 nlnk = N+1; 4840 PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt)); 4841 4842 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4843 len = ai[owners[rank+1]] - ai[owners[rank]]; 4844 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space)); 4845 4846 current_space = free_space; 4847 4848 /* determine symbolic info for each local row */ 4849 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4850 4851 for (k=0; k<merge->nrecv; k++) { 4852 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4853 nrows = *buf_ri_k[k]; 4854 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4855 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4856 } 4857 4858 MatPreallocateBegin(comm,m,n,dnz,onz); 4859 len = 0; 4860 for (i=0; i<m; i++) { 4861 bnzi = 0; 4862 /* add local non-zero cols of this proc's seqmat into lnk */ 4863 arow = owners[rank] + i; 4864 anzi = ai[arow+1] - ai[arow]; 4865 aj = a->j + ai[arow]; 4866 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4867 bnzi += nlnk; 4868 /* add received col data into lnk */ 4869 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4870 if (i == *nextrow[k]) { /* i-th row */ 4871 anzi = *(nextai[k]+1) - *nextai[k]; 4872 aj = buf_rj[k] + *nextai[k]; 4873 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4874 bnzi += nlnk; 4875 nextrow[k]++; nextai[k]++; 4876 } 4877 } 4878 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4879 4880 /* if free space is not available, make more free space */ 4881 if (current_space->local_remaining<bnzi) { 4882 PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space)); 4883 nspacedouble++; 4884 } 4885 /* copy data into free space, then initialize lnk */ 4886 PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt)); 4887 
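    /* current_space->array now holds the bnzi sorted column indices of global row i+owners[rank];
       record them in the preallocation counters for the diagonal and off-diagonal blocks */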
PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz)); 4888 4889 current_space->array += bnzi; 4890 current_space->local_used += bnzi; 4891 current_space->local_remaining -= bnzi; 4892 4893 bi[i+1] = bi[i] + bnzi; 4894 } 4895 4896 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4897 4898 PetscCall(PetscMalloc1(bi[m]+1,&bj)); 4899 PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); 4900 PetscCall(PetscLLDestroy(lnk,lnkbt)); 4901 4902 /* create symbolic parallel matrix B_mpi */ 4903 /*---------------------------------------*/ 4904 PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs)); 4905 PetscCall(MatCreate(comm,&B_mpi)); 4906 if (n==PETSC_DECIDE) { 4907 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N)); 4908 } else { 4909 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4910 } 4911 PetscCall(MatSetBlockSizes(B_mpi,bs,cbs)); 4912 PetscCall(MatSetType(B_mpi,MATMPIAIJ)); 4913 PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz)); 4914 MatPreallocateEnd(dnz,onz); 4915 PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 4916 4917 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4918 B_mpi->assembled = PETSC_FALSE; 4919 merge->bi = bi; 4920 merge->bj = bj; 4921 merge->buf_ri = buf_ri; 4922 merge->buf_rj = buf_rj; 4923 merge->coi = NULL; 4924 merge->coj = NULL; 4925 merge->owners_co = NULL; 4926 4927 PetscCall(PetscCommDestroy(&comm)); 4928 4929 /* attach the supporting struct to B_mpi for reuse */ 4930 PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container)); 4931 PetscCall(PetscContainerSetPointer(container,merge)); 4932 PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI)); 4933 PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container)); 4934 PetscCall(PetscContainerDestroy(&container)); 4935 *mpimat = B_mpi; 4936 4937 PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0)); 4938 PetscFunctionReturn(0); 4939 } 4940 4941 /*@C 4942 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4943 matrices from each processor 4944 4945 Collective 4946 4947 Input Parameters: 4948 + comm - the communicators the parallel matrix will live on 4949 . seqmat - the input sequential matrices 4950 . m - number of local rows (or PETSC_DECIDE) 4951 . n - number of local columns (or PETSC_DECIDE) 4952 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4953 4954 Output Parameter: 4955 . mpimat - the parallel matrix generated 4956 4957 Level: advanced 4958 4959 Notes: 4960 The dimensions of the sequential matrix in each processor MUST be the same. 4961 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4962 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
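
   A typical calling sequence is sketched below (each rank assembles its own contribution into a
   SeqAIJ matrix of the full global size before the call):
.vb
     Mat seqmat,mpimat;
     /* ... each rank fills seqmat (a MATSEQAIJ of global size M x N) with its contribution ... */
     PetscCall(MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat));
     /* later, after changing the values (but not the nonzero pattern) of seqmat */
     PetscCall(MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat));
.ve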
4963 @*/ 4964 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4965 { 4966 PetscMPIInt size; 4967 4968 PetscFunctionBegin; 4969 PetscCallMPI(MPI_Comm_size(comm,&size)); 4970 if (size == 1) { 4971 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4972 if (scall == MAT_INITIAL_MATRIX) { 4973 PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat)); 4974 } else { 4975 PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN)); 4976 } 4977 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4978 PetscFunctionReturn(0); 4979 } 4980 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4981 if (scall == MAT_INITIAL_MATRIX) { 4982 PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat)); 4983 } 4984 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat)); 4985 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4986 PetscFunctionReturn(0); 4987 } 4988 4989 /*@ 4990 MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4991 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4992 with MatGetSize() 4993 4994 Not Collective 4995 4996 Input Parameters: 4997 + A - the matrix 4998 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4999 5000 Output Parameter: 5001 . A_loc - the local sequential matrix generated 5002 5003 Level: developer 5004 5005 Notes: 5006 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 5007 5008 Destroy the matrix with MatDestroy() 5009 5010 .seealso: MatMPIAIJGetLocalMat() 5011 5012 @*/ 5013 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc) 5014 { 5015 PetscBool mpi; 5016 5017 PetscFunctionBegin; 5018 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi)); 5019 if (mpi) { 5020 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc)); 5021 } else { 5022 *A_loc = A; 5023 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5024 } 5025 PetscFunctionReturn(0); 5026 } 5027 5028 /*@ 5029 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5030 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5031 with MatGetSize() 5032 5033 Not Collective 5034 5035 Input Parameters: 5036 + A - the matrix 5037 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5038 5039 Output Parameter: 5040 . A_loc - the local sequential matrix generated 5041 5042 Level: developer 5043 5044 Notes: 5045 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 5046 5047 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5048 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5049 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5050 modify the values of the returned A_loc. 
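
   A minimal usage sketch:
.vb
     Mat A_loc;

     PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc));
     /* ... use A_loc as an ordinary sequential matrix ... */
     PetscCall(MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc)); /* refresh values after A has changed */
     PetscCall(MatDestroy(&A_loc));
.ve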
5051 5052 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5053 @*/ 5054 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5055 { 5056 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5057 Mat_SeqAIJ *mat,*a,*b; 5058 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5059 const PetscScalar *aa,*ba,*aav,*bav; 5060 PetscScalar *ca,*cam; 5061 PetscMPIInt size; 5062 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5063 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5064 PetscBool match; 5065 5066 PetscFunctionBegin; 5067 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match)); 5068 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5069 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5070 if (size == 1) { 5071 if (scall == MAT_INITIAL_MATRIX) { 5072 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5073 *A_loc = mpimat->A; 5074 } else if (scall == MAT_REUSE_MATRIX) { 5075 PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN)); 5076 } 5077 PetscFunctionReturn(0); 5078 } 5079 5080 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5081 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5082 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5083 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5084 PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav)); 5085 PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav)); 5086 aa = aav; 5087 ba = bav; 5088 if (scall == MAT_INITIAL_MATRIX) { 5089 PetscCall(PetscMalloc1(1+am,&ci)); 5090 ci[0] = 0; 5091 for (i=0; i<am; i++) { 5092 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5093 } 5094 PetscCall(PetscMalloc1(1+ci[am],&cj)); 5095 PetscCall(PetscMalloc1(1+ci[am],&ca)); 5096 k = 0; 5097 for (i=0; i<am; i++) { 5098 ncols_o = bi[i+1] - bi[i]; 5099 ncols_d = ai[i+1] - ai[i]; 5100 /* off-diagonal portion of A */ 5101 for (jo=0; jo<ncols_o; jo++) { 5102 col = cmap[*bj]; 5103 if (col >= cstart) break; 5104 cj[k] = col; bj++; 5105 ca[k++] = *ba++; 5106 } 5107 /* diagonal portion of A */ 5108 for (j=0; j<ncols_d; j++) { 5109 cj[k] = cstart + *aj++; 5110 ca[k++] = *aa++; 5111 } 5112 /* off-diagonal portion of A */ 5113 for (j=jo; j<ncols_o; j++) { 5114 cj[k] = cmap[*bj++]; 5115 ca[k++] = *ba++; 5116 } 5117 } 5118 /* put together the new matrix */ 5119 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc)); 5120 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5121 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5122 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5123 mat->free_a = PETSC_TRUE; 5124 mat->free_ij = PETSC_TRUE; 5125 mat->nonew = 0; 5126 } else if (scall == MAT_REUSE_MATRIX) { 5127 mat =(Mat_SeqAIJ*)(*A_loc)->data; 5128 ci = mat->i; 5129 cj = mat->j; 5130 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam)); 5131 for (i=0; i<am; i++) { 5132 /* off-diagonal portion of A */ 5133 ncols_o = bi[i+1] - bi[i]; 5134 for (jo=0; jo<ncols_o; jo++) { 5135 col = cmap[*bj]; 5136 if (col >= cstart) break; 5137 *cam++ = *ba++; bj++; 5138 } 5139 /* diagonal portion of A */ 5140 ncols_d = ai[i+1] - ai[i]; 5141 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5142 /* off-diagonal portion of A */ 5143 for (j=jo; j<ncols_o; j++) { 5144 *cam++ = *ba++; bj++; 5145 } 5146 } 5147 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam)); 5148 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5149 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav)); 5150 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav)); 5151 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5152 PetscFunctionReturn(0); 5153 } 5154 5155 /*@ 5156 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5157 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5158 5159 Not Collective 5160 5161 Input Parameters: 5162 + A - the matrix 5163 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5164 5165 Output Parameters: 5166 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5167 - A_loc - the local sequential matrix generated 5168 5169 Level: developer 5170 5171 Notes: 5172 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5173 5174 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5175 5176 @*/ 5177 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5178 { 5179 Mat Ao,Ad; 5180 const PetscInt *cmap; 5181 PetscMPIInt size; 5182 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5183 5184 PetscFunctionBegin; 5185 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap)); 5186 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5187 if (size == 1) { 5188 if (scall == MAT_INITIAL_MATRIX) { 5189 PetscCall(PetscObjectReference((PetscObject)Ad)); 5190 *A_loc = Ad; 5191 } else if (scall == MAT_REUSE_MATRIX) { 5192 PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN)); 5193 } 5194 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob)); 5195 PetscFunctionReturn(0); 5196 } 5197 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f)); 5198 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5199 if (f) { 5200 PetscCall((*f)(A,scall,glob,A_loc)); 5201 } else { 5202 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5203 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5204 Mat_SeqAIJ *c; 5205 PetscInt *ai = a->i, *aj = a->j; 5206 PetscInt *bi = b->i, *bj = b->j; 5207 PetscInt *ci,*cj; 5208 const PetscScalar *aa,*ba; 5209 PetscScalar *ca; 5210 PetscInt i,j,am,dn,on; 5211 5212 PetscCall(MatGetLocalSize(Ad,&am,&dn)); 5213 PetscCall(MatGetLocalSize(Ao,NULL,&on)); 5214 PetscCall(MatSeqAIJGetArrayRead(Ad,&aa)); 5215 
PetscCall(MatSeqAIJGetArrayRead(Ao,&ba));
5216     if (scall == MAT_INITIAL_MATRIX) {
5217       PetscInt k;
5218       PetscCall(PetscMalloc1(1+am,&ci));
5219       PetscCall(PetscMalloc1(ai[am]+bi[am],&cj));
5220       PetscCall(PetscMalloc1(ai[am]+bi[am],&ca));
5221       ci[0] = 0;
5222       for (i=0,k=0; i<am; i++) {
5223         const PetscInt ncols_o = bi[i+1] - bi[i];
5224         const PetscInt ncols_d = ai[i+1] - ai[i];
5225         ci[i+1] = ci[i] + ncols_o + ncols_d;
5226         /* diagonal portion of A */
5227         for (j=0; j<ncols_d; j++,k++) {
5228           cj[k] = *aj++;
5229           ca[k] = *aa++;
5230         }
5231         /* off-diagonal portion of A */
5232         for (j=0; j<ncols_o; j++,k++) {
5233           cj[k] = dn + *bj++;
5234           ca[k] = *ba++;
5235         }
5236       }
5237       /* put together the new matrix */
5238       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc));
5239       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5240       /* Since these are PETSc arrays, change flags to free them as necessary. */
5241       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5242       c->free_a  = PETSC_TRUE;
5243       c->free_ij = PETSC_TRUE;
5244       c->nonew   = 0;
5245       PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name));
5246     } else if (scall == MAT_REUSE_MATRIX) {
5247       PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca));
5248       for (i=0; i<am; i++) {
5249         const PetscInt ncols_d = ai[i+1] - ai[i];
5250         const PetscInt ncols_o = bi[i+1] - bi[i];
5251         /* diagonal portion of A */
5252         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5253         /* off-diagonal portion of A */
5254         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5255       }
5256       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca));
5257     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5258     PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa));
5259     PetscCall(MatSeqAIJRestoreArrayRead(Ao,&ba));
5260     if (glob) {
5261       PetscInt cst, *gidx;
5262
5263       PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL));
5264       PetscCall(PetscMalloc1(dn+on,&gidx));
5265       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5266       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5267       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob));
5268     }
5269   }
5270   PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
5271   PetscFunctionReturn(0);
5272 }
5273
5274 /*@C
5275      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5276
5277    Not Collective
5278
5279    Input Parameters:
5280 +  A - the matrix
5281 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5282 -  row, col - index sets of rows and columns to extract (or NULL)
5283
5284    Output Parameter:
5285 .
A_loc - the local sequential matrix generated 5286 5287 Level: developer 5288 5289 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5290 5291 @*/ 5292 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5293 { 5294 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5295 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5296 IS isrowa,iscola; 5297 Mat *aloc; 5298 PetscBool match; 5299 5300 PetscFunctionBegin; 5301 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match)); 5302 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5303 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0)); 5304 if (!row) { 5305 start = A->rmap->rstart; end = A->rmap->rend; 5306 PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa)); 5307 } else { 5308 isrowa = *row; 5309 } 5310 if (!col) { 5311 start = A->cmap->rstart; 5312 cmap = a->garray; 5313 nzA = a->A->cmap->n; 5314 nzB = a->B->cmap->n; 5315 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5316 ncols = 0; 5317 for (i=0; i<nzB; i++) { 5318 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5319 else break; 5320 } 5321 imark = i; 5322 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5323 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5324 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola)); 5325 } else { 5326 iscola = *col; 5327 } 5328 if (scall != MAT_INITIAL_MATRIX) { 5329 PetscCall(PetscMalloc1(1,&aloc)); 5330 aloc[0] = *A_loc; 5331 } 5332 PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc)); 5333 if (!col) { /* attach global id of condensed columns */ 5334 PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola)); 5335 } 5336 *A_loc = aloc[0]; 5337 PetscCall(PetscFree(aloc)); 5338 if (!row) { 5339 PetscCall(ISDestroy(&isrowa)); 5340 } 5341 if (!col) { 5342 PetscCall(ISDestroy(&iscola)); 5343 } 5344 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0)); 5345 PetscFunctionReturn(0); 5346 } 5347 5348 /* 5349 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5350 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5351 * on a global size. 
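 * The output P_oth is a sequential AIJ matrix whose row r corresponds to the r-th entry of the index
 * set 'rows'; its entries are gathered from the owning ranks through two PetscSF objects (one for the
 * diagonal and one for the off-diagonal block of P), which are attached to P_oth as "diagsf" and
 * "offdiagsf" so that later calls can refresh the values without rebuilding the structure.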
5352 * */ 5353 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5354 { 5355 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5356 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5357 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5358 PetscMPIInt owner; 5359 PetscSFNode *iremote,*oiremote; 5360 const PetscInt *lrowindices; 5361 PetscSF sf,osf; 5362 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5363 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5364 MPI_Comm comm; 5365 ISLocalToGlobalMapping mapping; 5366 const PetscScalar *pd_a,*po_a; 5367 5368 PetscFunctionBegin; 5369 PetscCall(PetscObjectGetComm((PetscObject)P,&comm)); 5370 /* plocalsize is the number of roots 5371 * nrows is the number of leaves 5372 * */ 5373 PetscCall(MatGetLocalSize(P,&plocalsize,NULL)); 5374 PetscCall(ISGetLocalSize(rows,&nrows)); 5375 PetscCall(PetscCalloc1(nrows,&iremote)); 5376 PetscCall(ISGetIndices(rows,&lrowindices)); 5377 for (i=0;i<nrows;i++) { 5378 /* Find a remote index and an owner for a row 5379 * The row could be local or remote 5380 * */ 5381 owner = 0; 5382 lidx = 0; 5383 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx)); 5384 iremote[i].index = lidx; 5385 iremote[i].rank = owner; 5386 } 5387 /* Create SF to communicate how many nonzero columns for each row */ 5388 PetscCall(PetscSFCreate(comm,&sf)); 5389 /* SF will figure out the number of nonzero colunms for each row, and their 5390 * offsets 5391 * */ 5392 PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5393 PetscCall(PetscSFSetFromOptions(sf)); 5394 PetscCall(PetscSFSetUp(sf)); 5395 5396 PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets)); 5397 PetscCall(PetscCalloc1(2*plocalsize,&nrcols)); 5398 PetscCall(PetscCalloc1(nrows,&pnnz)); 5399 roffsets[0] = 0; 5400 roffsets[1] = 0; 5401 for (i=0;i<plocalsize;i++) { 5402 /* diag */ 5403 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5404 /* off diag */ 5405 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5406 /* compute offsets so that we relative location for each row */ 5407 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5408 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5409 } 5410 PetscCall(PetscCalloc1(2*nrows,&nlcols)); 5411 PetscCall(PetscCalloc1(2*nrows,&loffsets)); 5412 /* 'r' means root, and 'l' means leaf */ 5413 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5414 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5415 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5416 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5417 PetscCall(PetscSFDestroy(&sf)); 5418 PetscCall(PetscFree(roffsets)); 5419 PetscCall(PetscFree(nrcols)); 5420 dntotalcols = 0; 5421 ontotalcols = 0; 5422 ncol = 0; 5423 for (i=0;i<nrows;i++) { 5424 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5425 ncol = PetscMax(pnnz[i],ncol); 5426 /* diag */ 5427 dntotalcols += nlcols[i*2+0]; 5428 /* off diag */ 5429 ontotalcols += nlcols[i*2+1]; 5430 } 5431 /* We do not need to figure the right number of columns 5432 * since all the calculations will be done by going through the raw data 5433 * */ 5434 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth)); 5435 PetscCall(MatSetUp(*P_oth)); 5436 PetscCall(PetscFree(pnnz)); 5437 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5438 /* diag */ 5439 PetscCall(PetscCalloc1(dntotalcols,&iremote)); 5440 /* off diag */ 5441 
PetscCall(PetscCalloc1(ontotalcols,&oiremote)); 5442 /* diag */ 5443 PetscCall(PetscCalloc1(dntotalcols,&ilocal)); 5444 /* off diag */ 5445 PetscCall(PetscCalloc1(ontotalcols,&oilocal)); 5446 dntotalcols = 0; 5447 ontotalcols = 0; 5448 ntotalcols = 0; 5449 for (i=0;i<nrows;i++) { 5450 owner = 0; 5451 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL)); 5452 /* Set iremote for diag matrix */ 5453 for (j=0;j<nlcols[i*2+0];j++) { 5454 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5455 iremote[dntotalcols].rank = owner; 5456 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5457 ilocal[dntotalcols++] = ntotalcols++; 5458 } 5459 /* off diag */ 5460 for (j=0;j<nlcols[i*2+1];j++) { 5461 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5462 oiremote[ontotalcols].rank = owner; 5463 oilocal[ontotalcols++] = ntotalcols++; 5464 } 5465 } 5466 PetscCall(ISRestoreIndices(rows,&lrowindices)); 5467 PetscCall(PetscFree(loffsets)); 5468 PetscCall(PetscFree(nlcols)); 5469 PetscCall(PetscSFCreate(comm,&sf)); 5470 /* P serves as roots and P_oth is leaves 5471 * Diag matrix 5472 * */ 5473 PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5474 PetscCall(PetscSFSetFromOptions(sf)); 5475 PetscCall(PetscSFSetUp(sf)); 5476 5477 PetscCall(PetscSFCreate(comm,&osf)); 5478 /* Off diag */ 5479 PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER)); 5480 PetscCall(PetscSFSetFromOptions(osf)); 5481 PetscCall(PetscSFSetUp(osf)); 5482 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5483 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5484 /* We operate on the matrix internal data for saving memory */ 5485 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5486 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5487 PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL)); 5488 /* Convert to global indices for diag matrix */ 5489 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5490 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5491 /* We want P_oth store global indices */ 5492 PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping)); 5493 /* Use memory scalable approach */ 5494 PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH)); 5495 PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j)); 5496 PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5497 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5498 /* Convert back to local indices */ 5499 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5500 PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5501 nout = 0; 5502 PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j)); 5503 PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout); 5504 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5505 /* Exchange values */ 5506 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5507 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5508 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5509 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5510 /* Stop PETSc from shrinking memory */ 5511 for (i=0;i<nrows;i++) 
p_oth->ilen[i] = p_oth->imax[i]; 5512 PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY)); 5513 PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY)); 5514 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5515 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf)); 5516 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf)); 5517 PetscCall(PetscSFDestroy(&sf)); 5518 PetscCall(PetscSFDestroy(&osf)); 5519 PetscFunctionReturn(0); 5520 } 5521 5522 /* 5523 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5524 * This supports MPIAIJ and MAIJ 5525 * */ 5526 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5527 { 5528 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5529 Mat_SeqAIJ *p_oth; 5530 IS rows,map; 5531 PetscHMapI hamp; 5532 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5533 MPI_Comm comm; 5534 PetscSF sf,osf; 5535 PetscBool has; 5536 5537 PetscFunctionBegin; 5538 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5539 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0)); 5540 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5541 * and then create a submatrix (that often is an overlapping matrix) 5542 * */ 5543 if (reuse == MAT_INITIAL_MATRIX) { 5544 /* Use a hash table to figure out unique keys */ 5545 PetscCall(PetscHMapICreate(&hamp)); 5546 PetscCall(PetscHMapIResize(hamp,a->B->cmap->n)); 5547 PetscCall(PetscCalloc1(a->B->cmap->n,&mapping)); 5548 count = 0; 5549 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5550 for (i=0;i<a->B->cmap->n;i++) { 5551 key = a->garray[i]/dof; 5552 PetscCall(PetscHMapIHas(hamp,key,&has)); 5553 if (!has) { 5554 mapping[i] = count; 5555 PetscCall(PetscHMapISet(hamp,key,count++)); 5556 } else { 5557 /* Current 'i' has the same value the previous step */ 5558 mapping[i] = count-1; 5559 } 5560 } 5561 PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map)); 5562 PetscCall(PetscHMapIGetSize(hamp,&htsize)); 5563 PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count); 5564 PetscCall(PetscCalloc1(htsize,&rowindices)); 5565 off = 0; 5566 PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices)); 5567 PetscCall(PetscHMapIDestroy(&hamp)); 5568 PetscCall(PetscSortInt(htsize,rowindices)); 5569 PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows)); 5570 /* In case, the matrix was already created but users want to recreate the matrix */ 5571 PetscCall(MatDestroy(P_oth)); 5572 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth)); 5573 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map)); 5574 PetscCall(ISDestroy(&map)); 5575 PetscCall(ISDestroy(&rows)); 5576 } else if (reuse == MAT_REUSE_MATRIX) { 5577 /* If matrix was already created, we simply update values using SF objects 5578 * that as attached to the matrix ealier. 
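     * The "diagsf" and "offdiagsf" PetscSFs composed on P_oth by MatCreateSeqSubMatrixWithRows_Private()
     * broadcast the current values of P's diagonal and off-diagonal blocks directly into p_oth->a.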
5579 */ 5580 const PetscScalar *pd_a,*po_a; 5581 5582 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf)); 5583 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf)); 5584 PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5585 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5586 /* Update values in place */ 5587 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5588 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5589 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5590 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5591 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5592 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5593 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5594 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5595 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5596 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0)); 5597 PetscFunctionReturn(0); 5598 } 5599 5600 /*@C 5601 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5602 5603 Collective on Mat 5604 5605 Input Parameters: 5606 + A - the first matrix in mpiaij format 5607 . B - the second matrix in mpiaij format 5608 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5609 5610 Output Parameters: 5611 + rowb - On input index sets of rows of B to extract (or NULL), modified on output 5612 . colb - On input index sets of columns of B to extract (or NULL), modified on output 5613 - B_seq - the sequential matrix generated 5614 5615 Level: developer 5616 5617 @*/ 5618 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5619 { 5620 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5621 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5622 IS isrowb,iscolb; 5623 Mat *bseq=NULL; 5624 5625 PetscFunctionBegin; 5626 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5627 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5628 } 5629 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0)); 5630 5631 if (scall == MAT_INITIAL_MATRIX) { 5632 start = A->cmap->rstart; 5633 cmap = a->garray; 5634 nzA = a->A->cmap->n; 5635 nzB = a->B->cmap->n; 5636 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5637 ncols = 0; 5638 for (i=0; i<nzB; i++) { /* row < local row index */ 5639 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5640 else break; 5641 } 5642 imark = i; 5643 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5644 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5645 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb)); 5646 PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb)); 5647 } else { 5648 PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5649 isrowb = *rowb; iscolb = *colb; 5650 PetscCall(PetscMalloc1(1,&bseq)); 5651 bseq[0] = *B_seq; 5652 } 5653 PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq)); 5654 *B_seq = bseq[0]; 5655 PetscCall(PetscFree(bseq)); 5656 if (!rowb) { 5657 PetscCall(ISDestroy(&isrowb)); 5658 } else { 5659 *rowb = isrowb; 5660 } 5661 if (!colb) { 5662 PetscCall(ISDestroy(&iscolb)); 5663 } else 
{ 5664 *colb = iscolb; 5665 } 5666 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0)); 5667 PetscFunctionReturn(0); 5668 } 5669 5670 /* 5671 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5672 of the OFF-DIAGONAL portion of local A 5673 5674 Collective on Mat 5675 5676 Input Parameters: 5677 + A,B - the matrices in mpiaij format 5678 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5679 5680 Output Parameter: 5681 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5682 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5683 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5684 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5685 5686 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5687 for this matrix. This is not desirable.. 5688 5689 Level: developer 5690 5691 */ 5692 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5693 { 5694 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5695 Mat_SeqAIJ *b_oth; 5696 VecScatter ctx; 5697 MPI_Comm comm; 5698 const PetscMPIInt *rprocs,*sprocs; 5699 const PetscInt *srow,*rstarts,*sstarts; 5700 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5701 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5702 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5703 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5704 PetscMPIInt size,tag,rank,nreqs; 5705 5706 PetscFunctionBegin; 5707 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5708 PetscCallMPI(MPI_Comm_size(comm,&size)); 5709 5710 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5711 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5712 } 5713 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0)); 5714 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 5715 5716 if (size == 1) { 5717 startsj_s = NULL; 5718 bufa_ptr = NULL; 5719 *B_oth = NULL; 5720 PetscFunctionReturn(0); 5721 } 5722 5723 ctx = a->Mvctx; 5724 tag = ((PetscObject)ctx)->tag; 5725 5726 PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5727 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5728 PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs)); 5729 PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs)); 5730 PetscCall(PetscMalloc1(nreqs,&reqs)); 5731 rwaits = reqs; 5732 swaits = reqs + nrecvs; 5733 5734 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5735 if (scall == MAT_INITIAL_MATRIX) { 5736 /* i-array */ 5737 /*---------*/ 5738 /* post receives */ 5739 if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5740 for (i=0; i<nrecvs; i++) { 5741 rowlen = rvalues + rstarts[i]*rbs; 5742 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5743 
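      /* post a nonblocking receive for the per-row nonzero counts coming from rank rprocs[i] */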
PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5744 } 5745 5746 /* pack the outgoing message */ 5747 PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj)); 5748 5749 sstartsj[0] = 0; 5750 rstartsj[0] = 0; 5751 len = 0; /* total length of j or a array to be sent */ 5752 if (nsends) { 5753 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5754 PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues)); 5755 } 5756 for (i=0; i<nsends; i++) { 5757 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5758 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5759 for (j=0; j<nrows; j++) { 5760 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5761 for (l=0; l<sbs; l++) { 5762 PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */ 5763 5764 rowlen[j*sbs+l] = ncols; 5765 5766 len += ncols; 5767 PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); 5768 } 5769 k++; 5770 } 5771 PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5772 5773 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5774 } 5775 /* recvs and sends of i-array are completed */ 5776 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5777 PetscCall(PetscFree(svalues)); 5778 5779 /* allocate buffers for sending j and a arrays */ 5780 PetscCall(PetscMalloc1(len+1,&bufj)); 5781 PetscCall(PetscMalloc1(len+1,&bufa)); 5782 5783 /* create i-array of B_oth */ 5784 PetscCall(PetscMalloc1(aBn+2,&b_othi)); 5785 5786 b_othi[0] = 0; 5787 len = 0; /* total length of j or a array to be received */ 5788 k = 0; 5789 for (i=0; i<nrecvs; i++) { 5790 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5791 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5792 for (j=0; j<nrows; j++) { 5793 b_othi[k+1] = b_othi[k] + rowlen[j]; 5794 PetscCall(PetscIntSumError(rowlen[j],len,&len)); 5795 k++; 5796 } 5797 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5798 } 5799 PetscCall(PetscFree(rvalues)); 5800 5801 /* allocate space for j and a arrays of B_oth */ 5802 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj)); 5803 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha)); 5804 5805 /* j-array */ 5806 /*---------*/ 5807 /* post receives of j-array */ 5808 for (i=0; i<nrecvs; i++) { 5809 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5810 PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5811 } 5812 5813 /* pack the outgoing message j-array */ 5814 if (nsends) k = sstarts[0]; 5815 for (i=0; i<nsends; i++) { 5816 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5817 bufJ = bufj+sstartsj[i]; 5818 for (j=0; j<nrows; j++) { 5819 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5820 for (ll=0; ll<sbs; ll++) { 5821 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5822 for (l=0; l<ncols; l++) { 5823 *bufJ++ = cols[l]; 5824 } 5825 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5826 } 5827 } 5828 PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5829 } 5830 5831 /* recvs and sends of j-array are completed */ 5832 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5833 } else if (scall == MAT_REUSE_MATRIX) { 5834 sstartsj = *startsj_s; 5835 rstartsj = *startsj_r; 5836 bufa = *bufa_ptr; 5837 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5838 
PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha)); 5839 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5840 5841 /* a-array */ 5842 /*---------*/ 5843 /* post receives of a-array */ 5844 for (i=0; i<nrecvs; i++) { 5845 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5846 PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i)); 5847 } 5848 5849 /* pack the outgoing message a-array */ 5850 if (nsends) k = sstarts[0]; 5851 for (i=0; i<nsends; i++) { 5852 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5853 bufA = bufa+sstartsj[i]; 5854 for (j=0; j<nrows; j++) { 5855 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5856 for (ll=0; ll<sbs; ll++) { 5857 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5858 for (l=0; l<ncols; l++) { 5859 *bufA++ = vals[l]; 5860 } 5861 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5862 } 5863 } 5864 PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i)); 5865 } 5866 /* recvs and sends of a-array are completed */ 5867 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5868 PetscCall(PetscFree(reqs)); 5869 5870 if (scall == MAT_INITIAL_MATRIX) { 5871 /* put together the new matrix */ 5872 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth)); 5873 5874 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5875 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5876 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5877 b_oth->free_a = PETSC_TRUE; 5878 b_oth->free_ij = PETSC_TRUE; 5879 b_oth->nonew = 0; 5880 5881 PetscCall(PetscFree(bufj)); 5882 if (!startsj_s || !bufa_ptr) { 5883 PetscCall(PetscFree2(sstartsj,rstartsj)); 5884 PetscCall(PetscFree(bufa_ptr)); 5885 } else { 5886 *startsj_s = sstartsj; 5887 *startsj_r = rstartsj; 5888 *bufa_ptr = bufa; 5889 } 5890 } else if (scall == MAT_REUSE_MATRIX) { 5891 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha)); 5892 } 5893 5894 PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5895 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs)); 5896 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0)); 5897 PetscFunctionReturn(0); 5898 } 5899 5900 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5901 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5902 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5903 #if defined(PETSC_HAVE_MKL_SPARSE) 5904 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5905 #endif 5906 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5907 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5908 #if defined(PETSC_HAVE_ELEMENTAL) 5909 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5910 #endif 5911 #if defined(PETSC_HAVE_SCALAPACK) 5912 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5913 #endif 5914 #if defined(PETSC_HAVE_HYPRE) 5915 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5916 #endif 5917 #if defined(PETSC_HAVE_CUDA) 5918 PETSC_INTERN PetscErrorCode 
MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5919 #endif 5920 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5921 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5922 #endif 5923 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5924 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5925 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5926 5927 /* 5928 Computes (B'*A')' since computing B*A directly is untenable 5929 5930 n p p 5931 [ ] [ ] [ ] 5932 m [ A ] * n [ B ] = m [ C ] 5933 [ ] [ ] [ ] 5934 5935 */ 5936 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5937 { 5938 Mat At,Bt,Ct; 5939 5940 PetscFunctionBegin; 5941 PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At)); 5942 PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt)); 5943 PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct)); 5944 PetscCall(MatDestroy(&At)); 5945 PetscCall(MatDestroy(&Bt)); 5946 PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C)); 5947 PetscCall(MatDestroy(&Ct)); 5948 PetscFunctionReturn(0); 5949 } 5950 5951 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5952 { 5953 PetscBool cisdense; 5954 5955 PetscFunctionBegin; 5956 PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n); 5957 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N)); 5958 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 5959 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 5960 if (!cisdense) { 5961 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 5962 } 5963 PetscCall(MatSetUp(C)); 5964 5965 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5966 PetscFunctionReturn(0); 5967 } 5968 5969 /* ----------------------------------------------------------------*/ 5970 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 5971 { 5972 Mat_Product *product = C->product; 5973 Mat A = product->A,B=product->B; 5974 5975 PetscFunctionBegin; 5976 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 5977 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5978 5979 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 5980 C->ops->productsymbolic = MatProductSymbolic_AB; 5981 PetscFunctionReturn(0); 5982 } 5983 5984 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 5985 { 5986 Mat_Product *product = C->product; 5987 5988 PetscFunctionBegin; 5989 if (product->type == MATPRODUCT_AB) { 5990 PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 5991 } 5992 PetscFunctionReturn(0); 5993 } 5994 5995 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 5996 5997 Input Parameters: 5998 5999 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6000 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6001 6002 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6003 6004 For Set1, j1[] contains column indices of the nonzeros. 
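   A single-row illustration (m=1; the exact meaning of rowBegin1, rowEnd1 and jmap1 is given below):
     Set1: j1 = {2,2,5}, rowBegin1 = {0}, rowEnd1 = {3}, jmap1 = {0,2,3}  (unique columns 2 and 5; column 2 appears twice)
     Set2: j2 = {3,5,5}, rowBegin2 = {0}, rowEnd2 = {3}, jmap2 = {0,1,3}  (unique columns 3 and 5; column 5 appears twice)
   merge into
     i = {0,3}, j = {2,3,5}, imap1 = {0,2}, imap2 = {1,2}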
6005 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6006 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6007 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6008 6009 Similar for Set2. 6010 6011 This routine merges the two sets of nonzeros row by row and removes repeats. 6012 6013 Output Parameters: (memory is allocated by the caller) 6014 6015 i[],j[]: the CSR of the merged matrix, which has m rows. 6016 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6017 imap2[]: similar to imap1[], but for Set2. 6018 Note we order nonzeros row-by-row and from left to right. 6019 */ 6020 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[], 6021 const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[], 6022 PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[]) 6023 { 6024 PetscInt r,m; /* Row index of mat */ 6025 PetscCount t,t1,t2,b1,e1,b2,e2; 6026 6027 PetscFunctionBegin; 6028 PetscCall(MatGetLocalSize(mat,&m,NULL)); 6029 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6030 i[0] = 0; 6031 for (r=0; r<m; r++) { /* Do row by row merging */ 6032 b1 = rowBegin1[r]; 6033 e1 = rowEnd1[r]; 6034 b2 = rowBegin2[r]; 6035 e2 = rowEnd2[r]; 6036 while (b1 < e1 && b2 < e2) { 6037 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6038 j[t] = j1[b1]; 6039 imap1[t1] = t; 6040 imap2[t2] = t; 6041 b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6042 b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6043 t1++; t2++; t++; 6044 } else if (j1[b1] < j2[b2]) { 6045 j[t] = j1[b1]; 6046 imap1[t1] = t; 6047 b1 += jmap1[t1+1] - jmap1[t1]; 6048 t1++; t++; 6049 } else { 6050 j[t] = j2[b2]; 6051 imap2[t2] = t; 6052 b2 += jmap2[t2+1] - jmap2[t2]; 6053 t2++; t++; 6054 } 6055 } 6056 /* Merge the remaining in either j1[] or j2[] */ 6057 while (b1 < e1) { 6058 j[t] = j1[b1]; 6059 imap1[t1] = t; 6060 b1 += jmap1[t1+1] - jmap1[t1]; 6061 t1++; t++; 6062 } 6063 while (b2 < e2) { 6064 j[t] = j2[b2]; 6065 imap2[t2] = t; 6066 b2 += jmap2[t2+1] - jmap2[t2]; 6067 t2++; t++; 6068 } 6069 i[r+1] = t; 6070 } 6071 PetscFunctionReturn(0); 6072 } 6073 6074 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6075 6076 Input Parameters: 6077 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6078 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6079 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6080 6081 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6082 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6083 6084 Output Parameters: 6085 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6086 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6087 They contain indices pointing to j[]. 
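   A single-row illustration (one local row; local columns are [cstart,cend) = [3,6); numbers are made up):
     input:   j = {7,4,4,2}, perm = {0,1,2,3}
     output:  j = {4,4,2,7}  (diagonal-block entries first, each part sorted), perm = {1,2,3,0} (ties may appear in either order),
              rowBegin = {0}, rowMid = {2}, rowEnd = {4},
              Atot = 2, Annz = 1, Aperm = {1,2}, Ajmap = {0,2},
              Btot = 2, Bnnz = 2, Bperm = {3,0}, Bjmap = {0,1,2}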
For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6088 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6089 6090 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6091 Atot: number of entries belonging to the diagonal block. 6092 Annz: number of unique nonzeros belonging to the diagonal block. 6093 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6094 repeats (i.e., same 'i,j' pair). 6095 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6096 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6097 6098 Atot: number of entries belonging to the diagonal block 6099 Annz: number of unique nonzeros belonging to the diagonal block. 6100 6101 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6102 6103 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6104 */ 6105 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[], 6106 PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[], 6107 PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_, 6108 PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_) 6109 { 6110 PetscInt cstart,cend,rstart,rend,row,col; 6111 PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6112 PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6113 PetscCount k,m,p,q,r,s,mid; 6114 PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap; 6115 6116 PetscFunctionBegin; 6117 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6118 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6119 m = rend - rstart; 6120 6121 for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */ 6122 6123 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6124 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6125 */ 6126 while (k<n) { 6127 row = i[k]; 6128 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6129 for (s=k; s<n; s++) if (i[s] != row) break; 6130 for (p=k; p<s; p++) { 6131 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6132 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); 6133 } 6134 PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k)); 6135 PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6136 rowBegin[row-rstart] = k; 6137 rowMid[row-rstart] = mid; 6138 rowEnd[row-rstart] = s; 6139 6140 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6141 Atot += mid - k; 6142 Btot += s - mid; 6143 6144 /* Count unique nonzeros of this diag/offdiag row */ 6145 for (p=k; p<mid;) { 6146 col = j[p]; 6147 do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */ 6148 Annz++; 6149 } 6150 6151 for (p=mid; p<s;) { 6152 col = j[p]; 6153 do {p++;} while (p<s && j[p] == col); 6154 Bnnz++; 6155 } 6156 k = s; 6157 } 6158 6159 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6160 PetscCall(PetscMalloc1(Atot,&Aperm)); 6161 PetscCall(PetscMalloc1(Btot,&Bperm)); 6162 PetscCall(PetscMalloc1(Annz+1,&Ajmap)); 6163 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap)); 6164 6165 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6166 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6167 for (r=0; r<m; r++) { 6168 k = rowBegin[r]; 6169 mid = rowMid[r]; 6170 s = rowEnd[r]; 6171 PetscCall(PetscArraycpy(Aperm+Atot,perm+k, mid-k)); 6172 PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid)); 6173 Atot += mid - k; 6174 Btot += s - mid; 6175 6176 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6177 for (p=k; p<mid;) { 6178 col = j[p]; 6179 q = p; 6180 do {p++;} while (p<mid && j[p] == col); 6181 Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6182 Annz++; 6183 } 6184 6185 for (p=mid; p<s;) { 6186 col = j[p]; 6187 q = p; 6188 do {p++;} while (p<s && j[p] == col); 6189 Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6190 Bnnz++; 6191 } 6192 } 6193 /* Output */ 6194 *Aperm_ = Aperm; 6195 *Annz_ = Annz; 6196 *Atot_ = Atot; 6197 *Ajmap_ = Ajmap; 6198 *Bperm_ = Bperm; 6199 *Bnnz_ = Bnnz; 6200 *Btot_ = Btot; 6201 *Bjmap_ = Bjmap; 6202 PetscFunctionReturn(0); 6203 } 6204 6205 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6206 6207 Input Parameters: 6208 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6209 nnz: number of unique nonzeros in the merged matrix 6210 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6211 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6212 6213 Output Parameter: (memory is allocated by the caller) 6214 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6215 6216 Example: 6217 nnz1 = 4 6218 nnz = 6 6219 imap = [1,3,4,5] 6220 jmap = [0,3,5,6,7] 6221 then, 6222 jmap_new = [0,0,3,3,5,6,7] 6223 */ 6224 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[]) 6225 { 6226 PetscCount k,p; 6227 6228 PetscFunctionBegin; 6229 jmap_new[0] = 0; 6230 p = nnz; /* p loops 
over jmap_new[] backwards */ 6231 for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */ 6232 for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1]; 6233 } 6234 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6235 PetscFunctionReturn(0); 6236 } 6237 6238 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6239 { 6240 MPI_Comm comm; 6241 PetscMPIInt rank,size; 6242 PetscInt m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6243 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6244 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6245 6246 PetscFunctionBegin; 6247 PetscCall(PetscFree(mpiaij->garray)); 6248 PetscCall(VecDestroy(&mpiaij->lvec)); 6249 #if defined(PETSC_USE_CTABLE) 6250 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6251 #else 6252 PetscCall(PetscFree(mpiaij->colmap)); 6253 #endif 6254 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6255 mat->assembled = PETSC_FALSE; 6256 mat->was_assembled = PETSC_FALSE; 6257 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6258 6259 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 6260 PetscCallMPI(MPI_Comm_size(comm,&size)); 6261 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 6262 PetscCall(PetscLayoutSetUp(mat->rmap)); 6263 PetscCall(PetscLayoutSetUp(mat->cmap)); 6264 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6265 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6266 PetscCall(MatGetLocalSize(mat,&m,&n)); 6267 PetscCall(MatGetSize(mat,&M,&N)); 6268 6269 /* ---------------------------------------------------------------------------*/ 6270 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6271 /* entries come first, then local rows, then remote rows. */ 6272 /* ---------------------------------------------------------------------------*/ 6273 PetscCount n1 = coo_n,*perm1; 6274 PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 6275 PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1)); 6276 PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */ 6277 PetscCall(PetscArraycpy(j1,coo_j,n1)); 6278 for (k=0; k<n1; k++) perm1[k] = k; 6279 6280 /* Manipulate indices so that entries with negative row or col indices will have smallest 6281 row indices, local entries will have greater but negative row indices, and remote entries 6282 will have positive row indices. 
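     For example, suppose this rank owns rows [rstart,rend) = [100,200): an entry with a negative row or column
     index becomes PETSC_MIN_INT, a local row 150 becomes 150 - PETSC_MAX_INT (negative, yet larger than
     PETSC_MIN_INT), and a remote row 300 keeps the value 300. Sorting by row below therefore places ignored
     entries first, then local entries, then remote entries.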
6283 */ 6284 for (k=0; k<n1; k++) { 6285 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6286 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6287 else { 6288 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6289 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6290 } 6291 } 6292 6293 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6294 PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1)); 6295 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */ 6296 PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */ 6297 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6298 6299 /* ---------------------------------------------------------------------------*/ 6300 /* Split local rows into diag/offdiag portions */ 6301 /* ---------------------------------------------------------------------------*/ 6302 PetscCount *rowBegin1,*rowMid1,*rowEnd1; 6303 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1; 6304 PetscCount Annz1,Bnnz1,Atot1,Btot1; 6305 6306 PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1)); 6307 PetscCall(PetscMalloc1(n1-rem,&Cperm1)); 6308 PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1)); 6309 6310 /* ---------------------------------------------------------------------------*/ 6311 /* Send remote rows to their owner */ 6312 /* ---------------------------------------------------------------------------*/ 6313 /* Find which rows should be sent to which remote ranks*/ 6314 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6315 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6316 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6317 const PetscInt *ranges; 6318 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6319 6320 PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges)); 6321 PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries)); 6322 for (k=rem; k<n1;) { 6323 PetscMPIInt owner; 6324 PetscInt firstRow,lastRow; 6325 6326 /* Locate a row range */ 6327 firstRow = i1[k]; /* first row of this owner */ 6328 PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner)); 6329 lastRow = ranges[owner+1]-1; /* last row of this owner */ 6330 6331 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6332 PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p)); 6333 6334 /* All entries in [k,p) belong to this remote owner */ 6335 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6336 PetscMPIInt *sendto2; 6337 PetscInt *nentries2; 6338 PetscInt maxNsend2 = (maxNsend <= size/2) ? 
maxNsend*2 : size; 6339 6340 PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2)); 6341 PetscCall(PetscArraycpy(sendto2,sendto,maxNsend)); 6342 PetscCall(PetscArraycpy(nentries2,nentries,maxNsend)); 6343 PetscCall(PetscFree2(sendto,nentries)); 6344 sendto = sendto2; 6345 nentries = nentries2; 6346 maxNsend = maxNsend2; 6347 } 6348 sendto[nsend] = owner; 6349 nentries[nsend] = p - k; 6350 PetscCall(PetscCountCast(p-k,&nentries[nsend])); 6351 nsend++; 6352 k = p; 6353 } 6354 6355 /* Build 1st SF to know offsets on remote to send data */ 6356 PetscSF sf1; 6357 PetscInt nroots = 1,nroots2 = 0; 6358 PetscInt nleaves = nsend,nleaves2 = 0; 6359 PetscInt *offsets; 6360 PetscSFNode *iremote; 6361 6362 PetscCall(PetscSFCreate(comm,&sf1)); 6363 PetscCall(PetscMalloc1(nsend,&iremote)); 6364 PetscCall(PetscMalloc1(nsend,&offsets)); 6365 for (k=0; k<nsend; k++) { 6366 iremote[k].rank = sendto[k]; 6367 iremote[k].index = 0; 6368 nleaves2 += nentries[k]; 6369 PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt"); 6370 } 6371 PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6372 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM)); 6373 PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Should nroots2 overflow, the offsets[] check below would catch it */ 6374 PetscCall(PetscSFDestroy(&sf1)); 6375 PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem); 6376 6377 /* Build 2nd SF to send remote COOs to their owner */ 6378 PetscSF sf2; 6379 nroots = nroots2; 6380 nleaves = nleaves2; 6381 PetscCall(PetscSFCreate(comm,&sf2)); 6382 PetscCall(PetscSFSetFromOptions(sf2)); 6383 PetscCall(PetscMalloc1(nleaves,&iremote)); 6384 p = 0; 6385 for (k=0; k<nsend; k++) { 6386 PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt"); 6387 for (q=0; q<nentries[k]; q++,p++) { 6388 iremote[p].rank = sendto[k]; 6389 iremote[p].index = offsets[k] + q; 6390 } 6391 } 6392 PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6393 6394 /* sf2 only sends contiguous leafdata to contiguous rootdata.
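     The graph of sf2 comes from the fetch-and-add on sf1 above: every rank exposes a single root counter, and
     each sender gets back, in offsets[], the starting slot of its block in the receiver's buffer. For example,
     if ranks 0 and 1 send 3 and 2 entries to rank 2, they obtain offsets 0 and 3 (in one possible ordering) and
     rank 2 ends up with nroots2 = 5 contiguous root slots.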
We record the permutation which will be used to fill leafdata */ 6395 PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem)); 6396 6397 /* Send the remote COOs to their owner */ 6398 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6399 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6400 PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2)); 6401 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE)); 6402 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE)); 6403 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE)); 6404 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE)); 6405 6406 PetscCall(PetscFree(offsets)); 6407 PetscCall(PetscFree2(sendto,nentries)); 6408 6409 /* ---------------------------------------------------------------*/ 6410 /* Sort received COOs by row along with the permutation array */ 6411 /* ---------------------------------------------------------------*/ 6412 for (k=0; k<n2; k++) perm2[k] = k; 6413 PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2)); 6414 6415 /* ---------------------------------------------------------------*/ 6416 /* Split received COOs into diag/offdiag portions */ 6417 /* ---------------------------------------------------------------*/ 6418 PetscCount *rowBegin2,*rowMid2,*rowEnd2; 6419 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6420 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6421 6422 PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2)); 6423 PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2)); 6424 6425 /* --------------------------------------------------------------------------*/ 6426 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6427 /* --------------------------------------------------------------------------*/ 6428 PetscInt *Ai,*Bi; 6429 PetscInt *Aj,*Bj; 6430 6431 PetscCall(PetscMalloc1(m+1,&Ai)); 6432 PetscCall(PetscMalloc1(m+1,&Bi)); 6433 PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6434 PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj)); 6435 6436 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6437 PetscCall(PetscMalloc1(Annz1,&Aimap1)); 6438 PetscCall(PetscMalloc1(Bnnz1,&Bimap1)); 6439 PetscCall(PetscMalloc1(Annz2,&Aimap2)); 6440 PetscCall(PetscMalloc1(Bnnz2,&Bimap2)); 6441 6442 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj)); 6443 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj)); 6444 6445 /* --------------------------------------------------------------------------*/ 6446 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6447 /* expect nonzeros in A/B most likely have local contributing entries */ 6448 /* --------------------------------------------------------------------------*/ 6449 PetscInt Annz = Ai[m]; 6450 PetscInt Bnnz = Bi[m]; 6451 PetscCount *Ajmap1_new,*Bjmap1_new; 6452 6453 PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new)); 6454 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new)); 6455 6456 PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new)); 6457 
PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new)); 6458 6459 PetscCall(PetscFree(Aimap1)); 6460 PetscCall(PetscFree(Ajmap1)); 6461 PetscCall(PetscFree(Bimap1)); 6462 PetscCall(PetscFree(Bjmap1)); 6463 PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1)); 6464 PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2)); 6465 PetscCall(PetscFree3(i1,j1,perm1)); 6466 PetscCall(PetscFree3(i2,j2,perm2)); 6467 6468 Ajmap1 = Ajmap1_new; 6469 Bjmap1 = Bjmap1_new; 6470 6471 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6472 if (Annz < Annz1 + Annz2) { 6473 PetscInt *Aj_new; 6474 PetscCall(PetscMalloc1(Annz,&Aj_new)); 6475 PetscCall(PetscArraycpy(Aj_new,Aj,Annz)); 6476 PetscCall(PetscFree(Aj)); 6477 Aj = Aj_new; 6478 } 6479 6480 if (Bnnz < Bnnz1 + Bnnz2) { 6481 PetscInt *Bj_new; 6482 PetscCall(PetscMalloc1(Bnnz,&Bj_new)); 6483 PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz)); 6484 PetscCall(PetscFree(Bj)); 6485 Bj = Bj_new; 6486 } 6487 6488 /* --------------------------------------------------------------------------------*/ 6489 /* Create new submatrices for on-process and off-process coupling */ 6490 /* --------------------------------------------------------------------------------*/ 6491 PetscScalar *Aa,*Ba; 6492 MatType rtype; 6493 Mat_SeqAIJ *a,*b; 6494 PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */ 6495 PetscCall(PetscCalloc1(Bnnz,&Ba)); 6496 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6497 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6498 PetscCall(MatDestroy(&mpiaij->A)); 6499 PetscCall(MatDestroy(&mpiaij->B)); 6500 PetscCall(MatGetRootType_Private(mat,&rtype)); 6501 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A)); 6502 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B)); 6503 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6504 6505 a = (Mat_SeqAIJ*)mpiaij->A->data; 6506 b = (Mat_SeqAIJ*)mpiaij->B->data; 6507 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6508 a->free_a = b->free_a = PETSC_TRUE; 6509 a->free_ij = b->free_ij = PETSC_TRUE; 6510 6511 /* conversion must happen AFTER multiply setup */ 6512 PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A)); 6513 PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B)); 6514 PetscCall(VecDestroy(&mpiaij->lvec)); 6515 PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL)); 6516 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec)); 6517 6518 mpiaij->coo_n = coo_n; 6519 mpiaij->coo_sf = sf2; 6520 mpiaij->sendlen = nleaves; 6521 mpiaij->recvlen = nroots; 6522 6523 mpiaij->Annz = Annz; 6524 mpiaij->Bnnz = Bnnz; 6525 6526 mpiaij->Annz2 = Annz2; 6527 mpiaij->Bnnz2 = Bnnz2; 6528 6529 mpiaij->Atot1 = Atot1; 6530 mpiaij->Atot2 = Atot2; 6531 mpiaij->Btot1 = Btot1; 6532 mpiaij->Btot2 = Btot2; 6533 6534 mpiaij->Ajmap1 = Ajmap1; 6535 mpiaij->Aperm1 = Aperm1; 6536 6537 mpiaij->Bjmap1 = Bjmap1; 6538 mpiaij->Bperm1 = Bperm1; 6539 6540 mpiaij->Aimap2 = Aimap2; 6541 mpiaij->Ajmap2 = Ajmap2; 6542 mpiaij->Aperm2 = Aperm2; 6543 6544 mpiaij->Bimap2 = Bimap2; 6545 mpiaij->Bjmap2 = Bjmap2; 6546 mpiaij->Bperm2 = Bperm2; 6547 6548 mpiaij->Cperm1 = Cperm1; 6549 6550 /* Allocate in preallocation. 
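   sendbuf and recvbuf are the staging buffers later used by MatSetValuesCOO_MPIAIJ(): sendbuf is packed with the
   caller's off-process values (selected through Cperm1) and reduced through coo_sf into recvbuf on the owning ranks.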
If not used, it has zero cost on host */ 6551 PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf)); 6552 PetscFunctionReturn(0); 6553 } 6554 6555 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode) 6556 { 6557 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6558 Mat A = mpiaij->A,B = mpiaij->B; 6559 PetscCount Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2; 6560 PetscScalar *Aa,*Ba; 6561 PetscScalar *sendbuf = mpiaij->sendbuf; 6562 PetscScalar *recvbuf = mpiaij->recvbuf; 6563 const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2; 6564 const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2; 6565 const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2; 6566 const PetscCount *Cperm1 = mpiaij->Cperm1; 6567 6568 PetscFunctionBegin; 6569 PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */ 6570 PetscCall(MatSeqAIJGetArray(B,&Ba)); 6571 6572 /* Pack entries to be sent to remote */ 6573 for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6574 6575 /* Send remote entries to their owner and overlap the communication with local computation */ 6576 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE)); 6577 /* Add local entries to A and B */ 6578 for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6579 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stablility */ 6580 for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]]; 6581 Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum; 6582 } 6583 for (PetscCount i=0; i<Bnnz; i++) { 6584 PetscScalar sum = 0.0; 6585 for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]]; 6586 Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum; 6587 } 6588 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE)); 6589 6590 /* Add received remote entries to A and B */ 6591 for (PetscCount i=0; i<Annz2; i++) { 6592 for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6593 } 6594 for (PetscCount i=0; i<Bnnz2; i++) { 6595 for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6596 } 6597 PetscCall(MatSeqAIJRestoreArray(A,&Aa)); 6598 PetscCall(MatSeqAIJRestoreArray(B,&Ba)); 6599 PetscFunctionReturn(0); 6600 } 6601 6602 /* ----------------------------------------------------------------*/ 6603 6604 /*MC 6605 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6606 6607 Options Database Keys: 6608 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6609 6610 Level: beginner 6611 6612 Notes: 6613 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6614 in this case the values associated with the rows and columns one passes in are set to zero 6615 in the matrix 6616 6617 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6618 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6619 6620 .seealso: `MatCreateAIJ()` 6621 M*/ 6622 6623 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6624 { 6625 Mat_MPIAIJ *b; 6626 PetscMPIInt size; 6627 6628 PetscFunctionBegin; 6629 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 6630 6631 PetscCall(PetscNewLog(B,&b)); 6632 B->data = (void*)b; 6633 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 6634 B->assembled = PETSC_FALSE; 6635 B->insertmode = NOT_SET_VALUES; 6636 b->size = size; 6637 6638 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 6639 6640 /* build cache for off array entries formed */ 6641 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 6642 6643 b->donotstash = PETSC_FALSE; 6644 b->colmap = NULL; 6645 b->garray = NULL; 6646 b->roworiented = PETSC_TRUE; 6647 6648 /* stuff used for matrix vector multiply */ 6649 b->lvec = NULL; 6650 b->Mvctx = NULL; 6651 6652 /* stuff for MatGetRow() */ 6653 b->rowindices = NULL; 6654 b->rowvalues = NULL; 6655 b->getrowactive = PETSC_FALSE; 6656 6657 /* flexible pointer used in CUSPARSE classes */ 6658 b->spptr = NULL; 6659 6660 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6661 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ)); 6662 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ)); 6663 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ)); 6664 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ)); 6665 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ)); 6666 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6667 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ)); 6668 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM)); 6669 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL)); 6670 #if defined(PETSC_HAVE_CUDA) 6671 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6672 #endif 6673 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6674 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos)); 6675 #endif 6676 #if defined(PETSC_HAVE_MKL_SPARSE) 6677 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL)); 6678 #endif 6679 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL)); 6680 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ)); 6681 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ)); 6682 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense)); 6683 #if defined(PETSC_HAVE_ELEMENTAL) 6684 
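  /* The "MatConvert_<from>_<to>_C" callbacks composed in this constructor are what MatConvert() queries to
     dispatch a type conversion, e.g. (illustrative)

       Mat Bconv;
       PetscCall(MatConvert(A,MATMPIBAIJ,MAT_INITIAL_MATRIX,&Bconv));

     for an assembled MATMPIAIJ matrix A. */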
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental)); 6685 #endif 6686 #if defined(PETSC_HAVE_SCALAPACK) 6687 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK)); 6688 #endif 6689 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS)); 6690 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL)); 6691 #if defined(PETSC_HAVE_HYPRE) 6692 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE)); 6693 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6694 #endif 6695 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ)); 6696 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ)); 6697 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ)); 6698 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ)); 6699 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ)); 6700 PetscFunctionReturn(0); 6701 } 6702 6703 /*@C 6704 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6705 and "off-diagonal" part of the matrix in CSR format. 6706 6707 Collective 6708 6709 Input Parameters: 6710 + comm - MPI communicator 6711 . m - number of local rows (Cannot be PETSC_DECIDE) 6712 . n - This value should be the same as the local size used in creating the 6713 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6714 calculated if N is given) For square matrices n is almost always m. 6715 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6716 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6717 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6718 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6719 . a - matrix values 6720 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6721 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6722 - oa - matrix values 6723 6724 Output Parameter: 6725 . mat - the matrix 6726 6727 Level: advanced 6728 6729 Notes: 6730 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6731 must free the arrays once the matrix has been destroyed and not before. 6732 6733 The i and j indices are 0 based 6734 6735 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6736 6737 This sets local rows and cannot be used to set off-processor values. 6738 6739 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6740 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6741 not easily support in-place reassembly. 
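   For illustration (invented values), a process owning two rows, where the first row has diagonal-block entries in
   local columns 0 and 1 with values 1.0 and 2.0 plus one off-diagonal entry in global column 5 with value 3.0, and
   the second row has a single diagonal-block entry in local column 1 with value 4.0, would pass
      i  = {0,2,3},  j  = {0,1,1},  a  = {1.0,2.0,4.0}
      oi = {0,1,1},  oj = {5},      oa = {3.0}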
It is recommended to use MatSetValues() (or a variant thereof) because 6742 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6743 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6744 communication if it is known that only local entries will be set. 6745 6746 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6747 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6748 @*/ 6749 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6750 { 6751 Mat_MPIAIJ *maij; 6752 6753 PetscFunctionBegin; 6754 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6755 PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6756 PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6757 PetscCall(MatCreate(comm,mat)); 6758 PetscCall(MatSetSizes(*mat,m,n,M,N)); 6759 PetscCall(MatSetType(*mat,MATMPIAIJ)); 6760 maij = (Mat_MPIAIJ*) (*mat)->data; 6761 6762 (*mat)->preallocated = PETSC_TRUE; 6763 6764 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6765 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6766 6767 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A)); 6768 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B)); 6769 6770 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 6771 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 6772 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 6773 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 6774 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 6775 PetscFunctionReturn(0); 6776 } 6777 6778 typedef struct { 6779 Mat *mp; /* intermediate products */ 6780 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6781 PetscInt cp; /* number of intermediate products */ 6782 6783 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6784 PetscInt *startsj_s,*startsj_r; 6785 PetscScalar *bufa; 6786 Mat P_oth; 6787 6788 /* may take advantage of merging product->B */ 6789 Mat Bloc; /* B-local by merging diag and off-diag */ 6790 6791 /* cusparse does not have support to split between symbolic and numeric phases. 6792 When api_user is true, we don't need to update the numerical values 6793 of the temporary storage */ 6794 PetscBool reusesym; 6795 6796 /* support for COO values insertion */ 6797 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6798 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6799 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6800 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 6801 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6802 PetscMemType mtype; 6803 6804 /* customization */ 6805 PetscBool abmerge; 6806 PetscBool P_oth_bind; 6807 } MatMatMPIAIJBACKEND; 6808 6809 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6810 { 6811 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6812 PetscInt i; 6813 6814 PetscFunctionBegin; 6815 PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r)); 6816 PetscCall(PetscFree(mmdata->bufa)); 6817 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v)); 6818 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w)); 6819 PetscCall(MatDestroy(&mmdata->P_oth)); 6820 PetscCall(MatDestroy(&mmdata->Bloc)); 6821 PetscCall(PetscSFDestroy(&mmdata->sf)); 6822 for (i = 0; i < mmdata->cp; i++) { 6823 PetscCall(MatDestroy(&mmdata->mp[i])); 6824 } 6825 PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp)); 6826 PetscCall(PetscFree(mmdata->own[0])); 6827 PetscCall(PetscFree(mmdata->own)); 6828 PetscCall(PetscFree(mmdata->off[0])); 6829 PetscCall(PetscFree(mmdata->off)); 6830 PetscCall(PetscFree(mmdata)); 6831 PetscFunctionReturn(0); 6832 } 6833 6834 /* Copy selected n entries with indices in idx[] of A to v[]. 6835 If idx is NULL, copy the whole data array of A to v[] 6836 */ 6837 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6838 { 6839 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6840 6841 PetscFunctionBegin; 6842 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f)); 6843 if (f) { 6844 PetscCall((*f)(A,n,idx,v)); 6845 } else { 6846 const PetscScalar *vv; 6847 6848 PetscCall(MatSeqAIJGetArrayRead(A,&vv)); 6849 if (n && idx) { 6850 PetscScalar *w = v; 6851 const PetscInt *oi = idx; 6852 PetscInt j; 6853 6854 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6855 } else { 6856 PetscCall(PetscArraycpy(v,vv,n)); 6857 } 6858 PetscCall(MatSeqAIJRestoreArrayRead(A,&vv)); 6859 } 6860 PetscFunctionReturn(0); 6861 } 6862 6863 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6864 { 6865 MatMatMPIAIJBACKEND *mmdata; 6866 PetscInt i,n_d,n_o; 6867 6868 PetscFunctionBegin; 6869 MatCheckProduct(C,1); 6870 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6871 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6872 if (!mmdata->reusesym) { /* update temporary matrices */ 6873 if (mmdata->P_oth) { 6874 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 6875 } 6876 if (mmdata->Bloc) { 6877 PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc)); 6878 } 6879 } 6880 mmdata->reusesym = PETSC_FALSE; 6881 6882 for (i = 0; i < mmdata->cp; i++) { 6883 PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6884 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 6885 } 6886 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6887 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6888 6889 if (mmdata->mptmp[i]) continue; 6890 if (noff) { 6891 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6892 6893 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o)); 6894 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d)); 
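      /* own[i] and off[i] list, for intermediate product mp[i], the positions of its values that contribute to
         rows owned by this rank and by other ranks respectively; the latter are staged in the send buffer coo_w,
         the former are appended to coo_v, which is finally passed to MatSetValuesCOO() below. */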
6895 n_o += noff; 6896 n_d += nown; 6897 } else { 6898 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6899 6900 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d)); 6901 n_d += mm->nz; 6902 } 6903 } 6904 if (mmdata->hasoffproc) { /* offprocess insertion */ 6905 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6906 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6907 } 6908 PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES)); 6909 PetscFunctionReturn(0); 6910 } 6911 6912 /* Support for Pt * A, A * P, or Pt * A * P */ 6913 #define MAX_NUMBER_INTERMEDIATE 4 6914 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6915 { 6916 Mat_Product *product = C->product; 6917 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6918 Mat_MPIAIJ *a,*p; 6919 MatMatMPIAIJBACKEND *mmdata; 6920 ISLocalToGlobalMapping P_oth_l2g = NULL; 6921 IS glob = NULL; 6922 const char *prefix; 6923 char pprefix[256]; 6924 const PetscInt *globidx,*P_oth_idx; 6925 PetscInt i,j,cp,m,n,M,N,*coo_i,*coo_j; 6926 PetscCount ncoo,ncoo_d,ncoo_o,ncoo_oown; 6927 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 6928 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6929 /* a base offset; type-2: sparse with a local to global map table */ 6930 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6931 6932 MatProductType ptype; 6933 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6934 PetscMPIInt size; 6935 6936 PetscFunctionBegin; 6937 MatCheckProduct(C,1); 6938 PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6939 ptype = product->type; 6940 if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 6941 ptype = MATPRODUCT_AB; 6942 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 6943 } 6944 switch (ptype) { 6945 case MATPRODUCT_AB: 6946 A = product->A; 6947 P = product->B; 6948 m = A->rmap->n; 6949 n = P->cmap->n; 6950 M = A->rmap->N; 6951 N = P->cmap->N; 6952 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6953 break; 6954 case MATPRODUCT_AtB: 6955 P = product->A; 6956 A = product->B; 6957 m = P->cmap->n; 6958 n = A->cmap->n; 6959 M = P->cmap->N; 6960 N = A->cmap->N; 6961 hasoffproc = PETSC_TRUE; 6962 break; 6963 case MATPRODUCT_PtAP: 6964 A = product->A; 6965 P = product->B; 6966 m = P->cmap->n; 6967 n = P->cmap->n; 6968 M = P->cmap->N; 6969 N = P->cmap->N; 6970 hasoffproc = PETSC_TRUE; 6971 break; 6972 default: 6973 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6974 } 6975 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size)); 6976 if (size == 1) hasoffproc = PETSC_FALSE; 6977 6978 /* defaults */ 6979 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6980 mp[i] = NULL; 6981 mptmp[i] = PETSC_FALSE; 6982 rmapt[i] = -1; 6983 cmapt[i] = -1; 6984 rmapa[i] = NULL; 6985 cmapa[i] = NULL; 6986 } 6987 6988 /* customization */ 6989 PetscCall(PetscNew(&mmdata)); 6990 mmdata->reusesym = product->api_user; 6991 if (ptype == MATPRODUCT_AB) { 6992 if (product->api_user) { 6993 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat"); 6994 
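      /* The backend-specific options registered just below may be set on the command line, e.g.
           -matmatmult_backend_mergeB -matmatmult_backend_pothbind
         to multiply A_diag with the merged local part of B and to bind P_oth to the CPU. */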
PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 6995 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6996 PetscOptionsEnd(); 6997 } else { 6998 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat"); 6999 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 7000 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7001 PetscOptionsEnd(); 7002 } 7003 } else if (ptype == MATPRODUCT_PtAP) { 7004 if (product->api_user) { 7005 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat"); 7006 PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7007 PetscOptionsEnd(); 7008 } else { 7009 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 7010 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7011 PetscOptionsEnd(); 7012 } 7013 } 7014 a = (Mat_MPIAIJ*)A->data; 7015 p = (Mat_MPIAIJ*)P->data; 7016 PetscCall(MatSetSizes(C,m,n,M,N)); 7017 PetscCall(PetscLayoutSetUp(C->rmap)); 7018 PetscCall(PetscLayoutSetUp(C->cmap)); 7019 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 7020 PetscCall(MatGetOptionsPrefix(C,&prefix)); 7021 7022 cp = 0; 7023 switch (ptype) { 7024 case MATPRODUCT_AB: /* A * P */ 7025 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7026 7027 /* A_diag * P_local (merged or not) */ 7028 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7029 /* P is product->B */ 7030 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7031 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7032 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7033 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7034 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7035 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7036 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7037 mp[cp]->product->api_user = product->api_user; 7038 PetscCall(MatProductSetFromOptions(mp[cp])); 7039 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7040 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7041 PetscCall(ISGetIndices(glob,&globidx)); 7042 rmapt[cp] = 1; 7043 cmapt[cp] = 2; 7044 cmapa[cp] = globidx; 7045 mptmp[cp] = PETSC_FALSE; 7046 cp++; 7047 } else { /* A_diag * P_diag and A_diag * P_off */ 7048 PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp])); 7049 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7050 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7051 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7052 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7053 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7054 mp[cp]->product->api_user = product->api_user; 7055 
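        /* In this unmerged path C = A*P is built from three local products:
           A_diag*P_diag (set up here; rows and columns map to C by the owning range, rmapt = cmapt = 1),
           A_diag*P_off  (columns map to C through p->garray, cmapt = 2), and
           A_off*P_oth   (columns map through the compacted global column indices of P_oth).
           Each intermediate follows the same create / set type / set fill / symbolic sequence. */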
PetscCall(MatProductSetFromOptions(mp[cp])); 7056 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7057 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7058 rmapt[cp] = 1; 7059 cmapt[cp] = 1; 7060 mptmp[cp] = PETSC_FALSE; 7061 cp++; 7062 PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp])); 7063 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7064 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7065 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7066 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7067 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7068 mp[cp]->product->api_user = product->api_user; 7069 PetscCall(MatProductSetFromOptions(mp[cp])); 7070 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7071 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7072 rmapt[cp] = 1; 7073 cmapt[cp] = 2; 7074 cmapa[cp] = p->garray; 7075 mptmp[cp] = PETSC_FALSE; 7076 cp++; 7077 } 7078 7079 /* A_off * P_other */ 7080 if (mmdata->P_oth) { 7081 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */ 7082 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7083 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7084 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7085 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7086 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7087 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7088 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7089 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7090 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7091 mp[cp]->product->api_user = product->api_user; 7092 PetscCall(MatProductSetFromOptions(mp[cp])); 7093 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7094 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7095 rmapt[cp] = 1; 7096 cmapt[cp] = 2; 7097 cmapa[cp] = P_oth_idx; 7098 mptmp[cp] = PETSC_FALSE; 7099 cp++; 7100 } 7101 break; 7102 7103 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7104 /* A is product->B */ 7105 PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7106 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7107 PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp])); 7108 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7109 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7110 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7111 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7112 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7113 mp[cp]->product->api_user = product->api_user; 7114 PetscCall(MatProductSetFromOptions(mp[cp])); 7115 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7116 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7117 PetscCall(ISGetIndices(glob,&globidx)); 7118 rmapt[cp] = 2; 7119 rmapa[cp] = globidx; 7120 cmapt[cp] = 2; 7121 cmapa[cp] = globidx; 7122 mptmp[cp] = PETSC_FALSE; 7123 cp++; 
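      /* The else branch below handles A != P: Pt*A is then split into P_diag^T*A_loc plus P_off^T*A_loc, where
         A_loc is the merged local matrix Bloc; rows produced by the second product belong to other ranks
         (rmapa = p->garray), which is why MATPRODUCT_AtB sets hasoffproc. */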
7124 } else { 7125 PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp])); 7126 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7127 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7128 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7129 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7130 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7131 mp[cp]->product->api_user = product->api_user; 7132 PetscCall(MatProductSetFromOptions(mp[cp])); 7133 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7134 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7135 PetscCall(ISGetIndices(glob,&globidx)); 7136 rmapt[cp] = 1; 7137 cmapt[cp] = 2; 7138 cmapa[cp] = globidx; 7139 mptmp[cp] = PETSC_FALSE; 7140 cp++; 7141 PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp])); 7142 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7143 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7144 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7145 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7146 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7147 mp[cp]->product->api_user = product->api_user; 7148 PetscCall(MatProductSetFromOptions(mp[cp])); 7149 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7150 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7151 rmapt[cp] = 2; 7152 rmapa[cp] = p->garray; 7153 cmapt[cp] = 2; 7154 cmapa[cp] = globidx; 7155 mptmp[cp] = PETSC_FALSE; 7156 cp++; 7157 } 7158 break; 7159 case MATPRODUCT_PtAP: 7160 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7161 /* P is product->B */ 7162 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7163 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7164 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP)); 7165 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7166 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7167 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7168 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7169 mp[cp]->product->api_user = product->api_user; 7170 PetscCall(MatProductSetFromOptions(mp[cp])); 7171 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7172 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7173 PetscCall(ISGetIndices(glob,&globidx)); 7174 rmapt[cp] = 2; 7175 rmapa[cp] = globidx; 7176 cmapt[cp] = 2; 7177 cmapa[cp] = globidx; 7178 mptmp[cp] = PETSC_FALSE; 7179 cp++; 7180 if (mmdata->P_oth) { 7181 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); 7182 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7183 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7184 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7185 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7186 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7187 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7188 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7189 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7190 
PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7191 mp[cp]->product->api_user = product->api_user; 7192 PetscCall(MatProductSetFromOptions(mp[cp])); 7193 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7194 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7195 mptmp[cp] = PETSC_TRUE; 7196 cp++; 7197 PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp])); 7198 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7199 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7200 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7201 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7202 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7203 mp[cp]->product->api_user = product->api_user; 7204 PetscCall(MatProductSetFromOptions(mp[cp])); 7205 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7206 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7207 rmapt[cp] = 2; 7208 rmapa[cp] = globidx; 7209 cmapt[cp] = 2; 7210 cmapa[cp] = P_oth_idx; 7211 mptmp[cp] = PETSC_FALSE; 7212 cp++; 7213 } 7214 break; 7215 default: 7216 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7217 } 7218 /* sanity check */ 7219 if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i); 7220 7221 PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp)); 7222 for (i = 0; i < cp; i++) { 7223 mmdata->mp[i] = mp[i]; 7224 mmdata->mptmp[i] = mptmp[i]; 7225 } 7226 mmdata->cp = cp; 7227 C->product->data = mmdata; 7228 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7229 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7230 7231 /* memory type */ 7232 mmdata->mtype = PETSC_MEMTYPE_HOST; 7233 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"")); 7234 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"")); 7235 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7236 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7237 7238 /* prepare coo coordinates for values insertion */ 7239 7240 /* count total nonzeros of those intermediate seqaij Mats 7241 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7242 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7243 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7244 */ 7245 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7246 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7247 if (mptmp[cp]) continue; 7248 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7249 const PetscInt *rmap = rmapa[cp]; 7250 const PetscInt mr = mp[cp]->rmap->n; 7251 const PetscInt rs = C->rmap->rstart; 7252 const PetscInt re = C->rmap->rend; 7253 const PetscInt *ii = mm->i; 7254 for (i = 0; i < mr; i++) { 7255 const PetscInt gr = rmap[i]; 7256 const PetscInt nz = ii[i+1] - ii[i]; 7257 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7258 else ncoo_oown += nz; /* this row is local */ 7259 } 7260 } else ncoo_d += mm->nz; 7261 } 7262 7263 /* 7264 ncoo: total number of 
nonzeros (including those inserted by remote procs) belonging to this proc
7265
7266      ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7267
7268      off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].
7269
7270      off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7271      own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7272      so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7273
7274      coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7275        Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7276   */
7277   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
7278   PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));
7279
7280   /* gather (i,j) of nonzeros inserted by remote procs */
7281   if (hasoffproc) {
7282     PetscSF  msf;
7283     PetscInt ncoo2,*coo_i2,*coo_j2;
7284
7285     PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
7286     PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
7287     PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */
7288
7289     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7290       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
7291       PetscInt   *idxoff = mmdata->off[cp];
7292       PetscInt   *idxown = mmdata->own[cp];
7293       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7294         const PetscInt *rmap = rmapa[cp];
7295         const PetscInt *cmap = cmapa[cp];
7296         const PetscInt *ii = mm->i;
7297         PetscInt       *coi = coo_i + ncoo_o;
7298         PetscInt       *coj = coo_j + ncoo_o;
7299         const PetscInt mr = mp[cp]->rmap->n;
7300         const PetscInt rs = C->rmap->rstart;
7301         const PetscInt re = C->rmap->rend;
7302         const PetscInt cs = C->cmap->rstart;
7303         for (i = 0; i < mr; i++) {
7304           const PetscInt *jj = mm->j + ii[i];
7305           const PetscInt gr = rmap[i];
7306           const PetscInt nz = ii[i+1] - ii[i];
7307           if (gr < rs || gr >= re) { /* this is an offproc row */
7308             for (j = ii[i]; j < ii[i+1]; j++) {
7309               *coi++ = gr;
7310               *idxoff++ = j;
7311             }
7312             if (!cmapt[cp]) { /* already global */
7313               for (j = 0; j < nz; j++) *coj++ = jj[j];
7314             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7315               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7316             } else { /* offdiag */
7317               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7318             }
7319             ncoo_o += nz;
7320           } else { /* this is a local row */
7321             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
7322           }
7323         }
7324       }
7325       mmdata->off[cp + 1] = idxoff;
7326       mmdata->own[cp + 1] = idxown;
7327     }
7328
7329     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
7330     PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
7331     PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
7332     PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
7333     ncoo = ncoo_d + ncoo_oown + ncoo2;
7334     PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
7335     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7336     PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
7337     PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
7338
PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7339 PetscCall(PetscFree2(coo_i,coo_j)); 7340 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7341 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w)); 7342 coo_i = coo_i2; 7343 coo_j = coo_j2; 7344 } else { /* no offproc values insertion */ 7345 ncoo = ncoo_d; 7346 PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j)); 7347 7348 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7349 PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER)); 7350 PetscCall(PetscSFSetUp(mmdata->sf)); 7351 } 7352 mmdata->hasoffproc = hasoffproc; 7353 7354 /* gather (i,j) of nonzeros inserted locally */ 7355 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7356 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7357 PetscInt *coi = coo_i + ncoo_d; 7358 PetscInt *coj = coo_j + ncoo_d; 7359 const PetscInt *jj = mm->j; 7360 const PetscInt *ii = mm->i; 7361 const PetscInt *cmap = cmapa[cp]; 7362 const PetscInt *rmap = rmapa[cp]; 7363 const PetscInt mr = mp[cp]->rmap->n; 7364 const PetscInt rs = C->rmap->rstart; 7365 const PetscInt re = C->rmap->rend; 7366 const PetscInt cs = C->cmap->rstart; 7367 7368 if (mptmp[cp]) continue; 7369 if (rmapt[cp] == 1) { /* consecutive rows */ 7370 /* fill coo_i */ 7371 for (i = 0; i < mr; i++) { 7372 const PetscInt gr = i + rs; 7373 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 7374 } 7375 /* fill coo_j */ 7376 if (!cmapt[cp]) { /* type-0, already global */ 7377 PetscCall(PetscArraycpy(coj,jj,mm->nz)); 7378 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7379 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7380 } else { /* type-2, local to global for sparse columns */ 7381 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7382 } 7383 ncoo_d += mm->nz; 7384 } else if (rmapt[cp] == 2) { /* sparse rows */ 7385 for (i = 0; i < mr; i++) { 7386 const PetscInt *jj = mm->j + ii[i]; 7387 const PetscInt gr = rmap[i]; 7388 const PetscInt nz = ii[i+1] - ii[i]; 7389 if (gr >= rs && gr < re) { /* local rows */ 7390 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 7391 if (!cmapt[cp]) { /* type-0, already global */ 7392 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7393 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7394 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7395 } else { /* type-2, local to global for sparse columns */ 7396 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7397 } 7398 ncoo_d += nz; 7399 } 7400 } 7401 } 7402 } 7403 if (glob) { 7404 PetscCall(ISRestoreIndices(glob,&globidx)); 7405 } 7406 PetscCall(ISDestroy(&glob)); 7407 if (P_oth_l2g) { 7408 PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx)); 7409 } 7410 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7411 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7412 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v)); 7413 7414 /* preallocate with COO data */ 7415 PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j)); 7416 PetscCall(PetscFree2(coo_i,coo_j)); 7417 PetscFunctionReturn(0); 7418 } 7419 7420 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7421 { 7422 Mat_Product *product = mat->product; 7423 #if defined(PETSC_HAVE_DEVICE) 7424 PetscBool match = PETSC_FALSE; 7425 PetscBool usecpu = PETSC_FALSE; 
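  /* Added note: under PETSC_HAVE_DEVICE, 'match' is only set below when A and B have the same type and
     neither operand is bound to the CPU, while 'usecpu' lets the user force the plain MPIAIJ (CPU)
     product path through the -*_backend_cpu options handled in the switch that follows. */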
7426 #else 7427 PetscBool match = PETSC_TRUE; 7428 #endif 7429 7430 PetscFunctionBegin; 7431 MatCheckProduct(mat,1); 7432 #if defined(PETSC_HAVE_DEVICE) 7433 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7434 PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match)); 7435 } 7436 if (match) { /* we can always fallback to the CPU if requested */ 7437 switch (product->type) { 7438 case MATPRODUCT_AB: 7439 if (product->api_user) { 7440 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat"); 7441 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7442 PetscOptionsEnd(); 7443 } else { 7444 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat"); 7445 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7446 PetscOptionsEnd(); 7447 } 7448 break; 7449 case MATPRODUCT_AtB: 7450 if (product->api_user) { 7451 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat"); 7452 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7453 PetscOptionsEnd(); 7454 } else { 7455 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat"); 7456 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7457 PetscOptionsEnd(); 7458 } 7459 break; 7460 case MATPRODUCT_PtAP: 7461 if (product->api_user) { 7462 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat"); 7463 PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7464 PetscOptionsEnd(); 7465 } else { 7466 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat"); 7467 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7468 PetscOptionsEnd(); 7469 } 7470 break; 7471 default: 7472 break; 7473 } 7474 match = (PetscBool)!usecpu; 7475 } 7476 #endif 7477 if (match) { 7478 switch (product->type) { 7479 case MATPRODUCT_AB: 7480 case MATPRODUCT_AtB: 7481 case MATPRODUCT_PtAP: 7482 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7483 break; 7484 default: 7485 break; 7486 } 7487 } 7488 /* fallback to MPIAIJ ops */ 7489 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7490 PetscFunctionReturn(0); 7491 } 7492 7493 /* 7494 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7495 7496 n - the number of block indices in cc[] 7497 cc - the block indices (must be large enough to contain the indices) 7498 */ 7499 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc) 7500 { 7501 PetscInt cnt = -1,nidx,j; 7502 const PetscInt *idx; 7503 7504 PetscFunctionBegin; 7505 PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL)); 7506 if (nidx) { 7507 cnt = 0; 7508 cc[cnt] = idx[0]/bs; 7509 for (j=1; j<nidx; j++) { 7510 if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs; 7511 } 7512 } 7513 PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL)); 7514 *n = cnt+1; 7515 PetscFunctionReturn(0); 7516 } 7517 7518 /* 7519 Produces a set of block column indices of the matrix block row, one for 
each block represented in the original set of rows
7520
7521    ncollapsed - the number of block indices
7522    collapsed - the block indices (must be large enough to contain the indices)
7523 */
7524 static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
7525 {
7526   PetscInt i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp;
7527
7528   PetscFunctionBegin;
7529   PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev));
7530   for (i=start+1; i<start+bs; i++) {
7531     PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur));
7532     PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged));
7533     cprevtmp = cprev; cprev = merged; merged = cprevtmp;
7534   }
7535   *ncollapsed = nprev;
7536   if (collapsed) *collapsed = cprev;
7537   PetscFunctionReturn(0);
7538 }
7539
7540 /* -------------------------------------------------------------------------- */
7541 /*
7542    MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix
7543
7544    Input Parameters:
7545 +  Amat - matrix
7546 .  symmetrize - make the result symmetric
7547 -  scale - scale with diagonal
7548
7549    Output Parameter:
7550 .  a_Gmat - output scalar graph (entries are >= 0)
7551
7552 */
7553 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat)
7554 {
7555   PetscInt  Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
7556   MPI_Comm  comm;
7557   Mat       Gmat;
7558   PetscBool ismpiaij,isseqaij;
7559   Mat       a, b, c;
7560   MatType   jtype;
7561
7562   PetscFunctionBegin;
7563   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
7564   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7565   PetscCall(MatGetSize(Amat, &MM, &NN));
7566   PetscCall(MatGetBlockSize(Amat, &bs));
7567   nloc = (Iend-Istart)/bs;
7568
7569   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
7570   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
7571   PetscCheck(isseqaij || ismpiaij,comm,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");
7572
7573   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7574   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7575      implementation */
7576   if (bs > 1) {
7577     PetscCall(MatGetType(Amat,&jtype));
7578     PetscCall(MatCreate(comm, &Gmat));
7579     PetscCall(MatSetType(Gmat, jtype));
7580     PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
7581     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7582     if (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray) {
7583       PetscInt  *d_nnz, *o_nnz;
7584       MatScalar *aa,val,AA[4096];
7585       PetscInt  *aj,*ai,AJ[4096],nc;
7586       if (isseqaij) { a = Amat; b = NULL; }
7587       else {
7588         Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data;
7589         a = d->A; b = d->B;
7590       }
7591       PetscCall(PetscInfo(Amat,"New bs>1 Graph. nloc=%" PetscInt_FMT "\n",nloc));
7592       PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
7593       for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
7594         PetscInt *nnz = (c==a) ?
d_nnz : o_nnz, nmax=0; 7595 const PetscInt *cols; 7596 for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows 7597 PetscCall(MatGetRow(c,brow,&jj,&cols,NULL)); 7598 nnz[brow/bs] = jj/bs; 7599 if (jj%bs) ok = 0; 7600 if (cols) j0 = cols[0]; 7601 else j0 = -1; 7602 PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL)); 7603 if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs]; 7604 for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks 7605 PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL)); 7606 if (jj%bs) ok = 0; 7607 if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; 7608 if (nnz[brow/bs] != jj/bs) ok = 0; 7609 PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL)); 7610 } 7611 if (!ok) { 7612 PetscCall(PetscFree2(d_nnz,o_nnz)); 7613 goto old_bs; 7614 } 7615 } 7616 PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %" PetscInt_FMT " too small 4096.",nmax); 7617 } 7618 PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz)); 7619 PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz)); 7620 PetscCall(PetscFree2(d_nnz,o_nnz)); 7621 // diag 7622 for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows 7623 Mat_SeqAIJ *aseq = (Mat_SeqAIJ*)a->data; 7624 ai = aseq->i; 7625 n = ai[brow+1] - ai[brow]; 7626 aj = aseq->j + ai[brow]; 7627 for (int k=0; k<n; k += bs) { // block columns 7628 AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart) 7629 val = 0; 7630 for (int ii=0; ii<bs; ii++) { // rows in block 7631 aa = aseq->a + ai[brow+ii] + k; 7632 for (int jj=0; jj<bs; jj++) { // columns in block 7633 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7634 } 7635 } 7636 AA[k/bs] = val; 7637 } 7638 grow = Istart/bs + brow/bs; 7639 PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES)); 7640 } 7641 // off-diag 7642 if (ismpiaij) { 7643 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)Amat->data; 7644 const PetscScalar *vals; 7645 const PetscInt *cols, *garray = aij->garray; 7646 PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?"); 7647 for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows 7648 PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL)); 7649 for (int k=0,cidx=0 ; k < ncols ; k += bs, cidx++) { 7650 AA[k/bs] = 0; 7651 AJ[cidx] = garray[cols[k]]/bs; 7652 } 7653 nc = ncols/bs; 7654 PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL)); 7655 for (int ii=0; ii<bs; ii++) { // rows in block 7656 PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals)); 7657 for (int k=0; k<ncols; k += bs) { 7658 for (int jj=0; jj<bs; jj++) { // cols in block 7659 AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj])); 7660 } 7661 } 7662 PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals)); 7663 } 7664 grow = Istart/bs + brow/bs; 7665 PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES)); 7666 } 7667 } 7668 PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY)); 7669 PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY)); 7670 } else { 7671 const PetscScalar *vals; 7672 const PetscInt *idx; 7673 PetscInt *d_nnz, *o_nnz,*w0,*w1,*w2; 7674 old_bs: 7675 /* 7676 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7677 */ 7678 PetscCall(PetscInfo(Amat,"OLD bs>1 CreateGraph\n")); 7679 PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 
0 : nloc, &o_nnz)); 7680 if (isseqaij) { 7681 PetscInt max_d_nnz; 7682 /* 7683 Determine exact preallocation count for (sequential) scalar matrix 7684 */ 7685 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz)); 7686 max_d_nnz = PetscMin(nloc,bs*max_d_nnz); 7687 PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2)); 7688 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) { 7689 PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL)); 7690 } 7691 PetscCall(PetscFree3(w0,w1,w2)); 7692 } else if (ismpiaij) { 7693 Mat Daij,Oaij; 7694 const PetscInt *garray; 7695 PetscInt max_d_nnz; 7696 PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray)); 7697 /* 7698 Determine exact preallocation count for diagonal block portion of scalar matrix 7699 */ 7700 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz)); 7701 max_d_nnz = PetscMin(nloc,bs*max_d_nnz); 7702 PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2)); 7703 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7704 PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL)); 7705 } 7706 PetscCall(PetscFree3(w0,w1,w2)); 7707 /* 7708 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7709 */ 7710 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7711 o_nnz[jj] = 0; 7712 for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */ 7713 PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL)); 7714 o_nnz[jj] += ncols; 7715 PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL)); 7716 } 7717 if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc; 7718 } 7719 } else SETERRQ(comm,PETSC_ERR_USER,"Require AIJ matrix type"); 7720 /* get scalar copy (norms) of matrix */ 7721 PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz)); 7722 PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz)); 7723 PetscCall(PetscFree2(d_nnz,o_nnz)); 7724 for (Ii = Istart; Ii < Iend; Ii++) { 7725 PetscInt dest_row = Ii/bs; 7726 PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals)); 7727 for (jj=0; jj<ncols; jj++) { 7728 PetscInt dest_col = idx[jj]/bs; 7729 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7730 PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES)); 7731 } 7732 PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals)); 7733 } 7734 PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY)); 7735 PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY)); 7736 } 7737 } else { 7738 /* TODO GPU: optimization proposal, each class provides fast implementation of this 7739 procedure via MatAbs API */ 7740 /* just copy scalar matrix & abs() */ 7741 PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7742 if (isseqaij) { a = Gmat; b = NULL; } 7743 else { 7744 Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data; 7745 a = d->A; b = d->B; 7746 } 7747 /* abs */ 7748 for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ 7749 MatInfo info; 7750 PetscScalar *avals; 7751 PetscCall(MatGetInfo(c,MAT_LOCAL,&info)); 7752 PetscCall(MatSeqAIJGetArray(c,&avals)); 7753 for (int jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7754 PetscCall(MatSeqAIJRestoreArray(c,&avals)); 7755 } 7756 } 7757 if (symmetrize) { 7758 PetscBool issym; 7759 PetscCall(MatGetOption(Amat,MAT_SYMMETRIC,&issym)); 7760 if (!issym) { 7761 Mat matTrans; 7762 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 7763 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? 
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7764       PetscCall(MatDestroy(&matTrans));
7765     }
7766     PetscCall(MatSetOption(Gmat,MAT_SYMMETRIC,PETSC_TRUE));
7767   } else {
7768     PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
7769   }
7770   if (scale) {
7771     /* scale Gmat so that all resulting diagonal values equal 1 or -1 */
7772     Vec diag;
7773     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
7774     PetscCall(MatGetDiagonal(Gmat, diag));
7775     PetscCall(VecReciprocal(diag));
7776     PetscCall(VecSqrtAbs(diag));
7777     PetscCall(MatDiagonalScale(Gmat, diag, diag));
7778     PetscCall(VecDestroy(&diag));
7779   }
7780   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
7781   *a_Gmat = Gmat;
7782   PetscFunctionReturn(0);
7783 }
7784
7785 /* -------------------------------------------------------------------------- */
7786 /*@C
7787    MatFilter_AIJ - filter (remove) values with small absolute values
7788    With vfilter < 0 it does nothing, so it should not be called in that case.
7789
7790    Collective on Mat
7791
7792    Input Parameters:
7793 +  Gmat - the graph
7794 -  vfilter - threshold parameter [0,1)
7795
7796    Output Parameter:
7797 .  filteredG - output filtered scalar graph
7798
7799    Level: developer
7800
7801    Notes:
7802    This is called before graph coarseners are called.
7803    This could go into Mat, move 'symm' to GAMG
7804
7805 .seealso: `PCGAMGSetThreshold()`
7806 @*/
7807 PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat,PetscReal vfilter, Mat *filteredG)
7808 {
7809   PetscInt          Istart,Iend,ncols,nnz0,nnz1, NN, MM, nloc;
7810   Mat               tGmat;
7811   MPI_Comm          comm;
7812   const PetscScalar *vals;
7813   const PetscInt    *idx;
7814   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols=0;
7815   MatScalar         *AA; // this is checked in graph
7816   PetscBool         isseqaij;
7817   Mat               a, b, c;
7818   MatType           jtype;
7819
7820   PetscFunctionBegin;
7821   PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm));
7822   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isseqaij));
7823   PetscCall(MatGetType(Gmat,&jtype));
7824   PetscCall(MatCreate(comm, &tGmat));
7825   PetscCall(MatSetType(tGmat, jtype));
7826
7827   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
7828      Also, if the matrix is symmetric, can we skip this
7829      operation? It can be very expensive on large matrices.
*/ 7830 7831 // global sizes 7832 PetscCall(MatGetSize(Gmat, &MM, &NN)); 7833 PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); 7834 nloc = Iend - Istart; 7835 PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz)); 7836 if (isseqaij) { a = Gmat; b = NULL; } 7837 else { 7838 Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data; 7839 a = d->A; b = d->B; 7840 garray = d->garray; 7841 } 7842 /* Determine upper bound on non-zeros needed in new filtered matrix */ 7843 for (PetscInt row=0; row < nloc; row++) { 7844 PetscCall(MatGetRow(a,row,&ncols,NULL,NULL)); 7845 d_nnz[row] = ncols; 7846 if (ncols>maxcols) maxcols=ncols; 7847 PetscCall(MatRestoreRow(a,row,&ncols,NULL,NULL)); 7848 } 7849 if (b) { 7850 for (PetscInt row=0; row < nloc; row++) { 7851 PetscCall(MatGetRow(b,row,&ncols,NULL,NULL)); 7852 o_nnz[row] = ncols; 7853 if (ncols>maxcols) maxcols=ncols; 7854 PetscCall(MatRestoreRow(b,row,&ncols,NULL,NULL)); 7855 } 7856 } 7857 PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM)); 7858 PetscCall(MatSetBlockSizes(tGmat, 1, 1)); 7859 PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz)); 7860 PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz)); 7861 PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 7862 PetscCall(PetscFree2(d_nnz,o_nnz)); 7863 // 7864 PetscCall(PetscMalloc2(maxcols, &AA,maxcols, &AJ)); 7865 nnz0 = nnz1 = 0; 7866 for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ 7867 for (PetscInt row=0, grow=Istart, ncol_row, jj ; row < nloc; row++,grow++) { 7868 PetscCall(MatGetRow(c,row,&ncols,&idx,&vals)); 7869 for (ncol_row=jj=0; jj<ncols; jj++,nnz0++) { 7870 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7871 if (PetscRealPart(sv) > vfilter) { 7872 nnz1++; 7873 PetscInt cid = idx[jj] + Istart; //diag 7874 if (c!=a) cid = garray[idx[jj]]; 7875 AA[ncol_row] = vals[jj]; 7876 AJ[ncol_row] = cid; 7877 ncol_row++; 7878 } 7879 } 7880 PetscCall(MatRestoreRow(c,row,&ncols,&idx,&vals)); 7881 PetscCall(MatSetValues(tGmat,1,&grow,ncol_row,AJ,AA,INSERT_VALUES)); 7882 } 7883 } 7884 PetscCall(PetscFree2(AA,AJ)); 7885 PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY)); 7886 PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY)); 7887 PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); /* Normal Mat options are not relevant ? */ 7888 7889 PetscCall(PetscInfo(tGmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", 7890 (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, (double)vfilter, 7891 (!nloc) ? 1. : (double)nnz0/(double)nloc,MM,(int)maxcols)); 7892 7893 *filteredG = tGmat; 7894 PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view")); 7895 PetscFunctionReturn(0); 7896 } 7897 7898 /* 7899 Special version for direct calls from Fortran 7900 */ 7901 #include <petsc/private/fortranimpl.h> 7902 7903 /* Change these macros so can be used in void function */ 7904 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 7905 #undef PetscCall 7906 #define PetscCall(...) do { \ 7907 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 7908 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 7909 *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \ 7910 return; \ 7911 } \ 7912 } while (0) 7913 7914 #undef SETERRQ 7915 #define SETERRQ(comm,ierr,...) 
do { \ 7916 *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \ 7917 return; \ 7918 } while (0) 7919 7920 #if defined(PETSC_HAVE_FORTRAN_CAPS) 7921 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 7922 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 7923 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 7924 #else 7925 #endif 7926 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 7927 { 7928 Mat mat = *mmat; 7929 PetscInt m = *mm, n = *mn; 7930 InsertMode addv = *maddv; 7931 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 7932 PetscScalar value; 7933 7934 MatCheckPreallocated(mat,1); 7935 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 7936 else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 7937 { 7938 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 7939 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 7940 PetscBool roworiented = aij->roworiented; 7941 7942 /* Some Variables required in the macro */ 7943 Mat A = aij->A; 7944 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 7945 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 7946 MatScalar *aa; 7947 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 7948 Mat B = aij->B; 7949 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 7950 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 7951 MatScalar *ba; 7952 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 7953 * cannot use "#if defined" inside a macro. 
*/ 7954 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 7955 7956 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 7957 PetscInt nonew = a->nonew; 7958 MatScalar *ap1,*ap2; 7959 7960 PetscFunctionBegin; 7961 PetscCall(MatSeqAIJGetArray(A,&aa)); 7962 PetscCall(MatSeqAIJGetArray(B,&ba)); 7963 for (i=0; i<m; i++) { 7964 if (im[i] < 0) continue; 7965 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 7966 if (im[i] >= rstart && im[i] < rend) { 7967 row = im[i] - rstart; 7968 lastcol1 = -1; 7969 rp1 = aj + ai[row]; 7970 ap1 = aa + ai[row]; 7971 rmax1 = aimax[row]; 7972 nrow1 = ailen[row]; 7973 low1 = 0; 7974 high1 = nrow1; 7975 lastcol2 = -1; 7976 rp2 = bj + bi[row]; 7977 ap2 = ba + bi[row]; 7978 rmax2 = bimax[row]; 7979 nrow2 = bilen[row]; 7980 low2 = 0; 7981 high2 = nrow2; 7982 7983 for (j=0; j<n; j++) { 7984 if (roworiented) value = v[i*n+j]; 7985 else value = v[i+j*m]; 7986 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 7987 if (in[j] >= cstart && in[j] < cend) { 7988 col = in[j] - cstart; 7989 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 7990 } else if (in[j] < 0) continue; 7991 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 7992 /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */ 7993 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 7994 } else { 7995 if (mat->was_assembled) { 7996 if (!aij->colmap) { 7997 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 7998 } 7999 #if defined(PETSC_USE_CTABLE) 8000 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); 8001 col--; 8002 #else 8003 col = aij->colmap[in[j]] - 1; 8004 #endif 8005 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 8006 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8007 col = in[j]; 8008 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8009 B = aij->B; 8010 b = (Mat_SeqAIJ*)B->data; 8011 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 8012 rp2 = bj + bi[row]; 8013 ap2 = ba + bi[row]; 8014 rmax2 = bimax[row]; 8015 nrow2 = bilen[row]; 8016 low2 = 0; 8017 high2 = nrow2; 8018 bm = aij->B->rmap->n; 8019 ba = b->a; 8020 inserted = PETSC_FALSE; 8021 } 8022 } else col = in[j]; 8023 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 8024 } 8025 } 8026 } else if (!aij->donotstash) { 8027 if (roworiented) { 8028 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8029 } else { 8030 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8031 } 8032 } 8033 } 8034 PetscCall(MatSeqAIJRestoreArray(A,&aa)); 8035 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 8036 } 8037 PetscFunctionReturnVoid(); 8038 } 8039 8040 /* Undefining these here since they were redefined from their original definition above! No 8041 * other PETSc functions should be defined past this point, as it is impossible to recover the 8042 * original definitions */ 8043 #undef PetscCall 8044 #undef SETERRQ 8045
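
/*
   Usage sketch (added illustration, not compiled as part of this file): the MPIAIJBACKEND product
   machinery above is reached through the ordinary MatProduct interface when the operands are device
   AIJ matrices (e.g. MATMPIAIJCUSPARSE or MATMPIAIJKOKKOS). The matrices A and P below are assumed
   to have been created and assembled by the caller; the call sequence itself is the standard public
   PETSc API.

     Mat A,P,C;
     // ... assemble A and P with type MATMPIAIJCUSPARSE (or MATMPIAIJKOKKOS) ...
     PetscCall(MatProductCreate(A,P,NULL,&C));
     PetscCall(MatProductSetType(C,MATPRODUCT_PtAP));   // MATPRODUCT_AB and MATPRODUCT_AtB are also handled
     PetscCall(MatProductSetFromOptions(C));            // may select MatProductSymbolic_MPIAIJBACKEND
     PetscCall(MatProductSymbolic(C));                  // builds the intermediate products and the COO preallocation
     PetscCall(MatProductNumeric(C));                   // MatProductNumeric_MPIAIJBACKEND fills the values
     PetscCall(MatDestroy(&C));

   The CPU fallback handled in MatProductSetFromOptions_MPIAIJBACKEND() can be requested at run time,
   e.g. with -matmatmult_backend_cpu / -matptap_backend_cpu (MatMatMult()/MatPtAP() callers) or
   -mat_product_algorithm_backend_cpu (MatProduct API callers). PetscCall() is #undef'd just above for
   the Fortran stub; the sketch refers to the usual PETSc macro as it would appear in application code.
*/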