#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B));
  PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B));
  PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B));
  PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
    automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/
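
/*
  Example (added commentary; an illustrative sketch, not additional library code): creating a MATAIJ
  matrix and calling both preallocation routines, as recommended in the manual page above. The global
  size N and the per-row nonzero estimates (5 diagonal, 2 off-diagonal) are assumptions for the sketch.

    Mat      C;
    PetscInt N = 100;

    PetscCall(MatCreate(PETSC_COMM_WORLD,&C));
    PetscCall(MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,N,N));
    PetscCall(MatSetType(C,MATAIJ));
    PetscCall(MatSeqAIJSetPreallocation(C,5,NULL));        <- used on a single-process communicator
    PetscCall(MatMPIAIJSetPreallocation(C,5,NULL,2,NULL)); <- used on a multi-process communicator
    ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd(), MatDestroy() ...
*/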

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A,flg));
  if (a->B) PetscCall(MatBindToCPU(a->B,flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
   */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag,flg));

  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
    PetscCall(MatSetBlockSizes(mat->B,rbs,1));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
    PetscFunctionReturn(0);
  }
  PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool  cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y,&cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A,D,is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y,D,is));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data;
  PetscInt   i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
  PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
  for (i=0; i<nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,m,n,*garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscCalloc1(n,&work));
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
  PetscCall(MatFindNonzeroRows(a->B,&gis));
  PetscCall(ISGetSize(gis,&ngis));
  PetscCall(ISGetSize(sis,&nsis));
  PetscCall(ISGetIndices(sis,&isis));
  PetscCall(ISGetIndices(gis,&igis));

  PetscCall(PetscMalloc1(ngis+nsis,&iis));
  PetscCall(PetscArraycpy(iis,igis,ngis));
  PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n,iis));
  PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
  for (i=0; i<n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));

  PetscCall(ISRestoreIndices(sis,&isis));
  PetscCall(ISRestoreIndices(gis,&igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each processor
  has an order N integer array) but it is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt   n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
  for (i=0; i<n; i++) {
    PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
  }
#else
  PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
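
/*
   Usage note (added commentary, not additional library code): because the map above stores
   local_index+1 (so that 0 can mean "not present"), lookups later in this file, for example in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ(), follow the pattern sketched below, where gcol
   is a global column index and col becomes the local column in aij->B, or -1 if gcol is not (yet)
   a ghost column of this process:

   #if defined(PETSC_USE_CTABLE)
     PetscCall(PetscTableFind(aij->colmap,gcol+1,&col));
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif
*/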

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1)); \
    PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1)); \
    PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscInt    l,*garray = mat->garray,diag;
  PetscScalar *aa,*ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A,&diag,NULL));
  row = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row],v,l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }

  /* diagonal part */
  if (a->i[row+1]-a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A,&aa));
    PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
  }

  /* right of diagonal part */
  if (b->i[row+1]-b->i[row]-l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *aa,*ba;
  PetscInt   *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt   nonew;
  MatScalar  *ap1,*ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A,&aa));
  PetscCall(MatSeqAIJGetArray(B,&ba));
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
        else {
          PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          if (mat->was_assembled) {
            if (!aij->colmap) {
              PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
            }
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
              } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, ba might have been freed due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B,&ba));
  PetscFunctionReturn(0);
}
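
/*
   Usage note (added commentary): entries destined for rows owned by another process are not sent
   immediately by MatSetValues_MPIAIJ(); they are collected in mat->stash and only communicated by
   the assembly routines further below. A caller therefore always follows value insertion with an
   assembly pair, for example (illustrative sketch only; grow, gcol and val are assumed to be set
   up by the caller):

     PetscCall(MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES));
     PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
*/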

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A = aij->A; /* diagonal part of the matrix */
  Mat        B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat         A = aij->A; /* diagonal part of the matrix */
  Mat         B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am = aij->A->rmap->n,j;
  PetscInt    *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt   i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
        } else {
          if (!aij->colmap) {
            PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt   nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt n;
  PetscInt    i,j,rstart,ncols,flg;
  PetscInt    *row,*col;
  PetscBool   other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A,mode));
  PetscCall(MatAssemblyEnd(aij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  }
  PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B,mode));
  PetscCall(MatAssemblyEnd(aij->B,mode));

  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
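
/*
   Usage note (added commentary): when x and b are provided, the routine above also fixes the
   right-hand side so that the zeroed rows enforce b_i = diag * x_i. A typical call for imposing
   Dirichlet-type conditions therefore looks like the following sketch (rows, nrows, x and b are
   assumed to be set up by the caller):

     PetscCall(MatZeroRows(A,nrows,rows,1.0,x,b));
*/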

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask,*aij_a;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask,&bb));
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
  ii = aij->i;
  for (i=0; i<len; i++) {
    PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscInt   nt;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx,&nt));
  PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A,xx,yy));
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
  PetscFunctionReturn(0);
}
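
/*
   Note (added commentary): MatMult_MPIAIJ() computes the owned part of y = A*x as
   y = A_diag*x_local + B_offdiag*lvec, where lvec holds the needed ghost entries of x.
   The scatter that fills lvec is started before the local product A_diag*x_local and
   completed afterwards, so communication overlaps with the purely local computation.
*/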

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}
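
/*
   Note (added commentary): the transpose product reverses the communication pattern of
   MatMult_MPIAIJ(). Each process first forms B_offdiag^T * x into lvec and A_diag^T * x into its
   owned part of y; the reverse scatter with ADD_VALUES then accumulates the lvec contributions
   into the owning processes, so that y = A^T * x overall.
*/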

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ  *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat         Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS          Me,Notme;
  PetscInt    M,N,first,last,*notme,i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
  PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat,&M,&N));
  PetscCall(MatGetOwnershipRange(Amat,&first,&last));
  PetscCall(PetscMalloc1(N-last+first,&notme));
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
  PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
  PetscCall(MatDestroyMatrices(1,&Aoffs));
  PetscCall(MatDestroyMatrices(1,&Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A,v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A,aa));
  PetscCall(MatScale(a->B,aa));
  PetscFunctionReturn(0);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL)); 1196 #endif 1197 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL)); 1198 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 1199 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL)); 1200 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL)); 1201 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL)); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1206 { 1207 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1208 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1209 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1210 const PetscInt *garray = aij->garray; 1211 const PetscScalar *aa,*ba; 1212 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1213 PetscInt *rowlens; 1214 PetscInt *colidxs; 1215 PetscScalar *matvals; 1216 1217 PetscFunctionBegin; 1218 PetscCall(PetscViewerSetUp(viewer)); 1219 1220 M = mat->rmap->N; 1221 N = mat->cmap->N; 1222 m = mat->rmap->n; 1223 rs = mat->rmap->rstart; 1224 cs = mat->cmap->rstart; 1225 nz = A->nz + B->nz; 1226 1227 /* write matrix header */ 1228 header[0] = MAT_FILE_CLASSID; 1229 header[1] = M; header[2] = N; header[3] = nz; 1230 PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat))); 1231 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1232 1233 /* fill in and store row lengths */ 1234 PetscCall(PetscMalloc1(m,&rowlens)); 1235 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1236 PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT)); 1237 PetscCall(PetscFree(rowlens)); 1238 1239 /* fill in and store column indices */ 1240 PetscCall(PetscMalloc1(nz,&colidxs)); 1241 for (cnt=0, i=0; i<m; i++) { 1242 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1243 if (garray[B->j[jb]] > cs) break; 1244 colidxs[cnt++] = garray[B->j[jb]]; 1245 } 1246 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1247 colidxs[cnt++] = A->j[ja] + cs; 1248 for (; jb<B->i[i+1]; jb++) 1249 colidxs[cnt++] = garray[B->j[jb]]; 1250 } 1251 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1252 PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 1253 PetscCall(PetscFree(colidxs)); 1254 1255 /* fill in and store nonzero values */ 1256 PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa)); 1257 PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba)); 1258 PetscCall(PetscMalloc1(nz,&matvals)); 1259 for (cnt=0, i=0; i<m; i++) { 1260 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1261 if (garray[B->j[jb]] > cs) break; 1262 matvals[cnt++] = ba[jb]; 1263 } 1264 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1265 matvals[cnt++] = aa[ja]; 1266 for (; jb<B->i[i+1]; jb++) 1267 matvals[cnt++] = ba[jb]; 1268 } 1269 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa)); 1270 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba)); 1271 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1272 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 1273 PetscCall(PetscFree(matvals)); 1274 1275 /* write block size option to the viewer's .info file */ 1276 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1277 PetscFunctionReturn(0); 
}

#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      PetscCall(PetscMalloc1(size,&nz));
      PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg/size;
      PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo  info;
      PetscInt *inodes=NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
                                                     rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
                                                     rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
      PetscCall(MatView(aij->A,viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
    PetscCall(MatView(aij->A,viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
    PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
      PetscCall(PetscObjectReference((PetscObject)AA[0]));
      A  = AA[0];
      Av = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
      }
      PetscCall(MatView_SeqAIJ(Av,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscBool iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
  if (iascii || isdraw || isbinary || issocket) {
    PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Vec        bb1 = NULL;
  PetscBool  hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1433 PetscFunctionReturn(0); 1434 } 1435 1436 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1437 PetscCall(VecDuplicate(bb,&bb1)); 1438 } 1439 1440 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1441 if (flag & SOR_ZERO_INITIAL_GUESS) { 1442 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1443 its--; 1444 } 1445 1446 while (its--) { 1447 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1448 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1449 1450 /* update rhs: bb1 = bb - B*x */ 1451 PetscCall(VecScale(mat->lvec,-1.0)); 1452 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1453 1454 /* local sweep */ 1455 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 1456 } 1457 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1458 if (flag & SOR_ZERO_INITIAL_GUESS) { 1459 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1460 its--; 1461 } 1462 while (its--) { 1463 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1464 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1465 1466 /* update rhs: bb1 = bb - B*x */ 1467 PetscCall(VecScale(mat->lvec,-1.0)); 1468 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1469 1470 /* local sweep */ 1471 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 1472 } 1473 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1474 if (flag & SOR_ZERO_INITIAL_GUESS) { 1475 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1476 its--; 1477 } 1478 while (its--) { 1479 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1480 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1481 1482 /* update rhs: bb1 = bb - B*x */ 1483 PetscCall(VecScale(mat->lvec,-1.0)); 1484 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1485 1486 /* local sweep */ 1487 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 1488 } 1489 } else if (flag & SOR_EISENSTAT) { 1490 Vec xx1; 1491 1492 PetscCall(VecDuplicate(bb,&xx1)); 1493 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx)); 1494 1495 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1496 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1497 if (!mat->diag) { 1498 PetscCall(MatCreateVecs(matin,&mat->diag,NULL)); 1499 PetscCall(MatGetDiagonal(matin,mat->diag)); 1500 } 1501 PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop)); 1502 if (hasop) { 1503 PetscCall(MatMultDiagonalBlock(matin,xx,bb1)); 1504 } else { 1505 PetscCall(VecPointwiseMult(bb1,mat->diag,xx)); 1506 } 1507 PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb)); 1508 1509 PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1)); 1510 1511 /* local sweep */ 1512 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1)); 1513 PetscCall(VecAXPY(xx,1.0,xx1)); 1514 PetscCall(VecDestroy(&xx1)); 1515 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1516 1517 PetscCall(VecDestroy(&bb1)); 1518 1519 matin->factorerrortype = 
mat->A->factorerrortype; 1520 PetscFunctionReturn(0); 1521 } 1522 1523 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1524 { 1525 Mat aA,aB,Aperm; 1526 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1527 PetscScalar *aa,*ba; 1528 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1529 PetscSF rowsf,sf; 1530 IS parcolp = NULL; 1531 PetscBool done; 1532 1533 PetscFunctionBegin; 1534 PetscCall(MatGetLocalSize(A,&m,&n)); 1535 PetscCall(ISGetIndices(rowp,&rwant)); 1536 PetscCall(ISGetIndices(colp,&cwant)); 1537 PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest)); 1538 1539 /* Invert row permutation to find out where my rows should go */ 1540 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf)); 1541 PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant)); 1542 PetscCall(PetscSFSetFromOptions(rowsf)); 1543 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1544 PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1545 PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1546 1547 /* Invert column permutation to find out where my columns should go */ 1548 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1549 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant)); 1550 PetscCall(PetscSFSetFromOptions(sf)); 1551 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1552 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1553 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1554 PetscCall(PetscSFDestroy(&sf)); 1555 1556 PetscCall(ISRestoreIndices(rowp,&rwant)); 1557 PetscCall(ISRestoreIndices(colp,&cwant)); 1558 PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols)); 1559 1560 /* Find out where my gcols should go */ 1561 PetscCall(MatGetSize(aB,NULL,&ng)); 1562 PetscCall(PetscMalloc1(ng,&gcdest)); 1563 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1564 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols)); 1565 PetscCall(PetscSFSetFromOptions(sf)); 1566 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1567 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1568 PetscCall(PetscSFDestroy(&sf)); 1569 1570 PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz)); 1571 PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1572 PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1573 for (i=0; i<m; i++) { 1574 PetscInt row = rdest[i]; 1575 PetscMPIInt rowner; 1576 PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner)); 1577 for (j=ai[i]; j<ai[i+1]; j++) { 1578 PetscInt col = cdest[aj[j]]; 1579 PetscMPIInt cowner; 1580 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */ 1581 if (rowner == cowner) dnnz[i]++; 1582 else onnz[i]++; 1583 } 1584 for (j=bi[i]; j<bi[i+1]; j++) { 1585 PetscInt col = gcdest[bj[j]]; 1586 PetscMPIInt cowner; 1587 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); 1588 if (rowner == cowner) dnnz[i]++; 1589 else onnz[i]++; 1590 } 1591 } 1592 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1593 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1594 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1595 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1596 PetscCall(PetscSFDestroy(&rowsf)); 1597 1598 
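  /* Create the permuted matrix, preallocated row by row with the counts gathered above
     (tdnnz for the diagonal part, tonnz for the off-diagonal part) */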
PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm)); 1599 PetscCall(MatSeqAIJGetArray(aA,&aa)); 1600 PetscCall(MatSeqAIJGetArray(aB,&ba)); 1601 for (i=0; i<m; i++) { 1602 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1603 PetscInt j0,rowlen; 1604 rowlen = ai[i+1] - ai[i]; 1605 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1606 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1607 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES)); 1608 } 1609 rowlen = bi[i+1] - bi[i]; 1610 for (j0=j=0; j<rowlen; j0=j) { 1611 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1612 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES)); 1613 } 1614 } 1615 PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY)); 1616 PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY)); 1617 PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1618 PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1619 PetscCall(MatSeqAIJRestoreArray(aA,&aa)); 1620 PetscCall(MatSeqAIJRestoreArray(aB,&ba)); 1621 PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz)); 1622 PetscCall(PetscFree3(work,rdest,cdest)); 1623 PetscCall(PetscFree(gcdest)); 1624 if (parcolp) PetscCall(ISDestroy(&colp)); 1625 *B = Aperm; 1626 PetscFunctionReturn(0); 1627 } 1628 1629 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1630 { 1631 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1632 1633 PetscFunctionBegin; 1634 PetscCall(MatGetSize(aij->B,NULL,nghosts)); 1635 if (ghosts) *ghosts = aij->garray; 1636 PetscFunctionReturn(0); 1637 } 1638 1639 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1640 { 1641 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1642 Mat A = mat->A,B = mat->B; 1643 PetscLogDouble isend[5],irecv[5]; 1644 1645 PetscFunctionBegin; 1646 info->block_size = 1.0; 1647 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1648 1649 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1650 isend[3] = info->memory; isend[4] = info->mallocs; 1651 1652 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1653 1654 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1655 isend[3] += info->memory; isend[4] += info->mallocs; 1656 if (flag == MAT_LOCAL) { 1657 info->nz_used = isend[0]; 1658 info->nz_allocated = isend[1]; 1659 info->nz_unneeded = isend[2]; 1660 info->memory = isend[3]; 1661 info->mallocs = isend[4]; 1662 } else if (flag == MAT_GLOBAL_MAX) { 1663 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 1664 1665 info->nz_used = irecv[0]; 1666 info->nz_allocated = irecv[1]; 1667 info->nz_unneeded = irecv[2]; 1668 info->memory = irecv[3]; 1669 info->mallocs = irecv[4]; 1670 } else if (flag == MAT_GLOBAL_SUM) { 1671 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 1672 1673 info->nz_used = irecv[0]; 1674 info->nz_allocated = irecv[1]; 1675 info->nz_unneeded = irecv[2]; 1676 info->memory = irecv[3]; 1677 info->mallocs = irecv[4]; 1678 } 1679 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1680 info->fill_ratio_needed = 0; 1681 info->factor_mallocs = 0; 1682 PetscFunctionReturn(0); 1683 } 1684 1685 PetscErrorCode MatSetOption_MPIAIJ(Mat 
A,MatOption op,PetscBool flg) 1686 { 1687 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1688 1689 PetscFunctionBegin; 1690 switch (op) { 1691 case MAT_NEW_NONZERO_LOCATIONS: 1692 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1693 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1694 case MAT_KEEP_NONZERO_PATTERN: 1695 case MAT_NEW_NONZERO_LOCATION_ERR: 1696 case MAT_USE_INODES: 1697 case MAT_IGNORE_ZERO_ENTRIES: 1698 case MAT_FORM_EXPLICIT_TRANSPOSE: 1699 MatCheckPreallocated(A,1); 1700 PetscCall(MatSetOption(a->A,op,flg)); 1701 PetscCall(MatSetOption(a->B,op,flg)); 1702 break; 1703 case MAT_ROW_ORIENTED: 1704 MatCheckPreallocated(A,1); 1705 a->roworiented = flg; 1706 1707 PetscCall(MatSetOption(a->A,op,flg)); 1708 PetscCall(MatSetOption(a->B,op,flg)); 1709 break; 1710 case MAT_FORCE_DIAGONAL_ENTRIES: 1711 case MAT_SORTED_FULL: 1712 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1713 break; 1714 case MAT_IGNORE_OFF_PROC_ENTRIES: 1715 a->donotstash = flg; 1716 break; 1717 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1718 case MAT_SPD: 1719 case MAT_SYMMETRIC: 1720 case MAT_STRUCTURALLY_SYMMETRIC: 1721 case MAT_HERMITIAN: 1722 case MAT_SYMMETRY_ETERNAL: 1723 break; 1724 case MAT_SUBMAT_SINGLEIS: 1725 A->submat_singleis = flg; 1726 break; 1727 case MAT_STRUCTURE_ONLY: 1728 /* The option is handled directly by MatSetOption() */ 1729 break; 1730 default: 1731 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1732 } 1733 PetscFunctionReturn(0); 1734 } 1735 1736 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1737 { 1738 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1739 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1740 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1741 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1742 PetscInt *cmap,*idx_p; 1743 1744 PetscFunctionBegin; 1745 PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1746 mat->getrowactive = PETSC_TRUE; 1747 1748 if (!mat->rowvalues && (idx || v)) { 1749 /* 1750 allocate enough space to hold information from the longest row. 
1751 */ 1752 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1753 PetscInt max = 1,tmp; 1754 for (i=0; i<matin->rmap->n; i++) { 1755 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1756 if (max < tmp) max = tmp; 1757 } 1758 PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices)); 1759 } 1760 1761 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1762 lrow = row - rstart; 1763 1764 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1765 if (!v) {pvA = NULL; pvB = NULL;} 1766 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1767 PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 1768 PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1769 nztot = nzA + nzB; 1770 1771 cmap = mat->garray; 1772 if (v || idx) { 1773 if (nztot) { 1774 /* Sort by increasing column numbers, assuming A and B already sorted */ 1775 PetscInt imark = -1; 1776 if (v) { 1777 *v = v_p = mat->rowvalues; 1778 for (i=0; i<nzB; i++) { 1779 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1780 else break; 1781 } 1782 imark = i; 1783 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1784 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1785 } 1786 if (idx) { 1787 *idx = idx_p = mat->rowindices; 1788 if (imark > -1) { 1789 for (i=0; i<imark; i++) { 1790 idx_p[i] = cmap[cworkB[i]]; 1791 } 1792 } else { 1793 for (i=0; i<nzB; i++) { 1794 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1795 else break; 1796 } 1797 imark = i; 1798 } 1799 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1800 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1801 } 1802 } else { 1803 if (idx) *idx = NULL; 1804 if (v) *v = NULL; 1805 } 1806 } 1807 *nz = nztot; 1808 PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 1809 PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 1810 PetscFunctionReturn(0); 1811 } 1812 1813 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1814 { 1815 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1816 1817 PetscFunctionBegin; 1818 PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1819 aij->getrowactive = PETSC_FALSE; 1820 PetscFunctionReturn(0); 1821 } 1822 1823 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1824 { 1825 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1826 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1827 PetscInt i,j,cstart = mat->cmap->rstart; 1828 PetscReal sum = 0.0; 1829 const MatScalar *v,*amata,*bmata; 1830 1831 PetscFunctionBegin; 1832 if (aij->size == 1) { 1833 PetscCall(MatNorm(aij->A,type,norm)); 1834 } else { 1835 PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata)); 1836 PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata)); 1837 if (type == NORM_FROBENIUS) { 1838 v = amata; 1839 for (i=0; i<amat->nz; i++) { 1840 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1841 } 1842 v = bmata; 1843 for (i=0; i<bmat->nz; i++) { 1844 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1845 } 1846 PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1847 *norm = PetscSqrtReal(*norm); 1848 PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz)); 1849 } else if (type == NORM_1) { /* max column norm */ 1850 PetscReal *tmp,*tmp2; 1851 PetscInt *jj,*garray = aij->garray; 1852 PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp)); 1853 PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2)); 1854 
*norm = 0.0; 1855 v = amata; jj = amat->j; 1856 for (j=0; j<amat->nz; j++) { 1857 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1858 } 1859 v = bmata; jj = bmat->j; 1860 for (j=0; j<bmat->nz; j++) { 1861 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1862 } 1863 PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1864 for (j=0; j<mat->cmap->N; j++) { 1865 if (tmp2[j] > *norm) *norm = tmp2[j]; 1866 } 1867 PetscCall(PetscFree(tmp)); 1868 PetscCall(PetscFree(tmp2)); 1869 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1870 } else if (type == NORM_INFINITY) { /* max row norm */ 1871 PetscReal ntemp = 0.0; 1872 for (j=0; j<aij->A->rmap->n; j++) { 1873 v = amata + amat->i[j]; 1874 sum = 0.0; 1875 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1876 sum += PetscAbsScalar(*v); v++; 1877 } 1878 v = bmata + bmat->i[j]; 1879 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1880 sum += PetscAbsScalar(*v); v++; 1881 } 1882 if (sum > ntemp) ntemp = sum; 1883 } 1884 PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 1885 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1886 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1887 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata)); 1888 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata)); 1889 } 1890 PetscFunctionReturn(0); 1891 } 1892 1893 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1894 { 1895 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1896 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1897 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1898 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1899 Mat B,A_diag,*B_diag; 1900 const MatScalar *pbv,*bv; 1901 1902 PetscFunctionBegin; 1903 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1904 ai = Aloc->i; aj = Aloc->j; 1905 bi = Bloc->i; bj = Bloc->j; 1906 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1907 PetscInt *d_nnz,*g_nnz,*o_nnz; 1908 PetscSFNode *oloc; 1909 PETSC_UNUSED PetscSF sf; 1910 1911 PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc)); 1912 /* compute d_nnz for preallocation */ 1913 PetscCall(PetscArrayzero(d_nnz,na)); 1914 for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++; 1915 /* compute local off-diagonal contributions */ 1916 PetscCall(PetscArrayzero(g_nnz,nb)); 1917 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1918 /* map those to global */ 1919 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1920 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray)); 1921 PetscCall(PetscSFSetFromOptions(sf)); 1922 PetscCall(PetscArrayzero(o_nnz,na)); 1923 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1924 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1925 PetscCall(PetscSFDestroy(&sf)); 1926 1927 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 1928 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 1929 PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs))); 1930 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 1931 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 1932 PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc)); 1933 } else { 1934 B = *matout; 1935 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 1936 } 1937 1938 b = (Mat_MPIAIJ*)B->data; 1939 A_diag = a->A; 1940 B_diag = &b->A; 
1941 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1942 A_diag_ncol = A_diag->cmap->N; 1943 B_diag_ilen = sub_B_diag->ilen; 1944 B_diag_i = sub_B_diag->i; 1945 1946 /* Set ilen for diagonal of B */ 1947 for (i=0; i<A_diag_ncol; i++) { 1948 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1949 } 1950 1951 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1952 very quickly (=without using MatSetValues), because all writes are local. */ 1953 PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag)); 1954 1955 /* copy over the B part */ 1956 PetscCall(PetscMalloc1(bi[mb],&cols)); 1957 PetscCall(MatSeqAIJGetArrayRead(a->B,&bv)); 1958 pbv = bv; 1959 row = A->rmap->rstart; 1960 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1961 cols_tmp = cols; 1962 for (i=0; i<mb; i++) { 1963 ncol = bi[i+1]-bi[i]; 1964 PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES)); 1965 row++; 1966 pbv += ncol; cols_tmp += ncol; 1967 } 1968 PetscCall(PetscFree(cols)); 1969 PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv)); 1970 1971 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1972 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1973 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1974 *matout = B; 1975 } else { 1976 PetscCall(MatHeaderMerge(A,&B)); 1977 } 1978 PetscFunctionReturn(0); 1979 } 1980 1981 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1982 { 1983 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1984 Mat a = aij->A,b = aij->B; 1985 PetscInt s1,s2,s3; 1986 1987 PetscFunctionBegin; 1988 PetscCall(MatGetLocalSize(mat,&s2,&s3)); 1989 if (rr) { 1990 PetscCall(VecGetLocalSize(rr,&s1)); 1991 PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1992 /* Overlap communication with computation. */ 1993 PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1994 } 1995 if (ll) { 1996 PetscCall(VecGetLocalSize(ll,&s1)); 1997 PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1998 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 1999 } 2000 /* scale the diagonal block */ 2001 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 2002 2003 if (rr) { 2004 /* Do a scatter end and then right scale the off-diagonal block */ 2005 PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2006 PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec)); 2007 } 2008 PetscFunctionReturn(0); 2009 } 2010 2011 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2012 { 2013 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2014 2015 PetscFunctionBegin; 2016 PetscCall(MatSetUnfactored(a->A)); 2017 PetscFunctionReturn(0); 2018 } 2019 2020 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2021 { 2022 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2023 Mat a,b,c,d; 2024 PetscBool flg; 2025 2026 PetscFunctionBegin; 2027 a = matA->A; b = matA->B; 2028 c = matB->A; d = matB->B; 2029 2030 PetscCall(MatEqual(a,c,&flg)); 2031 if (flg) { 2032 PetscCall(MatEqual(b,d,&flg)); 2033 } 2034 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 2035 PetscFunctionReturn(0); 2036 } 2037 2038 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2039 { 2040 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2041 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2042 2043 PetscFunctionBegin; 2044 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
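     In that case the generic MatCopy_Basic() path below is used instead of copying the diagonal
     and off-diagonal blocks separately.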
*/ 2045 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2046 /* because of the column compression in the off-processor part of the matrix a->B, 2047 the number of columns in a->B and b->B may be different, hence we cannot call 2048 the MatCopy() directly on the two parts. If need be, we can provide a more 2049 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2050 then copying the submatrices */ 2051 PetscCall(MatCopy_Basic(A,B,str)); 2052 } else { 2053 PetscCall(MatCopy(a->A,b->A,str)); 2054 PetscCall(MatCopy(a->B,b->B,str)); 2055 } 2056 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2057 PetscFunctionReturn(0); 2058 } 2059 2060 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2061 { 2062 PetscFunctionBegin; 2063 PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 2064 PetscFunctionReturn(0); 2065 } 2066 2067 /* 2068 Computes the number of nonzeros per row needed for preallocation when X and Y 2069 have different nonzero structure. 2070 */ 2071 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2072 { 2073 PetscInt i,j,k,nzx,nzy; 2074 2075 PetscFunctionBegin; 2076 /* Set the number of nonzeros in the new matrix */ 2077 for (i=0; i<m; i++) { 2078 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2079 nzx = xi[i+1] - xi[i]; 2080 nzy = yi[i+1] - yi[i]; 2081 nnz[i] = 0; 2082 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2083 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2084 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2085 nnz[i]++; 2086 } 2087 for (; k<nzy; k++) nnz[i]++; 2088 } 2089 PetscFunctionReturn(0); 2090 } 2091 2092 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2093 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2094 { 2095 PetscInt m = Y->rmap->N; 2096 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2097 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2098 2099 PetscFunctionBegin; 2100 PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 2101 PetscFunctionReturn(0); 2102 } 2103 2104 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2105 { 2106 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2107 2108 PetscFunctionBegin; 2109 if (str == SAME_NONZERO_PATTERN) { 2110 PetscCall(MatAXPY(yy->A,a,xx->A,str)); 2111 PetscCall(MatAXPY(yy->B,a,xx->B,str)); 2112 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2113 PetscCall(MatAXPY_Basic(Y,a,X,str)); 2114 } else { 2115 Mat B; 2116 PetscInt *nnz_d,*nnz_o; 2117 2118 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 2119 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 2120 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 2121 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2122 PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap)); 2123 PetscCall(MatSetType(B,((PetscObject)Y)->type_name)); 2124 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d)); 2125 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 2126 PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o)); 2127 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2128 PetscCall(MatHeaderMerge(Y,&B)); 2129 
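    /* Y has taken over the freshly computed sum via MatHeaderMerge(); the preallocation work arrays can now be freed */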
PetscCall(PetscFree(nnz_d)); 2130 PetscCall(PetscFree(nnz_o)); 2131 } 2132 PetscFunctionReturn(0); 2133 } 2134 2135 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2136 2137 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2138 { 2139 PetscFunctionBegin; 2140 if (PetscDefined(USE_COMPLEX)) { 2141 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2142 2143 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2144 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2145 } 2146 PetscFunctionReturn(0); 2147 } 2148 2149 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2150 { 2151 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2152 2153 PetscFunctionBegin; 2154 PetscCall(MatRealPart(a->A)); 2155 PetscCall(MatRealPart(a->B)); 2156 PetscFunctionReturn(0); 2157 } 2158 2159 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2160 { 2161 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2162 2163 PetscFunctionBegin; 2164 PetscCall(MatImaginaryPart(a->A)); 2165 PetscCall(MatImaginaryPart(a->B)); 2166 PetscFunctionReturn(0); 2167 } 2168 2169 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2170 { 2171 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2172 PetscInt i,*idxb = NULL,m = A->rmap->n; 2173 PetscScalar *va,*vv; 2174 Vec vB,vA; 2175 const PetscScalar *vb; 2176 2177 PetscFunctionBegin; 2178 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 2179 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 2180 2181 PetscCall(VecGetArrayWrite(vA,&va)); 2182 if (idx) { 2183 for (i=0; i<m; i++) { 2184 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2185 } 2186 } 2187 2188 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 2189 PetscCall(PetscMalloc1(m,&idxb)); 2190 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 2191 2192 PetscCall(VecGetArrayWrite(v,&vv)); 2193 PetscCall(VecGetArrayRead(vB,&vb)); 2194 for (i=0; i<m; i++) { 2195 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2196 vv[i] = vb[i]; 2197 if (idx) idx[i] = a->garray[idxb[i]]; 2198 } else { 2199 vv[i] = va[i]; 2200 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2201 idx[i] = a->garray[idxb[i]]; 2202 } 2203 } 2204 PetscCall(VecRestoreArrayWrite(vA,&vv)); 2205 PetscCall(VecRestoreArrayWrite(vA,&va)); 2206 PetscCall(VecRestoreArrayRead(vB,&vb)); 2207 PetscCall(PetscFree(idxb)); 2208 PetscCall(VecDestroy(&vA)); 2209 PetscCall(VecDestroy(&vB)); 2210 PetscFunctionReturn(0); 2211 } 2212 2213 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2214 { 2215 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2216 PetscInt m = A->rmap->n,n = A->cmap->n; 2217 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2218 PetscInt *cmap = mat->garray; 2219 PetscInt *diagIdx, *offdiagIdx; 2220 Vec diagV, offdiagV; 2221 PetscScalar *a, *diagA, *offdiagA; 2222 const PetscScalar *ba,*bav; 2223 PetscInt r,j,col,ncols,*bi,*bj; 2224 Mat B = mat->B; 2225 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2226 2227 PetscFunctionBegin; 2228 /* When a process holds entire A and other processes have no entry */ 2229 if (A->cmap->N == n) { 2230 PetscCall(VecGetArrayWrite(v,&diagA)); 2231 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2232 PetscCall(MatGetRowMinAbs(mat->A,diagV,idx)); 2233 PetscCall(VecDestroy(&diagV)); 2234 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2235 PetscFunctionReturn(0); 2236 } else if (n == 0) { 2237 if (m) { 2238 PetscCall(VecGetArrayWrite(v,&a)); 2239 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2240 PetscCall(VecRestoreArrayWrite(v,&a)); 2241 } 2242 PetscFunctionReturn(0); 2243 } 2244 2245 
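  /* General case: take row-wise minima of |.| over the diagonal block, scan the compressed
     off-diagonal rows (accounting for implicitly stored zeros), and merge the two results below */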
PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2246 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2247 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2248 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2249 2250 /* Get offdiagIdx[] for implicit 0.0 */ 2251 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2252 ba = bav; 2253 bi = b->i; 2254 bj = b->j; 2255 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2256 for (r = 0; r < m; r++) { 2257 ncols = bi[r+1] - bi[r]; 2258 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2259 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2260 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2261 offdiagA[r] = 0.0; 2262 2263 /* Find first hole in the cmap */ 2264 for (j=0; j<ncols; j++) { 2265 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2266 if (col > j && j < cstart) { 2267 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2268 break; 2269 } else if (col > j + n && j >= cstart) { 2270 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2271 break; 2272 } 2273 } 2274 if (j == ncols && ncols < A->cmap->N - n) { 2275 /* a hole is outside compressed Bcols */ 2276 if (ncols == 0) { 2277 if (cstart) { 2278 offdiagIdx[r] = 0; 2279 } else offdiagIdx[r] = cend; 2280 } else { /* ncols > 0 */ 2281 offdiagIdx[r] = cmap[ncols-1] + 1; 2282 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2283 } 2284 } 2285 } 2286 2287 for (j=0; j<ncols; j++) { 2288 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2289 ba++; bj++; 2290 } 2291 } 2292 2293 PetscCall(VecGetArrayWrite(v, &a)); 2294 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2295 for (r = 0; r < m; ++r) { 2296 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2297 a[r] = diagA[r]; 2298 if (idx) idx[r] = cstart + diagIdx[r]; 2299 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2300 a[r] = diagA[r]; 2301 if (idx) { 2302 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2303 idx[r] = cstart + diagIdx[r]; 2304 } else idx[r] = offdiagIdx[r]; 2305 } 2306 } else { 2307 a[r] = offdiagA[r]; 2308 if (idx) idx[r] = offdiagIdx[r]; 2309 } 2310 } 2311 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2312 PetscCall(VecRestoreArrayWrite(v, &a)); 2313 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2314 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2315 PetscCall(VecDestroy(&diagV)); 2316 PetscCall(VecDestroy(&offdiagV)); 2317 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2318 PetscFunctionReturn(0); 2319 } 2320 2321 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2322 { 2323 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2324 PetscInt m = A->rmap->n,n = A->cmap->n; 2325 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2326 PetscInt *cmap = mat->garray; 2327 PetscInt *diagIdx, *offdiagIdx; 2328 Vec diagV, offdiagV; 2329 PetscScalar *a, *diagA, *offdiagA; 2330 const PetscScalar *ba,*bav; 2331 PetscInt r,j,col,ncols,*bi,*bj; 2332 Mat B = mat->B; 2333 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2334 2335 PetscFunctionBegin; 2336 /* When a process holds entire A and other processes have no entry */ 2337 if (A->cmap->N == n) { 2338 PetscCall(VecGetArrayWrite(v,&diagA)); 2339 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2340 PetscCall(MatGetRowMin(mat->A,diagV,idx)); 2341 PetscCall(VecDestroy(&diagV)); 2342 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2343 PetscFunctionReturn(0); 2344 } else if 
(n == 0) { 2345 if (m) { 2346 PetscCall(VecGetArrayWrite(v,&a)); 2347 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2348 PetscCall(VecRestoreArrayWrite(v,&a)); 2349 } 2350 PetscFunctionReturn(0); 2351 } 2352 2353 PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx)); 2354 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2355 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2356 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2357 2358 /* Get offdiagIdx[] for implicit 0.0 */ 2359 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2360 ba = bav; 2361 bi = b->i; 2362 bj = b->j; 2363 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2364 for (r = 0; r < m; r++) { 2365 ncols = bi[r+1] - bi[r]; 2366 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2367 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2368 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2369 offdiagA[r] = 0.0; 2370 2371 /* Find first hole in the cmap */ 2372 for (j=0; j<ncols; j++) { 2373 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2374 if (col > j && j < cstart) { 2375 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2376 break; 2377 } else if (col > j + n && j >= cstart) { 2378 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2379 break; 2380 } 2381 } 2382 if (j == ncols && ncols < A->cmap->N - n) { 2383 /* a hole is outside compressed Bcols */ 2384 if (ncols == 0) { 2385 if (cstart) { 2386 offdiagIdx[r] = 0; 2387 } else offdiagIdx[r] = cend; 2388 } else { /* ncols > 0 */ 2389 offdiagIdx[r] = cmap[ncols-1] + 1; 2390 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2391 } 2392 } 2393 } 2394 2395 for (j=0; j<ncols; j++) { 2396 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2397 ba++; bj++; 2398 } 2399 } 2400 2401 PetscCall(VecGetArrayWrite(v, &a)); 2402 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2403 for (r = 0; r < m; ++r) { 2404 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2405 a[r] = diagA[r]; 2406 if (idx) idx[r] = cstart + diagIdx[r]; 2407 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2408 a[r] = diagA[r]; 2409 if (idx) { 2410 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2411 idx[r] = cstart + diagIdx[r]; 2412 } else idx[r] = offdiagIdx[r]; 2413 } 2414 } else { 2415 a[r] = offdiagA[r]; 2416 if (idx) idx[r] = offdiagIdx[r]; 2417 } 2418 } 2419 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2420 PetscCall(VecRestoreArrayWrite(v, &a)); 2421 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2422 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2423 PetscCall(VecDestroy(&diagV)); 2424 PetscCall(VecDestroy(&offdiagV)); 2425 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2426 PetscFunctionReturn(0); 2427 } 2428 2429 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2430 { 2431 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2432 PetscInt m = A->rmap->n,n = A->cmap->n; 2433 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2434 PetscInt *cmap = mat->garray; 2435 PetscInt *diagIdx, *offdiagIdx; 2436 Vec diagV, offdiagV; 2437 PetscScalar *a, *diagA, *offdiagA; 2438 const PetscScalar *ba,*bav; 2439 PetscInt r,j,col,ncols,*bi,*bj; 2440 Mat B = mat->B; 2441 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2442 2443 PetscFunctionBegin; 2444 /* When a process holds entire A and other processes have no entry */ 2445 if (A->cmap->N == n) { 2446 PetscCall(VecGetArrayWrite(v,&diagA)); 2447 
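    /* The whole matrix is held in the diagonal block here: wrap v's array so MatGetRowMax() on
       mat->A writes its result directly into v */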
PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2448 PetscCall(MatGetRowMax(mat->A,diagV,idx)); 2449 PetscCall(VecDestroy(&diagV)); 2450 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2451 PetscFunctionReturn(0); 2452 } else if (n == 0) { 2453 if (m) { 2454 PetscCall(VecGetArrayWrite(v,&a)); 2455 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2456 PetscCall(VecRestoreArrayWrite(v,&a)); 2457 } 2458 PetscFunctionReturn(0); 2459 } 2460 2461 PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2462 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2463 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2464 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2465 2466 /* Get offdiagIdx[] for implicit 0.0 */ 2467 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2468 ba = bav; 2469 bi = b->i; 2470 bj = b->j; 2471 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2472 for (r = 0; r < m; r++) { 2473 ncols = bi[r+1] - bi[r]; 2474 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2475 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2476 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2477 offdiagA[r] = 0.0; 2478 2479 /* Find first hole in the cmap */ 2480 for (j=0; j<ncols; j++) { 2481 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2482 if (col > j && j < cstart) { 2483 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2484 break; 2485 } else if (col > j + n && j >= cstart) { 2486 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2487 break; 2488 } 2489 } 2490 if (j == ncols && ncols < A->cmap->N - n) { 2491 /* a hole is outside compressed Bcols */ 2492 if (ncols == 0) { 2493 if (cstart) { 2494 offdiagIdx[r] = 0; 2495 } else offdiagIdx[r] = cend; 2496 } else { /* ncols > 0 */ 2497 offdiagIdx[r] = cmap[ncols-1] + 1; 2498 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2499 } 2500 } 2501 } 2502 2503 for (j=0; j<ncols; j++) { 2504 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2505 ba++; bj++; 2506 } 2507 } 2508 2509 PetscCall(VecGetArrayWrite(v, &a)); 2510 PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA)); 2511 for (r = 0; r < m; ++r) { 2512 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2513 a[r] = diagA[r]; 2514 if (idx) idx[r] = cstart + diagIdx[r]; 2515 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2516 a[r] = diagA[r]; 2517 if (idx) { 2518 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2519 idx[r] = cstart + diagIdx[r]; 2520 } else idx[r] = offdiagIdx[r]; 2521 } 2522 } else { 2523 a[r] = offdiagA[r]; 2524 if (idx) idx[r] = offdiagIdx[r]; 2525 } 2526 } 2527 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2528 PetscCall(VecRestoreArrayWrite(v, &a)); 2529 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2530 PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA)); 2531 PetscCall(VecDestroy(&diagV)); 2532 PetscCall(VecDestroy(&offdiagV)); 2533 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2534 PetscFunctionReturn(0); 2535 } 2536 2537 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2538 { 2539 Mat *dummy; 2540 2541 PetscFunctionBegin; 2542 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy)); 2543 *newmat = *dummy; 2544 PetscCall(PetscFree(dummy)); 2545 PetscFunctionReturn(0); 2546 } 2547 2548 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2549 { 2550 Mat_MPIAIJ *a = (Mat_MPIAIJ*) 
A->data; 2551 2552 PetscFunctionBegin; 2553 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2554 A->factorerrortype = a->A->factorerrortype; 2555 PetscFunctionReturn(0); 2556 } 2557 2558 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2559 { 2560 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2561 2562 PetscFunctionBegin; 2563 PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2564 PetscCall(MatSetRandom(aij->A,rctx)); 2565 if (x->assembled) { 2566 PetscCall(MatSetRandom(aij->B,rctx)); 2567 } else { 2568 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx)); 2569 } 2570 PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY)); 2571 PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY)); 2572 PetscFunctionReturn(0); 2573 } 2574 2575 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2576 { 2577 PetscFunctionBegin; 2578 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2579 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2580 PetscFunctionReturn(0); 2581 } 2582 2583 /*@ 2584 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2585 2586 Collective on Mat 2587 2588 Input Parameters: 2589 + A - the matrix 2590 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2591 2592 Level: advanced 2593 2594 @*/ 2595 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2596 { 2597 PetscFunctionBegin; 2598 PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc)); 2599 PetscFunctionReturn(0); 2600 } 2601 2602 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2603 { 2604 PetscBool sc = PETSC_FALSE,flg; 2605 2606 PetscFunctionBegin; 2607 PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options"); 2608 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2609 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg)); 2610 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc)); 2611 PetscOptionsHeadEnd(); 2612 PetscFunctionReturn(0); 2613 } 2614 2615 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2616 { 2617 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2618 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2619 2620 PetscFunctionBegin; 2621 if (!Y->preallocated) { 2622 PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL)); 2623 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
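        The one-nonzero-per-row preallocation of the diagonal block below just provides that estimate;
        MatShift_Basic() then inserts the shifted diagonal entries.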
*/ 2624 PetscInt nonew = aij->nonew; 2625 PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL)); 2626 aij->nonew = nonew; 2627 } 2628 PetscCall(MatShift_Basic(Y,a)); 2629 PetscFunctionReturn(0); 2630 } 2631 2632 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2633 { 2634 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2635 2636 PetscFunctionBegin; 2637 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2638 PetscCall(MatMissingDiagonal(a->A,missing,d)); 2639 if (d) { 2640 PetscInt rstart; 2641 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 2642 *d += rstart; 2643 2644 } 2645 PetscFunctionReturn(0); 2646 } 2647 2648 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2649 { 2650 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2651 2652 PetscFunctionBegin; 2653 PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag)); 2654 PetscFunctionReturn(0); 2655 } 2656 2657 /* -------------------------------------------------------------------*/ 2658 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2659 MatGetRow_MPIAIJ, 2660 MatRestoreRow_MPIAIJ, 2661 MatMult_MPIAIJ, 2662 /* 4*/ MatMultAdd_MPIAIJ, 2663 MatMultTranspose_MPIAIJ, 2664 MatMultTransposeAdd_MPIAIJ, 2665 NULL, 2666 NULL, 2667 NULL, 2668 /*10*/ NULL, 2669 NULL, 2670 NULL, 2671 MatSOR_MPIAIJ, 2672 MatTranspose_MPIAIJ, 2673 /*15*/ MatGetInfo_MPIAIJ, 2674 MatEqual_MPIAIJ, 2675 MatGetDiagonal_MPIAIJ, 2676 MatDiagonalScale_MPIAIJ, 2677 MatNorm_MPIAIJ, 2678 /*20*/ MatAssemblyBegin_MPIAIJ, 2679 MatAssemblyEnd_MPIAIJ, 2680 MatSetOption_MPIAIJ, 2681 MatZeroEntries_MPIAIJ, 2682 /*24*/ MatZeroRows_MPIAIJ, 2683 NULL, 2684 NULL, 2685 NULL, 2686 NULL, 2687 /*29*/ MatSetUp_MPIAIJ, 2688 NULL, 2689 NULL, 2690 MatGetDiagonalBlock_MPIAIJ, 2691 NULL, 2692 /*34*/ MatDuplicate_MPIAIJ, 2693 NULL, 2694 NULL, 2695 NULL, 2696 NULL, 2697 /*39*/ MatAXPY_MPIAIJ, 2698 MatCreateSubMatrices_MPIAIJ, 2699 MatIncreaseOverlap_MPIAIJ, 2700 MatGetValues_MPIAIJ, 2701 MatCopy_MPIAIJ, 2702 /*44*/ MatGetRowMax_MPIAIJ, 2703 MatScale_MPIAIJ, 2704 MatShift_MPIAIJ, 2705 MatDiagonalSet_MPIAIJ, 2706 MatZeroRowsColumns_MPIAIJ, 2707 /*49*/ MatSetRandom_MPIAIJ, 2708 MatGetRowIJ_MPIAIJ, 2709 MatRestoreRowIJ_MPIAIJ, 2710 NULL, 2711 NULL, 2712 /*54*/ MatFDColoringCreate_MPIXAIJ, 2713 NULL, 2714 MatSetUnfactored_MPIAIJ, 2715 MatPermute_MPIAIJ, 2716 NULL, 2717 /*59*/ MatCreateSubMatrix_MPIAIJ, 2718 MatDestroy_MPIAIJ, 2719 MatView_MPIAIJ, 2720 NULL, 2721 NULL, 2722 /*64*/ NULL, 2723 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2724 NULL, 2725 NULL, 2726 NULL, 2727 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2728 MatGetRowMinAbs_MPIAIJ, 2729 NULL, 2730 NULL, 2731 NULL, 2732 NULL, 2733 /*75*/ MatFDColoringApply_AIJ, 2734 MatSetFromOptions_MPIAIJ, 2735 NULL, 2736 NULL, 2737 MatFindZeroDiagonals_MPIAIJ, 2738 /*80*/ NULL, 2739 NULL, 2740 NULL, 2741 /*83*/ MatLoad_MPIAIJ, 2742 MatIsSymmetric_MPIAIJ, 2743 NULL, 2744 NULL, 2745 NULL, 2746 NULL, 2747 /*89*/ NULL, 2748 NULL, 2749 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2750 NULL, 2751 NULL, 2752 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2753 NULL, 2754 NULL, 2755 NULL, 2756 MatBindToCPU_MPIAIJ, 2757 /*99*/ MatProductSetFromOptions_MPIAIJ, 2758 NULL, 2759 NULL, 2760 MatConjugate_MPIAIJ, 2761 NULL, 2762 /*104*/MatSetValuesRow_MPIAIJ, 2763 MatRealPart_MPIAIJ, 2764 MatImaginaryPart_MPIAIJ, 2765 NULL, 2766 NULL, 2767 /*109*/NULL, 2768 NULL, 2769 MatGetRowMin_MPIAIJ, 2770 NULL, 2771 MatMissingDiagonal_MPIAIJ, 2772 
/*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2773 NULL, 2774 MatGetGhosts_MPIAIJ, 2775 NULL, 2776 NULL, 2777 /*119*/MatMultDiagonalBlock_MPIAIJ, 2778 NULL, 2779 NULL, 2780 NULL, 2781 MatGetMultiProcBlock_MPIAIJ, 2782 /*124*/MatFindNonzeroRows_MPIAIJ, 2783 MatGetColumnReductions_MPIAIJ, 2784 MatInvertBlockDiagonal_MPIAIJ, 2785 MatInvertVariableBlockDiagonal_MPIAIJ, 2786 MatCreateSubMatricesMPI_MPIAIJ, 2787 /*129*/NULL, 2788 NULL, 2789 NULL, 2790 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2791 NULL, 2792 /*134*/NULL, 2793 NULL, 2794 NULL, 2795 NULL, 2796 NULL, 2797 /*139*/MatSetBlockSizes_MPIAIJ, 2798 NULL, 2799 NULL, 2800 MatFDColoringSetUp_MPIXAIJ, 2801 MatFindOffBlockDiagonalEntries_MPIAIJ, 2802 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2803 /*145*/NULL, 2804 NULL, 2805 NULL 2806 }; 2807 2808 /* ----------------------------------------------------------------------------------------*/ 2809 2810 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2811 { 2812 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2813 2814 PetscFunctionBegin; 2815 PetscCall(MatStoreValues(aij->A)); 2816 PetscCall(MatStoreValues(aij->B)); 2817 PetscFunctionReturn(0); 2818 } 2819 2820 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2821 { 2822 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2823 2824 PetscFunctionBegin; 2825 PetscCall(MatRetrieveValues(aij->A)); 2826 PetscCall(MatRetrieveValues(aij->B)); 2827 PetscFunctionReturn(0); 2828 } 2829 2830 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2831 { 2832 Mat_MPIAIJ *b; 2833 PetscMPIInt size; 2834 2835 PetscFunctionBegin; 2836 PetscCall(PetscLayoutSetUp(B->rmap)); 2837 PetscCall(PetscLayoutSetUp(B->cmap)); 2838 b = (Mat_MPIAIJ*)B->data; 2839 2840 #if defined(PETSC_USE_CTABLE) 2841 PetscCall(PetscTableDestroy(&b->colmap)); 2842 #else 2843 PetscCall(PetscFree(b->colmap)); 2844 #endif 2845 PetscCall(PetscFree(b->garray)); 2846 PetscCall(VecDestroy(&b->lvec)); 2847 PetscCall(VecScatterDestroy(&b->Mvctx)); 2848 2849 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2850 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 2851 PetscCall(MatDestroy(&b->B)); 2852 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 2853 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0)); 2854 PetscCall(MatSetBlockSizesFromMats(b->B,B,B)); 2855 PetscCall(MatSetType(b->B,MATSEQAIJ)); 2856 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 2857 2858 if (!B->preallocated) { 2859 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 2860 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 2861 PetscCall(MatSetBlockSizesFromMats(b->A,B,B)); 2862 PetscCall(MatSetType(b->A,MATSEQAIJ)); 2863 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 2864 } 2865 2866 PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz)); 2867 PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz)); 2868 B->preallocated = PETSC_TRUE; 2869 B->was_assembled = PETSC_FALSE; 2870 B->assembled = PETSC_FALSE; 2871 PetscFunctionReturn(0); 2872 } 2873 2874 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2875 { 2876 Mat_MPIAIJ *b; 2877 2878 PetscFunctionBegin; 2879 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2880 PetscCall(PetscLayoutSetUp(B->rmap)); 2881 PetscCall(PetscLayoutSetUp(B->cmap)); 2882 b = (Mat_MPIAIJ*)B->data; 2883 2884 #if defined(PETSC_USE_CTABLE) 2885 PetscCall(PetscTableDestroy(&b->colmap)); 2886 #else 2887 PetscCall(PetscFree(b->colmap)); 2888 #endif 2889 PetscCall(PetscFree(b->garray)); 2890 PetscCall(VecDestroy(&b->lvec)); 2891 PetscCall(VecScatterDestroy(&b->Mvctx)); 2892 2893 PetscCall(MatResetPreallocation(b->A)); 2894 PetscCall(MatResetPreallocation(b->B)); 2895 B->preallocated = PETSC_TRUE; 2896 B->was_assembled = PETSC_FALSE; 2897 B->assembled = PETSC_FALSE; 2898 PetscFunctionReturn(0); 2899 } 2900 2901 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2902 { 2903 Mat mat; 2904 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2905 2906 PetscFunctionBegin; 2907 *newmat = NULL; 2908 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 2909 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 2910 PetscCall(MatSetBlockSizesFromMats(mat,matin,matin)); 2911 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 2912 a = (Mat_MPIAIJ*)mat->data; 2913 2914 mat->factortype = matin->factortype; 2915 mat->assembled = matin->assembled; 2916 mat->insertmode = NOT_SET_VALUES; 2917 mat->preallocated = matin->preallocated; 2918 2919 a->size = oldmat->size; 2920 a->rank = oldmat->rank; 2921 a->donotstash = oldmat->donotstash; 2922 a->roworiented = oldmat->roworiented; 2923 a->rowindices = NULL; 2924 a->rowvalues = NULL; 2925 a->getrowactive = PETSC_FALSE; 2926 2927 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 2928 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 2929 2930 if (oldmat->colmap) { 2931 #if defined(PETSC_USE_CTABLE) 2932 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 2933 #else 2934 PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap)); 2935 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt))); 2936 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N)); 2937 #endif 2938 } else a->colmap = NULL; 2939 if (oldmat->garray) { 2940 PetscInt len; 2941 len = oldmat->B->cmap->n; 2942 PetscCall(PetscMalloc1(len+1,&a->garray)); 2943 PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 2944 if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 2945 } else a->garray = NULL; 2946 2947 /* It may happen MatDuplicate is called with a non-assembled matrix 2948 In fact, MatDuplicate only requires the matrix to be preallocated 2949 This may happen inside a 
DMCreateMatrix_Shell */ 2950 if (oldmat->lvec) { 2951 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 2952 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 2953 } 2954 if (oldmat->Mvctx) { 2955 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 2956 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 2957 } 2958 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2959 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2960 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 2961 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 2962 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 2963 *newmat = mat; 2964 PetscFunctionReturn(0); 2965 } 2966 2967 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2968 { 2969 PetscBool isbinary, ishdf5; 2970 2971 PetscFunctionBegin; 2972 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2973 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2974 /* force binary viewer to load .info file if it has not yet done so */ 2975 PetscCall(PetscViewerSetUp(viewer)); 2976 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 2977 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5)); 2978 if (isbinary) { 2979 PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer)); 2980 } else if (ishdf5) { 2981 #if defined(PETSC_HAVE_HDF5) 2982 PetscCall(MatLoad_AIJ_HDF5(newMat,viewer)); 2983 #else 2984 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2985 #endif 2986 } else { 2987 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2988 } 2989 PetscFunctionReturn(0); 2990 } 2991 2992 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 2993 { 2994 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 2995 PetscInt *rowidxs,*colidxs; 2996 PetscScalar *matvals; 2997 2998 PetscFunctionBegin; 2999 PetscCall(PetscViewerSetUp(viewer)); 3000 3001 /* read in matrix header */ 3002 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3003 PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3004 M = header[1]; N = header[2]; nz = header[3]; 3005 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3006 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3007 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3008 3009 /* set block sizes from the viewer's .info file */ 3010 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3011 /* set global sizes if not set already */ 3012 if (mat->rmap->N < 0) mat->rmap->N = M; 3013 if (mat->cmap->N < 0) mat->cmap->N = N; 3014 PetscCall(PetscLayoutSetUp(mat->rmap)); 3015 PetscCall(PetscLayoutSetUp(mat->cmap)); 3016 3017 /* check if the matrix sizes are correct */ 3018 PetscCall(MatGetSize(mat,&rows,&cols)); 3019 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT 
", %" PetscInt_FMT ")",M,N,rows,cols); 3020 3021 /* read in row lengths and build row indices */ 3022 PetscCall(MatGetLocalSize(mat,&m,NULL)); 3023 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3024 PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT)); 3025 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3026 PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer))); 3027 PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3028 /* read in column indices and matrix values */ 3029 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals)); 3030 PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 3031 PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 3032 /* store matrix indices and values */ 3033 PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals)); 3034 PetscCall(PetscFree(rowidxs)); 3035 PetscCall(PetscFree2(colidxs,matvals)); 3036 PetscFunctionReturn(0); 3037 } 3038 3039 /* Not scalable because of ISAllGather() unless getting all columns. */ 3040 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3041 { 3042 IS iscol_local; 3043 PetscBool isstride; 3044 PetscMPIInt lisstride=0,gisstride; 3045 3046 PetscFunctionBegin; 3047 /* check if we are grabbing all columns*/ 3048 PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride)); 3049 3050 if (isstride) { 3051 PetscInt start,len,mstart,mlen; 3052 PetscCall(ISStrideGetInfo(iscol,&start,NULL)); 3053 PetscCall(ISGetLocalSize(iscol,&len)); 3054 PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen)); 3055 if (mstart == start && mlen-mstart == len) lisstride = 1; 3056 } 3057 3058 PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat))); 3059 if (gisstride) { 3060 PetscInt N; 3061 PetscCall(MatGetSize(mat,NULL,&N)); 3062 PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local)); 3063 PetscCall(ISSetIdentity(iscol_local)); 3064 PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3065 } else { 3066 PetscInt cbs; 3067 PetscCall(ISGetBlockSize(iscol,&cbs)); 3068 PetscCall(ISAllGather(iscol,&iscol_local)); 3069 PetscCall(ISSetBlockSize(iscol_local,cbs)); 3070 } 3071 3072 *isseq = iscol_local; 3073 PetscFunctionReturn(0); 3074 } 3075 3076 /* 3077 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3078 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3079 3080 Input Parameters: 3081 mat - matrix 3082 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3083 i.e., mat->rstart <= isrow[i] < mat->rend 3084 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3085 i.e., mat->cstart <= iscol[i] < mat->cend 3086 Output Parameter: 3087 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3088 iscol_o - sequential column index set for retrieving mat->B 3089 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3090 */ 3091 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3092 { 3093 Vec x,cmap; 3094 const PetscInt *is_idx; 3095 
PetscScalar *xarray,*cmaparray; 3096 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3097 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3098 Mat B=a->B; 3099 Vec lvec=a->lvec,lcmap; 3100 PetscInt i,cstart,cend,Bn=B->cmap->N; 3101 MPI_Comm comm; 3102 VecScatter Mvctx=a->Mvctx; 3103 3104 PetscFunctionBegin; 3105 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3106 PetscCall(ISGetLocalSize(iscol,&ncols)); 3107 3108 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3109 PetscCall(MatCreateVecs(mat,&x,NULL)); 3110 PetscCall(VecSet(x,-1.0)); 3111 PetscCall(VecDuplicate(x,&cmap)); 3112 PetscCall(VecSet(cmap,-1.0)); 3113 3114 /* Get start indices */ 3115 PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm)); 3116 isstart -= ncols; 3117 PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend)); 3118 3119 PetscCall(ISGetIndices(iscol,&is_idx)); 3120 PetscCall(VecGetArray(x,&xarray)); 3121 PetscCall(VecGetArray(cmap,&cmaparray)); 3122 PetscCall(PetscMalloc1(ncols,&idx)); 3123 for (i=0; i<ncols; i++) { 3124 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3125 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3126 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3127 } 3128 PetscCall(VecRestoreArray(x,&xarray)); 3129 PetscCall(VecRestoreArray(cmap,&cmaparray)); 3130 PetscCall(ISRestoreIndices(iscol,&is_idx)); 3131 3132 /* Get iscol_d */ 3133 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d)); 3134 PetscCall(ISGetBlockSize(iscol,&i)); 3135 PetscCall(ISSetBlockSize(*iscol_d,i)); 3136 3137 /* Get isrow_d */ 3138 PetscCall(ISGetLocalSize(isrow,&m)); 3139 rstart = mat->rmap->rstart; 3140 PetscCall(PetscMalloc1(m,&idx)); 3141 PetscCall(ISGetIndices(isrow,&is_idx)); 3142 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3143 PetscCall(ISRestoreIndices(isrow,&is_idx)); 3144 3145 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d)); 3146 PetscCall(ISGetBlockSize(isrow,&i)); 3147 PetscCall(ISSetBlockSize(*isrow_d,i)); 3148 3149 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3150 PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 3151 PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD)); 3152 3153 PetscCall(VecDuplicate(lvec,&lcmap)); 3154 3155 PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 3156 PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD)); 3157 3158 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3159 /* off-process column indices */ 3160 count = 0; 3161 PetscCall(PetscMalloc1(Bn,&idx)); 3162 PetscCall(PetscMalloc1(Bn,&cmap1)); 3163 3164 PetscCall(VecGetArray(lvec,&xarray)); 3165 PetscCall(VecGetArray(lcmap,&cmaparray)); 3166 for (i=0; i<Bn; i++) { 3167 if (PetscRealPart(xarray[i]) > -1.0) { 3168 idx[count] = i; /* local column index in off-diagonal part B */ 3169 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3170 count++; 3171 } 3172 } 3173 PetscCall(VecRestoreArray(lvec,&xarray)); 3174 PetscCall(VecRestoreArray(lcmap,&cmaparray)); 3175 3176 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o)); 3177 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3178 3179 PetscCall(PetscFree(idx)); 3180 *garray = cmap1; 3181 3182 PetscCall(VecDestroy(&x)); 3183 PetscCall(VecDestroy(&cmap)); 3184 PetscCall(VecDestroy(&lcmap)); 3185 PetscFunctionReturn(0); 3186 } 3187 3188 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3189 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3190 { 3191 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3192 Mat M = NULL; 3193 MPI_Comm comm; 3194 IS iscol_d,isrow_d,iscol_o; 3195 Mat Asub = NULL,Bsub = NULL; 3196 PetscInt n; 3197 3198 PetscFunctionBegin; 3199 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3200 3201 if (call == MAT_REUSE_MATRIX) { 3202 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3203 PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d)); 3204 PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3205 3206 PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d)); 3207 PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3208 3209 PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o)); 3210 PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3211 3212 /* Update diagonal and off-diagonal portions of submat */ 3213 asub = (Mat_MPIAIJ*)(*submat)->data; 3214 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A)); 3215 PetscCall(ISGetLocalSize(iscol_o,&n)); 3216 if (n) { 3217 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B)); 3218 } 3219 PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY)); 3220 PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY)); 3221 3222 } else { /* call == MAT_INITIAL_MATRIX) */ 3223 const PetscInt *garray; 3224 PetscInt BsubN; 3225 3226 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3227 PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray)); 3228 3229 /* Create local submatrices Asub and Bsub */ 3230 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub)); 3231 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub)); 3232 3233 /* Create submatrix M */ 3234 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M)); 3235 3236 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3237 asub = (Mat_MPIAIJ*)M->data; 3238 3239 PetscCall(ISGetLocalSize(iscol_o,&BsubN)); 3240 n = asub->B->cmap->N; 3241 if (BsubN > n) { 3242 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3243 const PetscInt *idx; 3244 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3245 PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN)); 3246 3247 PetscCall(PetscMalloc1(n,&idx_new)); 3248 j = 0; 3249 PetscCall(ISGetIndices(iscol_o,&idx)); 3250 for (i=0; i<n; i++) { 3251 if (j >= BsubN) break; 3252 while (subgarray[i] > garray[j]) j++; 3253 3254 if (subgarray[i] == garray[j]) { 3255 idx_new[i] = idx[j++]; 3256 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]); 3257 } 3258 PetscCall(ISRestoreIndices(iscol_o,&idx)); 3259 3260 PetscCall(ISDestroy(&iscol_o)); 3261 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o)); 3262 3263 } else if (BsubN < n) { 3264 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N); 3265 } 3266 3267 PetscCall(PetscFree(garray)); 3268 *submat = M; 3269 3270 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3271 PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d)); 3272 PetscCall(ISDestroy(&isrow_d)); 3273 3274 PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d)); 3275 PetscCall(ISDestroy(&iscol_d)); 3276 3277 PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o)); 3278 PetscCall(ISDestroy(&iscol_o)); 3279 } 3280 PetscFunctionReturn(0); 3281 } 3282 3283 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3284 { 3285 IS iscol_local=NULL,isrow_d; 3286 PetscInt csize; 3287 PetscInt n,i,j,start,end; 3288 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3289 MPI_Comm comm; 3290 3291 PetscFunctionBegin; 3292 /* If isrow has same processor distribution as mat, 3293 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3294 if (call == MAT_REUSE_MATRIX) { 3295 PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d)); 3296 if (isrow_d) { 3297 sameRowDist = PETSC_TRUE; 3298 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3299 } else { 3300 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local)); 3301 if (iscol_local) { 3302 sameRowDist = PETSC_TRUE; 3303 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3304 } 3305 } 3306 } else { 3307 /* Check if isrow has same processor distribution as mat */ 3308 sameDist[0] = PETSC_FALSE; 3309 PetscCall(ISGetLocalSize(isrow,&n)); 3310 if (!n) { 3311 sameDist[0] = PETSC_TRUE; 3312 } 
else { 3313 PetscCall(ISGetMinMax(isrow,&i,&j)); 3314 PetscCall(MatGetOwnershipRange(mat,&start,&end)); 3315 if (i >= start && j < end) { 3316 sameDist[0] = PETSC_TRUE; 3317 } 3318 } 3319 3320 /* Check if iscol has same processor distribution as mat */ 3321 sameDist[1] = PETSC_FALSE; 3322 PetscCall(ISGetLocalSize(iscol,&n)); 3323 if (!n) { 3324 sameDist[1] = PETSC_TRUE; 3325 } else { 3326 PetscCall(ISGetMinMax(iscol,&i,&j)); 3327 PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end)); 3328 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3329 } 3330 3331 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3332 PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm)); 3333 sameRowDist = tsameDist[0]; 3334 } 3335 3336 if (sameRowDist) { 3337 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3338 /* isrow and iscol have same processor distribution as mat */ 3339 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat)); 3340 PetscFunctionReturn(0); 3341 } else { /* sameRowDist */ 3342 /* isrow has same processor distribution as mat */ 3343 if (call == MAT_INITIAL_MATRIX) { 3344 PetscBool sorted; 3345 PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 3346 PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */ 3347 PetscCall(ISGetSize(iscol,&i)); 3348 PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i); 3349 3350 PetscCall(ISSorted(iscol_local,&sorted)); 3351 if (sorted) { 3352 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3353 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat)); 3354 PetscFunctionReturn(0); 3355 } 3356 } else { /* call == MAT_REUSE_MATRIX */ 3357 IS iscol_sub; 3358 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3359 if (iscol_sub) { 3360 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat)); 3361 PetscFunctionReturn(0); 3362 } 3363 } 3364 } 3365 } 3366 3367 /* General case: iscol -> iscol_local which has global size of iscol */ 3368 if (call == MAT_REUSE_MATRIX) { 3369 PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local)); 3370 PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3371 } else { 3372 if (!iscol_local) { 3373 PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local)); 3374 } 3375 } 3376 3377 PetscCall(ISGetLocalSize(iscol,&csize)); 3378 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat)); 3379 3380 if (call == MAT_INITIAL_MATRIX) { 3381 PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local)); 3382 PetscCall(ISDestroy(&iscol_local)); 3383 } 3384 PetscFunctionReturn(0); 3385 } 3386 3387 /*@C 3388 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3389 and "off-diagonal" part of the matrix in CSR format. 3390 3391 Collective 3392 3393 Input Parameters: 3394 + comm - MPI communicator 3395 . A - "diagonal" portion of matrix 3396 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3397 - garray - global index of B columns 3398 3399 Output Parameter: 3400 . 
mat - the matrix, with input A as its local diagonal matrix 3401 Level: advanced 3402 3403 Notes: 3404 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3405 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3406 3407 .seealso: `MatCreateMPIAIJWithSplitArrays()` 3408 @*/ 3409 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3410 { 3411 Mat_MPIAIJ *maij; 3412 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3413 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3414 const PetscScalar *oa; 3415 Mat Bnew; 3416 PetscInt m,n,N; 3417 3418 PetscFunctionBegin; 3419 PetscCall(MatCreate(comm,mat)); 3420 PetscCall(MatGetSize(A,&m,&n)); 3421 PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N); 3422 PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs); 3423 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3424 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3425 3426 /* Get global columns of mat */ 3427 PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm)); 3428 3429 PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N)); 3430 PetscCall(MatSetType(*mat,MATMPIAIJ)); 3431 PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs)); 3432 maij = (Mat_MPIAIJ*)(*mat)->data; 3433 3434 (*mat)->preallocated = PETSC_TRUE; 3435 3436 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3437 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3438 3439 /* Set A as diagonal portion of *mat */ 3440 maij->A = A; 3441 3442 nz = oi[m]; 3443 for (i=0; i<nz; i++) { 3444 col = oj[i]; 3445 oj[i] = garray[col]; 3446 } 3447 3448 /* Set Bnew as off-diagonal portion of *mat */ 3449 PetscCall(MatSeqAIJGetArrayRead(B,&oa)); 3450 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew)); 3451 PetscCall(MatSeqAIJRestoreArrayRead(B,&oa)); 3452 bnew = (Mat_SeqAIJ*)Bnew->data; 3453 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3454 maij->B = Bnew; 3455 3456 PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N); 3457 3458 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3459 b->free_a = PETSC_FALSE; 3460 b->free_ij = PETSC_FALSE; 3461 PetscCall(MatDestroy(&B)); 3462 3463 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3464 bnew->free_a = PETSC_TRUE; 3465 bnew->free_ij = PETSC_TRUE; 3466 3467 /* condense columns of maij->B */ 3468 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 3469 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 3470 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 3471 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 3472 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3473 PetscFunctionReturn(0); 3474 } 3475 3476 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3477 3478 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3479 { 3480 PetscInt 
i,m,n,rstart,row,rend,nz,j,bs,cbs; 3481 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3482 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3483 Mat M,Msub,B=a->B; 3484 MatScalar *aa; 3485 Mat_SeqAIJ *aij; 3486 PetscInt *garray = a->garray,*colsub,Ncols; 3487 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3488 IS iscol_sub,iscmap; 3489 const PetscInt *is_idx,*cmap; 3490 PetscBool allcolumns=PETSC_FALSE; 3491 MPI_Comm comm; 3492 3493 PetscFunctionBegin; 3494 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3495 if (call == MAT_REUSE_MATRIX) { 3496 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3497 PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3498 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3499 3500 PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap)); 3501 PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3502 3503 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub)); 3504 PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3505 3506 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub)); 3507 3508 } else { /* call == MAT_INITIAL_MATRIX) */ 3509 PetscBool flg; 3510 3511 PetscCall(ISGetLocalSize(iscol,&n)); 3512 PetscCall(ISGetSize(iscol,&Ncols)); 3513 3514 /* (1) iscol -> nonscalable iscol_local */ 3515 /* Check for special case: each processor gets entire matrix columns */ 3516 PetscCall(ISIdentity(iscol_local,&flg)); 3517 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3518 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3519 if (allcolumns) { 3520 iscol_sub = iscol_local; 3521 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3522 PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap)); 3523 3524 } else { 3525 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3526 PetscInt *idx,*cmap1,k; 3527 PetscCall(PetscMalloc1(Ncols,&idx)); 3528 PetscCall(PetscMalloc1(Ncols,&cmap1)); 3529 PetscCall(ISGetIndices(iscol_local,&is_idx)); 3530 count = 0; 3531 k = 0; 3532 for (i=0; i<Ncols; i++) { 3533 j = is_idx[i]; 3534 if (j >= cstart && j < cend) { 3535 /* diagonal part of mat */ 3536 idx[count] = j; 3537 cmap1[count++] = i; /* column index in submat */ 3538 } else if (Bn) { 3539 /* off-diagonal part of mat */ 3540 if (j == garray[k]) { 3541 idx[count] = j; 3542 cmap1[count++] = i; /* column index in submat */ 3543 } else if (j > garray[k]) { 3544 while (j > garray[k] && k < Bn-1) k++; 3545 if (j == garray[k]) { 3546 idx[count] = j; 3547 cmap1[count++] = i; /* column index in submat */ 3548 } 3549 } 3550 } 3551 } 3552 PetscCall(ISRestoreIndices(iscol_local,&is_idx)); 3553 3554 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub)); 3555 PetscCall(ISGetBlockSize(iscol,&cbs)); 3556 PetscCall(ISSetBlockSize(iscol_sub,cbs)); 3557 3558 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap)); 3559 } 3560 3561 /* (3) Create sequential Msub */ 3562 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub)); 3563 } 3564 3565 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3566 aij = (Mat_SeqAIJ*)(Msub)->data; 3567 ii = aij->i; 3568 PetscCall(ISGetIndices(iscmap,&cmap)); 3569 3570 /* 3571 m - number of local rows 3572 Ncols - number of columns (same on all processors) 3573 rstart - first row in new global matrix generated 3574 */ 3575 PetscCall(MatGetSize(Msub,&m,NULL)); 3576 3577 if (call == MAT_INITIAL_MATRIX) { 3578 /* (4) Create parallel newmat */ 3579 PetscMPIInt rank,size; 3580 PetscInt csize; 3581 3582 PetscCallMPI(MPI_Comm_size(comm,&size)); 3583 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3584 3585 /* 3586 Determine the number of non-zeros in the diagonal and off-diagonal 3587 portions of the matrix in order to do correct preallocation 3588 */ 3589 3590 /* first get start and end of "diagonal" columns */ 3591 PetscCall(ISGetLocalSize(iscol,&csize)); 3592 if (csize == PETSC_DECIDE) { 3593 PetscCall(ISGetSize(isrow,&mglobal)); 3594 if (mglobal == Ncols) { /* square matrix */ 3595 nlocal = m; 3596 } else { 3597 nlocal = Ncols/size + ((Ncols % size) > rank); 3598 } 3599 } else { 3600 nlocal = csize; 3601 } 3602 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3603 rstart = rend - nlocal; 3604 PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols); 3605 3606 /* next, compute all the lengths */ 3607 jj = aij->j; 3608 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3609 olens = dlens + m; 3610 for (i=0; i<m; i++) { 3611 jend = ii[i+1] - ii[i]; 3612 olen = 0; 3613 dlen = 0; 3614 for (j=0; j<jend; j++) { 3615 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3616 else dlen++; 3617 jj++; 3618 } 3619 olens[i] = olen; 3620 dlens[i] = dlen; 3621 } 3622 3623 PetscCall(ISGetBlockSize(isrow,&bs)); 3624 PetscCall(ISGetBlockSize(iscol,&cbs)); 3625 3626 PetscCall(MatCreate(comm,&M)); 3627 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols)); 3628 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3629 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3630 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3631 
PetscCall(PetscFree(dlens)); 3632 3633 } else { /* call == MAT_REUSE_MATRIX */ 3634 M = *newmat; 3635 PetscCall(MatGetLocalSize(M,&i,NULL)); 3636 PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3637 PetscCall(MatZeroEntries(M)); 3638 /* 3639 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3640 rather than the slower MatSetValues(). 3641 */ 3642 M->was_assembled = PETSC_TRUE; 3643 M->assembled = PETSC_FALSE; 3644 } 3645 3646 /* (5) Set values of Msub to *newmat */ 3647 PetscCall(PetscMalloc1(count,&colsub)); 3648 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 3649 3650 jj = aij->j; 3651 PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa)); 3652 for (i=0; i<m; i++) { 3653 row = rstart + i; 3654 nz = ii[i+1] - ii[i]; 3655 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3656 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES)); 3657 jj += nz; aa += nz; 3658 } 3659 PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa)); 3660 PetscCall(ISRestoreIndices(iscmap,&cmap)); 3661 3662 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3663 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3664 3665 PetscCall(PetscFree(colsub)); 3666 3667 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3668 if (call == MAT_INITIAL_MATRIX) { 3669 *newmat = M; 3670 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub)); 3671 PetscCall(MatDestroy(&Msub)); 3672 3673 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub)); 3674 PetscCall(ISDestroy(&iscol_sub)); 3675 3676 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap)); 3677 PetscCall(ISDestroy(&iscmap)); 3678 3679 if (iscol_local) { 3680 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local)); 3681 PetscCall(ISDestroy(&iscol_local)); 3682 } 3683 } 3684 PetscFunctionReturn(0); 3685 } 3686 3687 /* 3688 Not great since it makes two copies of the submatrix, first an SeqAIJ 3689 in local and then by concatenating the local matrices the end result. 3690 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3691 3692 Note: This requires a sequential iscol with all indices. 
3693 */ 3694 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3695 { 3696 PetscMPIInt rank,size; 3697 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3698 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3699 Mat M,Mreuse; 3700 MatScalar *aa,*vwork; 3701 MPI_Comm comm; 3702 Mat_SeqAIJ *aij; 3703 PetscBool colflag,allcolumns=PETSC_FALSE; 3704 3705 PetscFunctionBegin; 3706 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3707 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3708 PetscCallMPI(MPI_Comm_size(comm,&size)); 3709 3710 /* Check for special case: each processor gets entire matrix columns */ 3711 PetscCall(ISIdentity(iscol,&colflag)); 3712 PetscCall(ISGetLocalSize(iscol,&n)); 3713 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3714 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3715 3716 if (call == MAT_REUSE_MATRIX) { 3717 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 3718 PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3719 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse)); 3720 } else { 3721 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse)); 3722 } 3723 3724 /* 3725 m - number of local rows 3726 n - number of columns (same on all processors) 3727 rstart - first row in new global matrix generated 3728 */ 3729 PetscCall(MatGetSize(Mreuse,&m,&n)); 3730 PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs)); 3731 if (call == MAT_INITIAL_MATRIX) { 3732 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3733 ii = aij->i; 3734 jj = aij->j; 3735 3736 /* 3737 Determine the number of non-zeros in the diagonal and off-diagonal 3738 portions of the matrix in order to do correct preallocation 3739 */ 3740 3741 /* first get start and end of "diagonal" columns */ 3742 if (csize == PETSC_DECIDE) { 3743 PetscCall(ISGetSize(isrow,&mglobal)); 3744 if (mglobal == n) { /* square matrix */ 3745 nlocal = m; 3746 } else { 3747 nlocal = n/size + ((n % size) > rank); 3748 } 3749 } else { 3750 nlocal = csize; 3751 } 3752 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3753 rstart = rend - nlocal; 3754 PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 3755 3756 /* next, compute all the lengths */ 3757 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3758 olens = dlens + m; 3759 for (i=0; i<m; i++) { 3760 jend = ii[i+1] - ii[i]; 3761 olen = 0; 3762 dlen = 0; 3763 for (j=0; j<jend; j++) { 3764 if (*jj < rstart || *jj >= rend) olen++; 3765 else dlen++; 3766 jj++; 3767 } 3768 olens[i] = olen; 3769 dlens[i] = dlen; 3770 } 3771 PetscCall(MatCreate(comm,&M)); 3772 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n)); 3773 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3774 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3775 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3776 PetscCall(PetscFree(dlens)); 3777 } else { 3778 PetscInt ml,nl; 3779 3780 M = *newmat; 3781 PetscCall(MatGetLocalSize(M,&ml,&nl)); 3782 PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3783 PetscCall(MatZeroEntries(M)); 3784 /* 3785 The next two lines are needed so we may call 
MatSetValues_MPIAIJ() below directly, 3786 rather than the slower MatSetValues(). 3787 */ 3788 M->was_assembled = PETSC_TRUE; 3789 M->assembled = PETSC_FALSE; 3790 } 3791 PetscCall(MatGetOwnershipRange(M,&rstart,&rend)); 3792 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3793 ii = aij->i; 3794 jj = aij->j; 3795 3796 /* trigger copy to CPU if needed */ 3797 PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa)); 3798 for (i=0; i<m; i++) { 3799 row = rstart + i; 3800 nz = ii[i+1] - ii[i]; 3801 cwork = jj; jj += nz; 3802 vwork = aa; aa += nz; 3803 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES)); 3804 } 3805 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa)); 3806 3807 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3808 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3809 *newmat = M; 3810 3811 /* save submatrix used in processor for next request */ 3812 if (call == MAT_INITIAL_MATRIX) { 3813 PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse)); 3814 PetscCall(MatDestroy(&Mreuse)); 3815 } 3816 PetscFunctionReturn(0); 3817 } 3818 3819 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3820 { 3821 PetscInt m,cstart, cend,j,nnz,i,d; 3822 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3823 const PetscInt *JJ; 3824 PetscBool nooffprocentries; 3825 3826 PetscFunctionBegin; 3827 PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]); 3828 3829 PetscCall(PetscLayoutSetUp(B->rmap)); 3830 PetscCall(PetscLayoutSetUp(B->cmap)); 3831 m = B->rmap->n; 3832 cstart = B->cmap->rstart; 3833 cend = B->cmap->rend; 3834 rstart = B->rmap->rstart; 3835 3836 PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz)); 3837 3838 if (PetscDefined(USE_DEBUG)) { 3839 for (i=0; i<m; i++) { 3840 nnz = Ii[i+1]- Ii[i]; 3841 JJ = J + Ii[i]; 3842 PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz); 3843 PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]); 3844 PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N); 3845 } 3846 } 3847 3848 for (i=0; i<m; i++) { 3849 nnz = Ii[i+1]- Ii[i]; 3850 JJ = J + Ii[i]; 3851 nnz_max = PetscMax(nnz_max,nnz); 3852 d = 0; 3853 for (j=0; j<nnz; j++) { 3854 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3855 } 3856 d_nnz[i] = d; 3857 o_nnz[i] = nnz - d; 3858 } 3859 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 3860 PetscCall(PetscFree2(d_nnz,o_nnz)); 3861 3862 for (i=0; i<m; i++) { 3863 ii = i + rstart; 3864 PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES)); 3865 } 3866 nooffprocentries = B->nooffprocentries; 3867 B->nooffprocentries = PETSC_TRUE; 3868 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 3869 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3870 B->nooffprocentries = nooffprocentries; 3871 3872 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3873 PetscFunctionReturn(0); 3874 } 3875 3876 /*@ 3877 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3878 (the default parallel PETSc format). 
3879 3880 Collective 3881 3882 Input Parameters: 3883 + B - the matrix 3884 . i - the indices into j for the start of each local row (starts with zero) 3885 . j - the column indices for each local row (starts with zero) 3886 - v - optional values in the matrix 3887 3888 Level: developer 3889 3890 Notes: 3891 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3892 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3893 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3894 3895 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3896 3897 The format which is used for the sparse matrix input, is equivalent to a 3898 row-major ordering.. i.e for the following matrix, the input data expected is 3899 as shown 3900 3901 $ 1 0 0 3902 $ 2 0 3 P0 3903 $ ------- 3904 $ 4 5 6 P1 3905 $ 3906 $ Process0 [P0]: rows_owned=[0,1] 3907 $ i = {0,1,3} [size = nrow+1 = 2+1] 3908 $ j = {0,0,2} [size = 3] 3909 $ v = {1,2,3} [size = 3] 3910 $ 3911 $ Process1 [P1]: rows_owned=[2] 3912 $ i = {0,3} [size = nrow+1 = 1+1] 3913 $ j = {0,1,2} [size = 3] 3914 $ v = {4,5,6} [size = 3] 3915 3916 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 3917 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 3918 @*/ 3919 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3920 { 3921 PetscFunctionBegin; 3922 PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v)); 3923 PetscFunctionReturn(0); 3924 } 3925 3926 /*@C 3927 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3928 (the default parallel PETSc format). For good matrix assembly performance 3929 the user should preallocate the matrix storage by setting the parameters 3930 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3931 performance can be increased by more than a factor of 50. 3932 3933 Collective 3934 3935 Input Parameters: 3936 + B - the matrix 3937 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3938 (same value is used for all local rows) 3939 . d_nnz - array containing the number of nonzeros in the various rows of the 3940 DIAGONAL portion of the local submatrix (possibly different for each row) 3941 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3942 The size of this array is equal to the number of local rows, i.e 'm'. 3943 For matrices that will be factored, you must leave room for (and set) 3944 the diagonal entry even if it is zero. 3945 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3946 submatrix (same value is used for all local rows). 3947 - o_nnz - array containing the number of nonzeros in the various rows of the 3948 OFF-DIAGONAL portion of the local submatrix (possibly different for 3949 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3950 structure. The size of this array is equal to the number 3951 of local rows, i.e 'm'. 3952 3953 If the *_nnz parameter is given then the *_nz parameter is ignored 3954 3955 The AIJ format (also called the Yale sparse matrix format or 3956 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3957 storage. 
   The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.
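
   For instance, a minimal call sequence using the scalar d_nz/o_nz estimates above
   might look like the following sketch (comm, m and n are assumed to be defined by
   the caller; the commented values are the proc0 estimates from the example):
.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);  /* e.g. d_nz = 2, o_nz = 2 on proc0 */
.ve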

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
   rows in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the entries of i are offsets into the local j array.

   The format used for the sparse matrix input is equivalent to a
   row-major ordering,
   i.e., for the following matrix, the input data expected is as shown:

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1 = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1 = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local
   rows in standard CSR format. Only the numerical values are updated; the row and column
   index arrays must be identical to the ones used to create the matrix.

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
.  J - column indices
-  v - matrix values
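
   Example usage (a sketch; variable names are illustrative, the index arrays Ii and J must be
   identical to those used when the matrix was created, and only the values in v may change):
.vb
     /* rescale the local numerical values, keeping the sparsity pattern fixed */
     for (k=0; k<Ii[m]; k++) v[k] *= 2.0;
     MatUpdateMPIAIJWithArrays(A,m,n,M,N,Ii,J,v);
.ve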

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       cstart,nnz,i,j;
  PetscInt       *ld;
  PetscBool      nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
  PetscScalar    *ad,*ao;
  const PetscInt *Adi = Ad->i;
  PetscInt       ldi,Iii,md;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
  cstart = mat->cmap->rstart;
  if (!Aij->ld) {
    /* count number of entries below block diagonal */
    PetscCall(PetscCalloc1(m,&ld));
    Aij->ld = ld;
    for (i=0; i<m; i++) {
      nnz = Ii[i+1]- Ii[i];
      j   = 0;
      /* check j < nnz before dereferencing J[j] so we never read past the end of the row */
      while (j < nnz && J[j] < cstart) j++;
      J    += nnz;
      ld[i] = j;
    }
  } else {
    ld = Aij->ld;
  }

  for (i=0; i<m; i++) {
    nnz = Ii[i+1]- Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i+1]-Adi[i];
    PetscCall(PetscArraycpy(ao,v + Iii,ldi));
    PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
    PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
(or PETSC_DECIDE to have 4229 calculated if N is given) For square matrices n is almost always m. 4230 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4231 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4232 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4233 (same value is used for all local rows) 4234 . d_nnz - array containing the number of nonzeros in the various rows of the 4235 DIAGONAL portion of the local submatrix (possibly different for each row) 4236 or NULL, if d_nz is used to specify the nonzero structure. 4237 The size of this array is equal to the number of local rows, i.e 'm'. 4238 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4239 submatrix (same value is used for all local rows). 4240 - o_nnz - array containing the number of nonzeros in the various rows of the 4241 OFF-DIAGONAL portion of the local submatrix (possibly different for 4242 each row) or NULL, if o_nz is used to specify the nonzero 4243 structure. The size of this array is equal to the number 4244 of local rows, i.e 'm'. 4245 4246 Output Parameter: 4247 . A - the matrix 4248 4249 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4250 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4251 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4252 4253 Notes: 4254 If the *_nnz parameter is given then the *_nz parameter is ignored 4255 4256 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4257 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4258 storage requirements for this matrix. 4259 4260 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4261 processor than it must be used on all processors that share the object for 4262 that argument. 4263 4264 The user MUST specify either the local or global matrix dimensions 4265 (possibly both). 4266 4267 The parallel matrix is partitioned across processors such that the 4268 first m0 rows belong to process 0, the next m1 rows belong to 4269 process 1, the next m2 rows belong to process 2 etc.. where 4270 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4271 values corresponding to [m x N] submatrix. 4272 4273 The columns are logically partitioned with the n0 columns belonging 4274 to 0th partition, the next n1 columns belonging to the next 4275 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4276 4277 The DIAGONAL portion of the local submatrix on any given processor 4278 is the submatrix corresponding to the rows and columns m,n 4279 corresponding to the given processor. i.e diagonal matrix on 4280 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4281 etc. The remaining portion of the local submatrix [m x (N-n)] 4282 constitute the OFF-DIAGONAL portion. The example below better 4283 illustrates this concept. 4284 4285 For a square global matrix we define each processor's diagonal portion 4286 to be its local rows and the corresponding columns (a square submatrix); 4287 each processor's off-diagonal portion encompasses the remainder of the 4288 local matrix (a rectangular submatrix). 4289 4290 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4291 4292 When calling this routine with a single process communicator, a matrix of 4293 type SEQAIJ is returned. 
   If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A, m,n,M,N);
     MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
        See viewer types in the manual page of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
        Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4375 In the above case the values for d_nnz,o_nnz are 4376 .vb 4377 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4378 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4379 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4380 .ve 4381 Here the space allocated is the sum of all the above values, i.e 34, and 4382 hence pre-allocation is perfect. 4383 4384 Level: intermediate 4385 4386 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4387 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4388 @*/ 4389 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4390 { 4391 PetscMPIInt size; 4392 4393 PetscFunctionBegin; 4394 PetscCall(MatCreate(comm,A)); 4395 PetscCall(MatSetSizes(*A,m,n,M,N)); 4396 PetscCallMPI(MPI_Comm_size(comm,&size)); 4397 if (size > 1) { 4398 PetscCall(MatSetType(*A,MATMPIAIJ)); 4399 PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz)); 4400 } else { 4401 PetscCall(MatSetType(*A,MATSEQAIJ)); 4402 PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz)); 4403 } 4404 PetscFunctionReturn(0); 4405 } 4406 4407 /*@C 4408 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4409 4410 Not collective 4411 4412 Input Parameter: 4413 . A - The MPIAIJ matrix 4414 4415 Output Parameters: 4416 + Ad - The local diagonal block as a SeqAIJ matrix 4417 . Ao - The local off-diagonal block as a SeqAIJ matrix 4418 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4419 4420 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4421 in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is 4422 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4423 local column numbers to global column numbers in the original matrix.
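   A minimal usage sketch (hypothetical variable names; declarations, error checking and nonempty-row checks omitted),
   using colmap to translate a local column index of Ao into the corresponding global column index of A:
.vb
      MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
      MatGetRow(Ao,0,&ncols,&cols,&vals);
      gcol = colmap[cols[0]];
      MatRestoreRow(Ao,0,&ncols,&cols,&vals);
.ve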
4424 4425 Level: intermediate 4426 4427 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4428 @*/ 4429 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4430 { 4431 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4432 PetscBool flg; 4433 4434 PetscFunctionBegin; 4435 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg)); 4436 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4437 if (Ad) *Ad = a->A; 4438 if (Ao) *Ao = a->B; 4439 if (colmap) *colmap = a->garray; 4440 PetscFunctionReturn(0); 4441 } 4442 4443 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4444 { 4445 PetscInt m,N,i,rstart,nnz,Ii; 4446 PetscInt *indx; 4447 PetscScalar *values; 4448 MatType rootType; 4449 4450 PetscFunctionBegin; 4451 PetscCall(MatGetSize(inmat,&m,&N)); 4452 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4453 PetscInt *dnz,*onz,sum,bs,cbs; 4454 4455 if (n == PETSC_DECIDE) { 4456 PetscCall(PetscSplitOwnership(comm,&n,&N)); 4457 } 4458 /* Check sum(n) = N */ 4459 PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm)); 4460 PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 4461 4462 PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm)); 4463 rstart -= m; 4464 4465 MatPreallocateBegin(comm,m,n,dnz,onz); 4466 for (i=0; i<m; i++) { 4467 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4468 PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz)); 4469 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4470 } 4471 4472 PetscCall(MatCreate(comm,outmat)); 4473 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4474 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 4475 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 4476 PetscCall(MatGetRootType_Private(inmat,&rootType)); 4477 PetscCall(MatSetType(*outmat,rootType)); 4478 PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz)); 4479 PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz)); 4480 MatPreallocateEnd(dnz,onz); 4481 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 4482 } 4483 4484 /* numeric phase */ 4485 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 4486 for (i=0; i<m; i++) { 4487 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4488 Ii = i + rstart; 4489 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 4490 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4491 } 4492 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 4493 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 4494 PetscFunctionReturn(0); 4495 } 4496 4497 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4498 { 4499 PetscMPIInt rank; 4500 PetscInt m,N,i,rstart,nnz; 4501 size_t len; 4502 const PetscInt *indx; 4503 PetscViewer out; 4504 char *name; 4505 Mat B; 4506 const PetscScalar *values; 4507 4508 PetscFunctionBegin; 4509 PetscCall(MatGetLocalSize(A,&m,NULL)); 4510 PetscCall(MatGetSize(A,NULL,&N)); 4511 /* Should this be the type of the diagonal block of A? 
*/ 4512 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 4513 PetscCall(MatSetSizes(B,m,N,m,N)); 4514 PetscCall(MatSetBlockSizesFromMats(B,A,A)); 4515 PetscCall(MatSetType(B,MATSEQAIJ)); 4516 PetscCall(MatSeqAIJSetPreallocation(B,0,NULL)); 4517 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 4518 for (i=0; i<m; i++) { 4519 PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values)); 4520 PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES)); 4521 PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values)); 4522 } 4523 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 4524 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 4525 4526 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 4527 PetscCall(PetscStrlen(outfile,&len)); 4528 PetscCall(PetscMalloc1(len+6,&name)); 4529 PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank)); 4530 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out)); 4531 PetscCall(PetscFree(name)); 4532 PetscCall(MatView(B,out)); 4533 PetscCall(PetscViewerDestroy(&out)); 4534 PetscCall(MatDestroy(&B)); 4535 PetscFunctionReturn(0); 4536 } 4537 4538 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4539 { 4540 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4541 4542 PetscFunctionBegin; 4543 if (!merge) PetscFunctionReturn(0); 4544 PetscCall(PetscFree(merge->id_r)); 4545 PetscCall(PetscFree(merge->len_s)); 4546 PetscCall(PetscFree(merge->len_r)); 4547 PetscCall(PetscFree(merge->bi)); 4548 PetscCall(PetscFree(merge->bj)); 4549 PetscCall(PetscFree(merge->buf_ri[0])); 4550 PetscCall(PetscFree(merge->buf_ri)); 4551 PetscCall(PetscFree(merge->buf_rj[0])); 4552 PetscCall(PetscFree(merge->buf_rj)); 4553 PetscCall(PetscFree(merge->coi)); 4554 PetscCall(PetscFree(merge->coj)); 4555 PetscCall(PetscFree(merge->owners_co)); 4556 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4557 PetscCall(PetscFree(merge)); 4558 PetscFunctionReturn(0); 4559 } 4560 4561 #include <../src/mat/utils/freespace.h> 4562 #include <petscbt.h> 4563 4564 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4565 { 4566 MPI_Comm comm; 4567 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4568 PetscMPIInt size,rank,taga,*len_s; 4569 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4570 PetscInt proc,m; 4571 PetscInt **buf_ri,**buf_rj; 4572 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4573 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4574 MPI_Request *s_waits,*r_waits; 4575 MPI_Status *status; 4576 const MatScalar *aa,*a_a; 4577 MatScalar **abuf_r,*ba_i; 4578 Mat_Merge_SeqsToMPI *merge; 4579 PetscContainer container; 4580 4581 PetscFunctionBegin; 4582 PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm)); 4583 PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0)); 4584 4585 PetscCallMPI(MPI_Comm_size(comm,&size)); 4586 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4587 4588 PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container)); 4589 PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4590 PetscCall(PetscContainerGetPointer(container,(void**)&merge)); 4591 PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a)); 4592 aa = a_a; 4593 4594 bi = merge->bi; 4595 bj = merge->bj; 4596 buf_ri = merge->buf_ri; 4597 buf_rj = merge->buf_rj; 4598 4599 PetscCall(PetscMalloc1(size,&status)); 4600 owners = merge->rowmap->range; 4601 len_s = merge->len_s; 4602 4603 /* send and recv matrix values */ 4604 /*-----------------------------*/ 4605 
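  /* Summary of the exchange below: each rank posts nonblocking receives for the value segments of the rows it owns
     in mpimat (receive lengths merge->len_r were determined in the symbolic phase), then sends the contiguous slice
     aa+ai[owners[proc]] of its sequential matrix values that is destined for rank proc. */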
PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga)); 4606 PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits)); 4607 4608 PetscCall(PetscMalloc1(merge->nsend+1,&s_waits)); 4609 for (proc=0,k=0; proc<size; proc++) { 4610 if (!len_s[proc]) continue; 4611 i = owners[proc]; 4612 PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k)); 4613 k++; 4614 } 4615 4616 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status)); 4617 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status)); 4618 PetscCall(PetscFree(status)); 4619 4620 PetscCall(PetscFree(s_waits)); 4621 PetscCall(PetscFree(r_waits)); 4622 4623 /* insert mat values of mpimat */ 4624 /*----------------------------*/ 4625 PetscCall(PetscMalloc1(N,&ba_i)); 4626 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4627 4628 for (k=0; k<merge->nrecv; k++) { 4629 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4630 nrows = *(buf_ri_k[k]); 4631 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4632 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4633 } 4634 4635 /* set values of ba */ 4636 m = merge->rowmap->n; 4637 for (i=0; i<m; i++) { 4638 arow = owners[rank] + i; 4639 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4640 bnzi = bi[i+1] - bi[i]; 4641 PetscCall(PetscArrayzero(ba_i,bnzi)); 4642 4643 /* add local non-zero vals of this proc's seqmat into ba */ 4644 anzi = ai[arow+1] - ai[arow]; 4645 aj = a->j + ai[arow]; 4646 aa = a_a + ai[arow]; 4647 nextaj = 0; 4648 for (j=0; nextaj<anzi; j++) { 4649 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4650 ba_i[j] += aa[nextaj++]; 4651 } 4652 } 4653 4654 /* add received vals into ba */ 4655 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4656 /* i-th row */ 4657 if (i == *nextrow[k]) { 4658 anzi = *(nextai[k]+1) - *nextai[k]; 4659 aj = buf_rj[k] + *(nextai[k]); 4660 aa = abuf_r[k] + *(nextai[k]); 4661 nextaj = 0; 4662 for (j=0; nextaj<anzi; j++) { 4663 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4664 ba_i[j] += aa[nextaj++]; 4665 } 4666 } 4667 nextrow[k]++; nextai[k]++; 4668 } 4669 } 4670 PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES)); 4671 } 4672 PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a)); 4673 PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY)); 4674 PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY)); 4675 4676 PetscCall(PetscFree(abuf_r[0])); 4677 PetscCall(PetscFree(abuf_r)); 4678 PetscCall(PetscFree(ba_i)); 4679 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4680 PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0)); 4681 PetscFunctionReturn(0); 4682 } 4683 4684 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4685 { 4686 Mat B_mpi; 4687 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4688 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4689 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4690 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4691 PetscInt len,proc,*dnz,*onz,bs,cbs; 4692 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4693 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4694 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4695 MPI_Status *status; 4696 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4697 PetscBT lnkbt; 4698 Mat_Merge_SeqsToMPI 
*merge; 4699 PetscContainer container; 4700 4701 PetscFunctionBegin; 4702 PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0)); 4703 4704 /* make sure it is a PETSc comm */ 4705 PetscCall(PetscCommDuplicate(comm,&comm,NULL)); 4706 PetscCallMPI(MPI_Comm_size(comm,&size)); 4707 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4708 4709 PetscCall(PetscNew(&merge)); 4710 PetscCall(PetscMalloc1(size,&status)); 4711 4712 /* determine row ownership */ 4713 /*---------------------------------------------------------*/ 4714 PetscCall(PetscLayoutCreate(comm,&merge->rowmap)); 4715 PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m)); 4716 PetscCall(PetscLayoutSetSize(merge->rowmap,M)); 4717 PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1)); 4718 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4719 PetscCall(PetscMalloc1(size,&len_si)); 4720 PetscCall(PetscMalloc1(size,&merge->len_s)); 4721 4722 m = merge->rowmap->n; 4723 owners = merge->rowmap->range; 4724 4725 /* determine the number of messages to send, their lengths */ 4726 /*---------------------------------------------------------*/ 4727 len_s = merge->len_s; 4728 4729 len = 0; /* length of buf_si[] */ 4730 merge->nsend = 0; 4731 for (proc=0; proc<size; proc++) { 4732 len_si[proc] = 0; 4733 if (proc == rank) { 4734 len_s[proc] = 0; 4735 } else { 4736 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4737 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4738 } 4739 if (len_s[proc]) { 4740 merge->nsend++; 4741 nrows = 0; 4742 for (i=owners[proc]; i<owners[proc+1]; i++) { 4743 if (ai[i+1] > ai[i]) nrows++; 4744 } 4745 len_si[proc] = 2*(nrows+1); 4746 len += len_si[proc]; 4747 } 4748 } 4749 4750 /* determine the number and length of messages to receive for ij-structure */ 4751 /*-------------------------------------------------------------------------*/ 4752 PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv)); 4753 PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri)); 4754 4755 /* post the Irecv of j-structure */ 4756 /*-------------------------------*/ 4757 PetscCall(PetscCommGetNewTag(comm,&tagj)); 4758 PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits)); 4759 4760 /* post the Isend of j-structure */ 4761 /*--------------------------------*/ 4762 PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits)); 4763 4764 for (proc=0, k=0; proc<size; proc++) { 4765 if (!len_s[proc]) continue; 4766 i = owners[proc]; 4767 PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k)); 4768 k++; 4769 } 4770 4771 /* receives and sends of j-structure are complete */ 4772 /*------------------------------------------------*/ 4773 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status)); 4774 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status)); 4775 4776 /* send and recv i-structure */ 4777 /*---------------------------*/ 4778 PetscCall(PetscCommGetNewTag(comm,&tagi)); 4779 PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits)); 4780 4781 PetscCall(PetscMalloc1(len+1,&buf_s)); 4782 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4783 for (proc=0,k=0; proc<size; proc++) { 4784 if (!len_s[proc]) continue; 4785 /* form outgoing message for i-structure: 4786 buf_si[0]: nrows to be sent 4787 [1:nrows]: row index (global) 4788 [nrows+1:2*nrows+1]: i-structure index 4789 */ 4790 
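    /* Illustration with made-up numbers: if the block of rows destined for [proc] contains two nonempty rows at global
       indices i0 and i1 holding 3 and 2 nonzeros, the outgoing message is {2, i0-owners[proc], i1-owners[proc], 0, 3, 5}
       and len_si[proc] = 2*(2+1) = 6. */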
/*-------------------------------------------*/ 4791 nrows = len_si[proc]/2 - 1; 4792 buf_si_i = buf_si + nrows+1; 4793 buf_si[0] = nrows; 4794 buf_si_i[0] = 0; 4795 nrows = 0; 4796 for (i=owners[proc]; i<owners[proc+1]; i++) { 4797 anzi = ai[i+1] - ai[i]; 4798 if (anzi) { 4799 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4800 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4801 nrows++; 4802 } 4803 } 4804 PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k)); 4805 k++; 4806 buf_si += len_si[proc]; 4807 } 4808 4809 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status)); 4810 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status)); 4811 4812 PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv)); 4813 for (i=0; i<merge->nrecv; i++) { 4814 PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i])); 4815 } 4816 4817 PetscCall(PetscFree(len_si)); 4818 PetscCall(PetscFree(len_ri)); 4819 PetscCall(PetscFree(rj_waits)); 4820 PetscCall(PetscFree2(si_waits,sj_waits)); 4821 PetscCall(PetscFree(ri_waits)); 4822 PetscCall(PetscFree(buf_s)); 4823 PetscCall(PetscFree(status)); 4824 4825 /* compute a local seq matrix in each processor */ 4826 /*----------------------------------------------*/ 4827 /* allocate bi array and free space for accumulating nonzero column info */ 4828 PetscCall(PetscMalloc1(m+1,&bi)); 4829 bi[0] = 0; 4830 4831 /* create and initialize a linked list */ 4832 nlnk = N+1; 4833 PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt)); 4834 4835 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4836 len = ai[owners[rank+1]] - ai[owners[rank]]; 4837 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space)); 4838 4839 current_space = free_space; 4840 4841 /* determine symbolic info for each local row */ 4842 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4843 4844 for (k=0; k<merge->nrecv; k++) { 4845 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4846 nrows = *buf_ri_k[k]; 4847 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4848 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4849 } 4850 4851 MatPreallocateBegin(comm,m,n,dnz,onz); 4852 len = 0; 4853 for (i=0; i<m; i++) { 4854 bnzi = 0; 4855 /* add local non-zero cols of this proc's seqmat into lnk */ 4856 arow = owners[rank] + i; 4857 anzi = ai[arow+1] - ai[arow]; 4858 aj = a->j + ai[arow]; 4859 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4860 bnzi += nlnk; 4861 /* add received col data into lnk */ 4862 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4863 if (i == *nextrow[k]) { /* i-th row */ 4864 anzi = *(nextai[k]+1) - *nextai[k]; 4865 aj = buf_rj[k] + *nextai[k]; 4866 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4867 bnzi += nlnk; 4868 nextrow[k]++; nextai[k]++; 4869 } 4870 } 4871 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4872 4873 /* if free space is not available, make more free space */ 4874 if (current_space->local_remaining<bnzi) { 4875 PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space)); 4876 nspacedouble++; 4877 } 4878 /* copy data into free space, then initialize lnk */ 4879 PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt)); 4880
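    /* classify this row's merged column indices into diagonal-block and off-diagonal-block counts for preallocation */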
PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz)); 4881 4882 current_space->array += bnzi; 4883 current_space->local_used += bnzi; 4884 current_space->local_remaining -= bnzi; 4885 4886 bi[i+1] = bi[i] + bnzi; 4887 } 4888 4889 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4890 4891 PetscCall(PetscMalloc1(bi[m]+1,&bj)); 4892 PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); 4893 PetscCall(PetscLLDestroy(lnk,lnkbt)); 4894 4895 /* create symbolic parallel matrix B_mpi */ 4896 /*---------------------------------------*/ 4897 PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs)); 4898 PetscCall(MatCreate(comm,&B_mpi)); 4899 if (n==PETSC_DECIDE) { 4900 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N)); 4901 } else { 4902 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4903 } 4904 PetscCall(MatSetBlockSizes(B_mpi,bs,cbs)); 4905 PetscCall(MatSetType(B_mpi,MATMPIAIJ)); 4906 PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz)); 4907 MatPreallocateEnd(dnz,onz); 4908 PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 4909 4910 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4911 B_mpi->assembled = PETSC_FALSE; 4912 merge->bi = bi; 4913 merge->bj = bj; 4914 merge->buf_ri = buf_ri; 4915 merge->buf_rj = buf_rj; 4916 merge->coi = NULL; 4917 merge->coj = NULL; 4918 merge->owners_co = NULL; 4919 4920 PetscCall(PetscCommDestroy(&comm)); 4921 4922 /* attach the supporting struct to B_mpi for reuse */ 4923 PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container)); 4924 PetscCall(PetscContainerSetPointer(container,merge)); 4925 PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI)); 4926 PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container)); 4927 PetscCall(PetscContainerDestroy(&container)); 4928 *mpimat = B_mpi; 4929 4930 PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0)); 4931 PetscFunctionReturn(0); 4932 } 4933 4934 /*@C 4935 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4936 matrices from each processor 4937 4938 Collective 4939 4940 Input Parameters: 4941 + comm - the communicators the parallel matrix will live on 4942 . seqmat - the input sequential matrices 4943 . m - number of local rows (or PETSC_DECIDE) 4944 . n - number of local columns (or PETSC_DECIDE) 4945 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4946 4947 Output Parameter: 4948 . mpimat - the parallel matrix generated 4949 4950 Level: advanced 4951 4952 Notes: 4953 The dimensions of the sequential matrix in each processor MUST be the same. 4954 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4955 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
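   A minimal usage sketch (hypothetical preallocation array nnz; each rank assembles a SeqAIJ with the full global
   dimensions M x N that holds only the entries it computed, possibly lying in rows owned by other ranks):
.vb
      MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,0,nnz,&seqmat);
      ... MatSetValues() on seqmat ...
      MatAssemblyBegin(seqmat,MAT_FINAL_ASSEMBLY); MatAssemblyEnd(seqmat,MAT_FINAL_ASSEMBLY);
      MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
.ve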
4956 @*/ 4957 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4958 { 4959 PetscMPIInt size; 4960 4961 PetscFunctionBegin; 4962 PetscCallMPI(MPI_Comm_size(comm,&size)); 4963 if (size == 1) { 4964 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4965 if (scall == MAT_INITIAL_MATRIX) { 4966 PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat)); 4967 } else { 4968 PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN)); 4969 } 4970 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4971 PetscFunctionReturn(0); 4972 } 4973 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4974 if (scall == MAT_INITIAL_MATRIX) { 4975 PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat)); 4976 } 4977 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat)); 4978 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4979 PetscFunctionReturn(0); 4980 } 4981 4982 /*@ 4983 MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4984 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4985 with MatGetSize() 4986 4987 Not Collective 4988 4989 Input Parameter: 4990 . A - the matrix 4991 4992 4993 Output Parameter: 4994 . A_loc - the local sequential matrix generated 4995 4996 Level: developer 4997 4998 Notes: 4999 In other words, this combines the two parts of a parallel MPIAIJ matrix on each process into a single matrix. 5000 5001 Destroy the matrix with MatDestroy() 5002 5003 .seealso: `MatMPIAIJGetLocalMat()` 5004 5005 @*/ 5006 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc) 5007 { 5008 PetscBool mpi; 5009 5010 PetscFunctionBegin; 5011 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi)); 5012 if (mpi) { 5013 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc)); 5014 } else { 5015 *A_loc = A; 5016 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5017 } 5018 PetscFunctionReturn(0); 5019 } 5020 5021 /*@ 5022 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5023 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5024 with MatGetSize() 5025 5026 Not Collective 5027 5028 Input Parameters: 5029 + A - the matrix 5030 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5031 5032 Output Parameter: 5033 . A_loc - the local sequential matrix generated 5034 5035 Level: developer 5036 5037 Notes: 5038 In other words, this combines the two parts of a parallel MPIAIJ matrix on each process into a single matrix. 5039 5040 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5041 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5042 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5043 modify the values of the returned A_loc.
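   A minimal usage sketch (hypothetical) that builds the local matrix once and later refreshes its values after the
   entries of A have changed:
.vb
      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      ... use A_loc, which has mlocal rows and N columns indexed by global column numbers ...
      MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
      MatDestroy(&A_loc);
.ve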
5044 5045 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5046 @*/ 5047 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5048 { 5049 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5050 Mat_SeqAIJ *mat,*a,*b; 5051 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5052 const PetscScalar *aa,*ba,*aav,*bav; 5053 PetscScalar *ca,*cam; 5054 PetscMPIInt size; 5055 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5056 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5057 PetscBool match; 5058 5059 PetscFunctionBegin; 5060 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match)); 5061 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5062 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5063 if (size == 1) { 5064 if (scall == MAT_INITIAL_MATRIX) { 5065 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5066 *A_loc = mpimat->A; 5067 } else if (scall == MAT_REUSE_MATRIX) { 5068 PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN)); 5069 } 5070 PetscFunctionReturn(0); 5071 } 5072 5073 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5074 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5075 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5076 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5077 PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav)); 5078 PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav)); 5079 aa = aav; 5080 ba = bav; 5081 if (scall == MAT_INITIAL_MATRIX) { 5082 PetscCall(PetscMalloc1(1+am,&ci)); 5083 ci[0] = 0; 5084 for (i=0; i<am; i++) { 5085 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5086 } 5087 PetscCall(PetscMalloc1(1+ci[am],&cj)); 5088 PetscCall(PetscMalloc1(1+ci[am],&ca)); 5089 k = 0; 5090 for (i=0; i<am; i++) { 5091 ncols_o = bi[i+1] - bi[i]; 5092 ncols_d = ai[i+1] - ai[i]; 5093 /* off-diagonal portion of A */ 5094 for (jo=0; jo<ncols_o; jo++) { 5095 col = cmap[*bj]; 5096 if (col >= cstart) break; 5097 cj[k] = col; bj++; 5098 ca[k++] = *ba++; 5099 } 5100 /* diagonal portion of A */ 5101 for (j=0; j<ncols_d; j++) { 5102 cj[k] = cstart + *aj++; 5103 ca[k++] = *aa++; 5104 } 5105 /* off-diagonal portion of A */ 5106 for (j=jo; j<ncols_o; j++) { 5107 cj[k] = cmap[*bj++]; 5108 ca[k++] = *ba++; 5109 } 5110 } 5111 /* put together the new matrix */ 5112 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc)); 5113 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5114 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5115 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5116 mat->free_a = PETSC_TRUE; 5117 mat->free_ij = PETSC_TRUE; 5118 mat->nonew = 0; 5119 } else if (scall == MAT_REUSE_MATRIX) { 5120 mat =(Mat_SeqAIJ*)(*A_loc)->data; 5121 ci = mat->i; 5122 cj = mat->j; 5123 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam)); 5124 for (i=0; i<am; i++) { 5125 /* off-diagonal portion of A */ 5126 ncols_o = bi[i+1] - bi[i]; 5127 for (jo=0; jo<ncols_o; jo++) { 5128 col = cmap[*bj]; 5129 if (col >= cstart) break; 5130 *cam++ = *ba++; bj++; 5131 } 5132 /* diagonal portion of A */ 5133 ncols_d = ai[i+1] - ai[i]; 5134 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5135 /* off-diagonal portion of A */ 5136 for (j=jo; j<ncols_o; j++) { 5137 *cam++ = *ba++; bj++; 5138 } 5139 } 5140 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam)); 5141 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5142 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav)); 5143 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav)); 5144 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5145 PetscFunctionReturn(0); 5146 } 5147 5148 /*@ 5149 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5150 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5151 5152 Not Collective 5153 5154 Input Parameters: 5155 + A - the matrix 5156 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5157 5158 Output Parameters: 5159 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5160 - A_loc - the local sequential matrix generated 5161 5162 Level: developer 5163 5164 Notes: 5165 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5166 5167 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5168 5169 @*/ 5170 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5171 { 5172 Mat Ao,Ad; 5173 const PetscInt *cmap; 5174 PetscMPIInt size; 5175 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5176 5177 PetscFunctionBegin; 5178 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap)); 5179 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5180 if (size == 1) { 5181 if (scall == MAT_INITIAL_MATRIX) { 5182 PetscCall(PetscObjectReference((PetscObject)Ad)); 5183 *A_loc = Ad; 5184 } else if (scall == MAT_REUSE_MATRIX) { 5185 PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN)); 5186 } 5187 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob)); 5188 PetscFunctionReturn(0); 5189 } 5190 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f)); 5191 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5192 if (f) { 5193 PetscCall((*f)(A,scall,glob,A_loc)); 5194 } else { 5195 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5196 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5197 Mat_SeqAIJ *c; 5198 PetscInt *ai = a->i, *aj = a->j; 5199 PetscInt *bi = b->i, *bj = b->j; 5200 PetscInt *ci,*cj; 5201 const PetscScalar *aa,*ba; 5202 PetscScalar *ca; 5203 PetscInt i,j,am,dn,on; 5204 5205 PetscCall(MatGetLocalSize(Ad,&am,&dn)); 5206 PetscCall(MatGetLocalSize(Ao,NULL,&on)); 5207 PetscCall(MatSeqAIJGetArrayRead(Ad,&aa)); 5208 
PetscCall(MatSeqAIJGetArrayRead(Ao,&ba)); 5209 if (scall == MAT_INITIAL_MATRIX) { 5210 PetscInt k; 5211 PetscCall(PetscMalloc1(1+am,&ci)); 5212 PetscCall(PetscMalloc1(ai[am]+bi[am],&cj)); 5213 PetscCall(PetscMalloc1(ai[am]+bi[am],&ca)); 5214 ci[0] = 0; 5215 for (i=0,k=0; i<am; i++) { 5216 const PetscInt ncols_o = bi[i+1] - bi[i]; 5217 const PetscInt ncols_d = ai[i+1] - ai[i]; 5218 ci[i+1] = ci[i] + ncols_o + ncols_d; 5219 /* diagonal portion of A */ 5220 for (j=0; j<ncols_d; j++,k++) { 5221 cj[k] = *aj++; 5222 ca[k] = *aa++; 5223 } 5224 /* off-diagonal portion of A */ 5225 for (j=0; j<ncols_o; j++,k++) { 5226 cj[k] = dn + *bj++; 5227 ca[k] = *ba++; 5228 } 5229 } 5230 /* put together the new matrix */ 5231 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc)); 5232 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5233 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5234 c = (Mat_SeqAIJ*)(*A_loc)->data; 5235 c->free_a = PETSC_TRUE; 5236 c->free_ij = PETSC_TRUE; 5237 c->nonew = 0; 5238 PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name)); 5239 } else if (scall == MAT_REUSE_MATRIX) { 5240 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca)); 5241 for (i=0; i<am; i++) { 5242 const PetscInt ncols_d = ai[i+1] - ai[i]; 5243 const PetscInt ncols_o = bi[i+1] - bi[i]; 5244 /* diagonal portion of A */ 5245 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5246 /* off-diagonal portion of A */ 5247 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5248 } 5249 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca)); 5250 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5251 PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa)); 5252 PetscCall(MatSeqAIJRestoreArrayRead(Ao,&ba)); 5253 if (glob) { 5254 PetscInt cst, *gidx; 5255 5256 PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL)); 5257 PetscCall(PetscMalloc1(dn+on,&gidx)); 5258 for (i=0; i<dn; i++) gidx[i] = cst + i; 5259 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5260 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob)); 5261 } 5262 } 5263 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5264 PetscFunctionReturn(0); 5265 } 5266 5267 /*@C 5268 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5269 5270 Not Collective 5271 5272 Input Parameters: 5273 + A - the matrix 5274 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5275 - row, col - index sets of rows and columns to extract (or NULL) 5276 5277 Output Parameter: 5278 .
A_loc - the local sequential matrix generated 5279 5280 Level: developer 5281 5282 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5283 5284 @*/ 5285 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5286 { 5287 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5288 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5289 IS isrowa,iscola; 5290 Mat *aloc; 5291 PetscBool match; 5292 5293 PetscFunctionBegin; 5294 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match)); 5295 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5296 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0)); 5297 if (!row) { 5298 start = A->rmap->rstart; end = A->rmap->rend; 5299 PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa)); 5300 } else { 5301 isrowa = *row; 5302 } 5303 if (!col) { 5304 start = A->cmap->rstart; 5305 cmap = a->garray; 5306 nzA = a->A->cmap->n; 5307 nzB = a->B->cmap->n; 5308 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5309 ncols = 0; 5310 for (i=0; i<nzB; i++) { 5311 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5312 else break; 5313 } 5314 imark = i; 5315 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5316 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5317 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola)); 5318 } else { 5319 iscola = *col; 5320 } 5321 if (scall != MAT_INITIAL_MATRIX) { 5322 PetscCall(PetscMalloc1(1,&aloc)); 5323 aloc[0] = *A_loc; 5324 } 5325 PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc)); 5326 if (!col) { /* attach global id of condensed columns */ 5327 PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola)); 5328 } 5329 *A_loc = aloc[0]; 5330 PetscCall(PetscFree(aloc)); 5331 if (!row) { 5332 PetscCall(ISDestroy(&isrowa)); 5333 } 5334 if (!col) { 5335 PetscCall(ISDestroy(&iscola)); 5336 } 5337 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0)); 5338 PetscFunctionReturn(0); 5339 } 5340 5341 /* 5342 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5343 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5344 * on a global size. 
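 * Implementation sketch: the requested rows are given by the index set 'rows'; two PetscSF objects, one rooted at the
 * diagonal block of P and one at the off-diagonal block, broadcast the row lengths, the column indices (converted to
 * global numbering), and the values from the owning ranks into the new sequential matrix. The two SFs are composed on
 * P_oth ("diagsf", "offdiagsf") so the values can be refreshed later without rebuilding the communication graph.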
5345 * */ 5346 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5347 { 5348 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5349 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5350 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5351 PetscMPIInt owner; 5352 PetscSFNode *iremote,*oiremote; 5353 const PetscInt *lrowindices; 5354 PetscSF sf,osf; 5355 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5356 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5357 MPI_Comm comm; 5358 ISLocalToGlobalMapping mapping; 5359 const PetscScalar *pd_a,*po_a; 5360 5361 PetscFunctionBegin; 5362 PetscCall(PetscObjectGetComm((PetscObject)P,&comm)); 5363 /* plocalsize is the number of roots 5364 * nrows is the number of leaves 5365 * */ 5366 PetscCall(MatGetLocalSize(P,&plocalsize,NULL)); 5367 PetscCall(ISGetLocalSize(rows,&nrows)); 5368 PetscCall(PetscCalloc1(nrows,&iremote)); 5369 PetscCall(ISGetIndices(rows,&lrowindices)); 5370 for (i=0;i<nrows;i++) { 5371 /* Find a remote index and an owner for a row 5372 * The row could be local or remote 5373 * */ 5374 owner = 0; 5375 lidx = 0; 5376 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx)); 5377 iremote[i].index = lidx; 5378 iremote[i].rank = owner; 5379 } 5380 /* Create SF to communicate how many nonzero columns for each row */ 5381 PetscCall(PetscSFCreate(comm,&sf)); 5382 /* SF will figure out the number of nonzero colunms for each row, and their 5383 * offsets 5384 * */ 5385 PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5386 PetscCall(PetscSFSetFromOptions(sf)); 5387 PetscCall(PetscSFSetUp(sf)); 5388 5389 PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets)); 5390 PetscCall(PetscCalloc1(2*plocalsize,&nrcols)); 5391 PetscCall(PetscCalloc1(nrows,&pnnz)); 5392 roffsets[0] = 0; 5393 roffsets[1] = 0; 5394 for (i=0;i<plocalsize;i++) { 5395 /* diag */ 5396 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5397 /* off diag */ 5398 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5399 /* compute offsets so that we relative location for each row */ 5400 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5401 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5402 } 5403 PetscCall(PetscCalloc1(2*nrows,&nlcols)); 5404 PetscCall(PetscCalloc1(2*nrows,&loffsets)); 5405 /* 'r' means root, and 'l' means leaf */ 5406 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5407 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5408 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5409 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5410 PetscCall(PetscSFDestroy(&sf)); 5411 PetscCall(PetscFree(roffsets)); 5412 PetscCall(PetscFree(nrcols)); 5413 dntotalcols = 0; 5414 ontotalcols = 0; 5415 ncol = 0; 5416 for (i=0;i<nrows;i++) { 5417 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5418 ncol = PetscMax(pnnz[i],ncol); 5419 /* diag */ 5420 dntotalcols += nlcols[i*2+0]; 5421 /* off diag */ 5422 ontotalcols += nlcols[i*2+1]; 5423 } 5424 /* We do not need to figure the right number of columns 5425 * since all the calculations will be done by going through the raw data 5426 * */ 5427 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth)); 5428 PetscCall(MatSetUp(*P_oth)); 5429 PetscCall(PetscFree(pnnz)); 5430 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5431 /* diag */ 5432 PetscCall(PetscCalloc1(dntotalcols,&iremote)); 5433 /* off diag */ 5434 
PetscCall(PetscCalloc1(ontotalcols,&oiremote)); 5435 /* diag */ 5436 PetscCall(PetscCalloc1(dntotalcols,&ilocal)); 5437 /* off diag */ 5438 PetscCall(PetscCalloc1(ontotalcols,&oilocal)); 5439 dntotalcols = 0; 5440 ontotalcols = 0; 5441 ntotalcols = 0; 5442 for (i=0;i<nrows;i++) { 5443 owner = 0; 5444 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL)); 5445 /* Set iremote for diag matrix */ 5446 for (j=0;j<nlcols[i*2+0];j++) { 5447 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5448 iremote[dntotalcols].rank = owner; 5449 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5450 ilocal[dntotalcols++] = ntotalcols++; 5451 } 5452 /* off diag */ 5453 for (j=0;j<nlcols[i*2+1];j++) { 5454 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5455 oiremote[ontotalcols].rank = owner; 5456 oilocal[ontotalcols++] = ntotalcols++; 5457 } 5458 } 5459 PetscCall(ISRestoreIndices(rows,&lrowindices)); 5460 PetscCall(PetscFree(loffsets)); 5461 PetscCall(PetscFree(nlcols)); 5462 PetscCall(PetscSFCreate(comm,&sf)); 5463 /* P serves as roots and P_oth is leaves 5464 * Diag matrix 5465 * */ 5466 PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5467 PetscCall(PetscSFSetFromOptions(sf)); 5468 PetscCall(PetscSFSetUp(sf)); 5469 5470 PetscCall(PetscSFCreate(comm,&osf)); 5471 /* Off diag */ 5472 PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER)); 5473 PetscCall(PetscSFSetFromOptions(osf)); 5474 PetscCall(PetscSFSetUp(osf)); 5475 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5476 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5477 /* We operate on the matrix internal data for saving memory */ 5478 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5479 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5480 PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL)); 5481 /* Convert to global indices for diag matrix */ 5482 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5483 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5484 /* We want P_oth store global indices */ 5485 PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping)); 5486 /* Use memory scalable approach */ 5487 PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH)); 5488 PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j)); 5489 PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5490 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5491 /* Convert back to local indices */ 5492 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5493 PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5494 nout = 0; 5495 PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j)); 5496 PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout); 5497 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5498 /* Exchange values */ 5499 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5500 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5501 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5502 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5503 /* Stop PETSc from shrinking memory */ 5504 for (i=0;i<nrows;i++) 
p_oth->ilen[i] = p_oth->imax[i]; 5505 PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY)); 5506 PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY)); 5507 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5508 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf)); 5509 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf)); 5510 PetscCall(PetscSFDestroy(&sf)); 5511 PetscCall(PetscSFDestroy(&osf)); 5512 PetscFunctionReturn(0); 5513 } 5514 5515 /* 5516 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5517 * This supports MPIAIJ and MAIJ 5518 * */ 5519 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5520 { 5521 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5522 Mat_SeqAIJ *p_oth; 5523 IS rows,map; 5524 PetscHMapI hamp; 5525 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5526 MPI_Comm comm; 5527 PetscSF sf,osf; 5528 PetscBool has; 5529 5530 PetscFunctionBegin; 5531 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5532 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0)); 5533 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5534 * and then create a submatrix (that often is an overlapping matrix) 5535 * */ 5536 if (reuse == MAT_INITIAL_MATRIX) { 5537 /* Use a hash table to figure out unique keys */ 5538 PetscCall(PetscHMapICreate(&hamp)); 5539 PetscCall(PetscHMapIResize(hamp,a->B->cmap->n)); 5540 PetscCall(PetscCalloc1(a->B->cmap->n,&mapping)); 5541 count = 0; 5542 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5543 for (i=0;i<a->B->cmap->n;i++) { 5544 key = a->garray[i]/dof; 5545 PetscCall(PetscHMapIHas(hamp,key,&has)); 5546 if (!has) { 5547 mapping[i] = count; 5548 PetscCall(PetscHMapISet(hamp,key,count++)); 5549 } else { 5550 /* Current 'i' has the same value the previous step */ 5551 mapping[i] = count-1; 5552 } 5553 } 5554 PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map)); 5555 PetscCall(PetscHMapIGetSize(hamp,&htsize)); 5556 PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count); 5557 PetscCall(PetscCalloc1(htsize,&rowindices)); 5558 off = 0; 5559 PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices)); 5560 PetscCall(PetscHMapIDestroy(&hamp)); 5561 PetscCall(PetscSortInt(htsize,rowindices)); 5562 PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows)); 5563 /* In case, the matrix was already created but users want to recreate the matrix */ 5564 PetscCall(MatDestroy(P_oth)); 5565 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth)); 5566 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map)); 5567 PetscCall(ISDestroy(&map)); 5568 PetscCall(ISDestroy(&rows)); 5569 } else if (reuse == MAT_REUSE_MATRIX) { 5570 /* If matrix was already created, we simply update values using SF objects 5571 * that as attached to the matrix ealier. 
5572 */ 5573 const PetscScalar *pd_a,*po_a; 5574 5575 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf)); 5576 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf)); 5577 PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5578 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5579 /* Update values in place */ 5580 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5581 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5582 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5583 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5584 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5585 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5586 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5587 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5588 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5589 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0)); 5590 PetscFunctionReturn(0); 5591 } 5592 5593 /*@C 5594 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5595 5596 Collective on Mat 5597 5598 Input Parameters: 5599 + A - the first matrix in mpiaij format 5600 . B - the second matrix in mpiaij format 5601 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5602 5603 Output Parameters: 5604 + rowb - On input index sets of rows of B to extract (or NULL), modified on output 5605 . colb - On input index sets of columns of B to extract (or NULL), modified on output 5606 - B_seq - the sequential matrix generated 5607 5608 Level: developer 5609 5610 @*/ 5611 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5612 { 5613 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5614 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5615 IS isrowb,iscolb; 5616 Mat *bseq=NULL; 5617 5618 PetscFunctionBegin; 5619 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5620 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5621 } 5622 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0)); 5623 5624 if (scall == MAT_INITIAL_MATRIX) { 5625 start = A->cmap->rstart; 5626 cmap = a->garray; 5627 nzA = a->A->cmap->n; 5628 nzB = a->B->cmap->n; 5629 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5630 ncols = 0; 5631 for (i=0; i<nzB; i++) { /* row < local row index */ 5632 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5633 else break; 5634 } 5635 imark = i; 5636 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5637 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5638 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb)); 5639 PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb)); 5640 } else { 5641 PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5642 isrowb = *rowb; iscolb = *colb; 5643 PetscCall(PetscMalloc1(1,&bseq)); 5644 bseq[0] = *B_seq; 5645 } 5646 PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq)); 5647 *B_seq = bseq[0]; 5648 PetscCall(PetscFree(bseq)); 5649 if (!rowb) { 5650 PetscCall(ISDestroy(&isrowb)); 5651 } else { 5652 *rowb = isrowb; 5653 } 5654 if (!colb) { 5655 PetscCall(ISDestroy(&iscolb)); 5656 } else 
{ 5657 *colb = iscolb; 5658 } 5659 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0)); 5660 PetscFunctionReturn(0); 5661 } 5662 5663 /* 5664 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5665 of the OFF-DIAGONAL portion of local A 5666 5667 Collective on Mat 5668 5669 Input Parameters: 5670 + A,B - the matrices in mpiaij format 5671 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5672 5673 Output Parameter: 5674 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5675 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5676 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5677 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5678 5679 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5680 for this matrix. This is not desirable.. 5681 5682 Level: developer 5683 5684 */ 5685 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5686 { 5687 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5688 Mat_SeqAIJ *b_oth; 5689 VecScatter ctx; 5690 MPI_Comm comm; 5691 const PetscMPIInt *rprocs,*sprocs; 5692 const PetscInt *srow,*rstarts,*sstarts; 5693 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5694 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5695 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5696 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5697 PetscMPIInt size,tag,rank,nreqs; 5698 5699 PetscFunctionBegin; 5700 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5701 PetscCallMPI(MPI_Comm_size(comm,&size)); 5702 5703 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5704 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5705 } 5706 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0)); 5707 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 5708 5709 if (size == 1) { 5710 startsj_s = NULL; 5711 bufa_ptr = NULL; 5712 *B_oth = NULL; 5713 PetscFunctionReturn(0); 5714 } 5715 5716 ctx = a->Mvctx; 5717 tag = ((PetscObject)ctx)->tag; 5718 5719 PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5720 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5721 PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs)); 5722 PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs)); 5723 PetscCall(PetscMalloc1(nreqs,&reqs)); 5724 rwaits = reqs; 5725 swaits = reqs + nrecvs; 5726 5727 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5728 if (scall == MAT_INITIAL_MATRIX) { 5729 /* i-array */ 5730 /*---------*/ 5731 /* post receives */ 5732 if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5733 for (i=0; i<nrecvs; i++) { 5734 rowlen = rvalues + rstarts[i]*rbs; 5735 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5736 
PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5737 } 5738 5739 /* pack the outgoing message */ 5740 PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj)); 5741 5742 sstartsj[0] = 0; 5743 rstartsj[0] = 0; 5744 len = 0; /* total length of j or a array to be sent */ 5745 if (nsends) { 5746 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5747 PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues)); 5748 } 5749 for (i=0; i<nsends; i++) { 5750 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5751 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5752 for (j=0; j<nrows; j++) { 5753 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5754 for (l=0; l<sbs; l++) { 5755 PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */ 5756 5757 rowlen[j*sbs+l] = ncols; 5758 5759 len += ncols; 5760 PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); 5761 } 5762 k++; 5763 } 5764 PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5765 5766 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5767 } 5768 /* recvs and sends of i-array are completed */ 5769 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5770 PetscCall(PetscFree(svalues)); 5771 5772 /* allocate buffers for sending j and a arrays */ 5773 PetscCall(PetscMalloc1(len+1,&bufj)); 5774 PetscCall(PetscMalloc1(len+1,&bufa)); 5775 5776 /* create i-array of B_oth */ 5777 PetscCall(PetscMalloc1(aBn+2,&b_othi)); 5778 5779 b_othi[0] = 0; 5780 len = 0; /* total length of j or a array to be received */ 5781 k = 0; 5782 for (i=0; i<nrecvs; i++) { 5783 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5784 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5785 for (j=0; j<nrows; j++) { 5786 b_othi[k+1] = b_othi[k] + rowlen[j]; 5787 PetscCall(PetscIntSumError(rowlen[j],len,&len)); 5788 k++; 5789 } 5790 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5791 } 5792 PetscCall(PetscFree(rvalues)); 5793 5794 /* allocate space for j and a arrays of B_oth */ 5795 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj)); 5796 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha)); 5797 5798 /* j-array */ 5799 /*---------*/ 5800 /* post receives of j-array */ 5801 for (i=0; i<nrecvs; i++) { 5802 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5803 PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5804 } 5805 5806 /* pack the outgoing message j-array */ 5807 if (nsends) k = sstarts[0]; 5808 for (i=0; i<nsends; i++) { 5809 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5810 bufJ = bufj+sstartsj[i]; 5811 for (j=0; j<nrows; j++) { 5812 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5813 for (ll=0; ll<sbs; ll++) { 5814 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5815 for (l=0; l<ncols; l++) { 5816 *bufJ++ = cols[l]; 5817 } 5818 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5819 } 5820 } 5821 PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5822 } 5823 5824 /* recvs and sends of j-array are completed */ 5825 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5826 } else if (scall == MAT_REUSE_MATRIX) { 5827 sstartsj = *startsj_s; 5828 rstartsj = *startsj_r; 5829 bufa = *bufa_ptr; 5830 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5831 
PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha));
5832 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Unsupported MatReuse value; scall must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5833
5834 /* a-array */
5835 /*---------*/
5836 /* post receives of a-array */
5837 for (i=0; i<nrecvs; i++) {
5838 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5839 PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i));
5840 }
5841
5842 /* pack the outgoing message a-array */
5843 if (nsends) k = sstarts[0];
5844 for (i=0; i<nsends; i++) {
5845 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5846 bufA = bufa+sstartsj[i];
5847 for (j=0; j<nrows; j++) {
5848 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5849 for (ll=0; ll<sbs; ll++) {
5850 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5851 for (l=0; l<ncols; l++) {
5852 *bufA++ = vals[l];
5853 }
5854 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
5855 }
5856 }
5857 PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i));
5858 }
5859 /* recvs and sends of a-array are completed */
5860 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
5861 PetscCall(PetscFree(reqs));
5862
5863 if (scall == MAT_INITIAL_MATRIX) {
5864 /* put together the new matrix */
5865 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth));
5866
5867 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5868 /* Since these are PETSc arrays, change flags to free them as necessary. */
5869 b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5870 b_oth->free_a = PETSC_TRUE;
5871 b_oth->free_ij = PETSC_TRUE;
5872 b_oth->nonew = 0;
5873
5874 PetscCall(PetscFree(bufj));
5875 if (!startsj_s || !bufa_ptr) {
5876 PetscCall(PetscFree2(sstartsj,rstartsj));
5877 PetscCall(PetscFree(bufa));
5878 } else {
5879 *startsj_s = sstartsj;
5880 *startsj_r = rstartsj;
5881 *bufa_ptr = bufa;
5882 }
5883 } else if (scall == MAT_REUSE_MATRIX) {
5884 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha));
5885 }
5886
5887 PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs));
5888 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs));
5889 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0));
5890 PetscFunctionReturn(0);
5891 }
5892
5893 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5894 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5895 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5896 #if defined(PETSC_HAVE_MKL_SPARSE)
5897 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5898 #endif
5899 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5900 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5901 #if defined(PETSC_HAVE_ELEMENTAL)
5902 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5903 #endif
5904 #if defined(PETSC_HAVE_SCALAPACK)
5905 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5906 #endif
5907 #if defined(PETSC_HAVE_HYPRE)
5908 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5909 #endif
5910 #if defined(PETSC_HAVE_CUDA)
5911 PETSC_INTERN PetscErrorCode
MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5912 #endif 5913 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5914 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5915 #endif 5916 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5917 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5918 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5919 5920 /* 5921 Computes (B'*A')' since computing B*A directly is untenable 5922 5923 n p p 5924 [ ] [ ] [ ] 5925 m [ A ] * n [ B ] = m [ C ] 5926 [ ] [ ] [ ] 5927 5928 */ 5929 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5930 { 5931 Mat At,Bt,Ct; 5932 5933 PetscFunctionBegin; 5934 PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At)); 5935 PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt)); 5936 PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct)); 5937 PetscCall(MatDestroy(&At)); 5938 PetscCall(MatDestroy(&Bt)); 5939 PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C)); 5940 PetscCall(MatDestroy(&Ct)); 5941 PetscFunctionReturn(0); 5942 } 5943 5944 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5945 { 5946 PetscBool cisdense; 5947 5948 PetscFunctionBegin; 5949 PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n); 5950 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N)); 5951 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 5952 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 5953 if (!cisdense) { 5954 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 5955 } 5956 PetscCall(MatSetUp(C)); 5957 5958 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5959 PetscFunctionReturn(0); 5960 } 5961 5962 /* ----------------------------------------------------------------*/ 5963 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 5964 { 5965 Mat_Product *product = C->product; 5966 Mat A = product->A,B=product->B; 5967 5968 PetscFunctionBegin; 5969 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 5970 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5971 5972 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 5973 C->ops->productsymbolic = MatProductSymbolic_AB; 5974 PetscFunctionReturn(0); 5975 } 5976 5977 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 5978 { 5979 Mat_Product *product = C->product; 5980 5981 PetscFunctionBegin; 5982 if (product->type == MATPRODUCT_AB) { 5983 PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 5984 } 5985 PetscFunctionReturn(0); 5986 } 5987 5988 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 5989 5990 Input Parameters: 5991 5992 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 5993 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 5994 5995 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 5996 5997 For Set1, j1[] contains column indices of the nonzeros. 
5998 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
5999 respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6000 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6001
6002 Similarly for Set2.
6003
6004 This routine merges the two sets of nonzeros row by row and removes repeats.
6005
6006 Output Parameters: (memory is allocated by the caller)
6007
6008 i[],j[]: the CSR of the merged matrix, which has m rows.
6009 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to the imap1[k]-th unique nonzero in the merged matrix.
6010 imap2[]: similar to imap1[], but for Set2.
6011 Note we order nonzeros row-by-row and from left to right.
6012 */
6013 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
6014 const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
6015 PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
6016 {
6017 PetscInt r,m; /* Row index of mat */
6018 PetscCount t,t1,t2,b1,e1,b2,e2;
6019
6020 PetscFunctionBegin;
6021 PetscCall(MatGetLocalSize(mat,&m,NULL));
6022 t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
6023 i[0] = 0;
6024 for (r=0; r<m; r++) { /* Do row by row merging */
6025 b1 = rowBegin1[r];
6026 e1 = rowEnd1[r];
6027 b2 = rowBegin2[r];
6028 e2 = rowEnd2[r];
6029 while (b1 < e1 && b2 < e2) {
6030 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6031 j[t] = j1[b1];
6032 imap1[t1] = t;
6033 imap2[t2] = t;
6034 b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to the next unique nonzero of Set1 in this row */
6035 b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to the next unique nonzero of Set2 in this row */
6036 t1++; t2++; t++;
6037 } else if (j1[b1] < j2[b2]) {
6038 j[t] = j1[b1];
6039 imap1[t1] = t;
6040 b1 += jmap1[t1+1] - jmap1[t1];
6041 t1++; t++;
6042 } else {
6043 j[t] = j2[b2];
6044 imap2[t2] = t;
6045 b2 += jmap2[t2+1] - jmap2[t2];
6046 t2++; t++;
6047 }
6048 }
6049 /* Merge the remaining in either j1[] or j2[] */
6050 while (b1 < e1) {
6051 j[t] = j1[b1];
6052 imap1[t1] = t;
6053 b1 += jmap1[t1+1] - jmap1[t1];
6054 t1++; t++;
6055 }
6056 while (b2 < e2) {
6057 j[t] = j2[b2];
6058 imap2[t2] = t;
6059 b2 += jmap2[t2+1] - jmap2[t2];
6060 t2++; t++;
6061 }
6062 i[r+1] = t;
6063 }
6064 PetscFunctionReturn(0);
6065 }
6066
6067 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6068
6069 Input Parameters:
6070 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6071 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6072 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6073
6074 i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6075 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6076
6077 Output Parameters:
6078 j[],perm[]: on output, j[] is sorted within each row and perm[] is permuted accordingly.
6079 rowBegin[],rowMid[],rowEnd[]: of length m; the memory is preallocated and zeroed by the caller.
6080 They contain indices pointing to j[].
For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6081 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6082 6083 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6084 Atot: number of entries belonging to the diagonal block. 6085 Annz: number of unique nonzeros belonging to the diagonal block. 6086 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6087 repeats (i.e., same 'i,j' pair). 6088 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6089 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6090 6091 Atot: number of entries belonging to the diagonal block 6092 Annz: number of unique nonzeros belonging to the diagonal block. 6093 6094 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6095 6096 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6097 */ 6098 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[], 6099 PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[], 6100 PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_, 6101 PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_) 6102 { 6103 PetscInt cstart,cend,rstart,rend,row,col; 6104 PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6105 PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6106 PetscCount k,m,p,q,r,s,mid; 6107 PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap; 6108 6109 PetscFunctionBegin; 6110 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6111 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6112 m = rend - rstart; 6113 6114 for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */ 6115 6116 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6117 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6118 */ 6119 while (k<n) { 6120 row = i[k]; 6121 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6122 for (s=k; s<n; s++) if (i[s] != row) break; 6123 for (p=k; p<s; p++) { 6124 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6125 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); 6126 } 6127 PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k)); 6128 PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6129 rowBegin[row-rstart] = k; 6130 rowMid[row-rstart] = mid; 6131 rowEnd[row-rstart] = s; 6132 6133 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6134 Atot += mid - k; 6135 Btot += s - mid; 6136 6137 /* Count unique nonzeros of this diag/offdiag row */ 6138 for (p=k; p<mid;) { 6139 col = j[p]; 6140 do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */ 6141 Annz++; 6142 } 6143 6144 for (p=mid; p<s;) { 6145 col = j[p]; 6146 do {p++;} while (p<s && j[p] == col); 6147 Bnnz++; 6148 } 6149 k = s; 6150 } 6151 6152 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6153 PetscCall(PetscMalloc1(Atot,&Aperm)); 6154 PetscCall(PetscMalloc1(Btot,&Bperm)); 6155 PetscCall(PetscMalloc1(Annz+1,&Ajmap)); 6156 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap)); 6157 6158 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6159 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6160 for (r=0; r<m; r++) { 6161 k = rowBegin[r]; 6162 mid = rowMid[r]; 6163 s = rowEnd[r]; 6164 PetscCall(PetscArraycpy(Aperm+Atot,perm+k, mid-k)); 6165 PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid)); 6166 Atot += mid - k; 6167 Btot += s - mid; 6168 6169 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6170 for (p=k; p<mid;) { 6171 col = j[p]; 6172 q = p; 6173 do {p++;} while (p<mid && j[p] == col); 6174 Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6175 Annz++; 6176 } 6177 6178 for (p=mid; p<s;) { 6179 col = j[p]; 6180 q = p; 6181 do {p++;} while (p<s && j[p] == col); 6182 Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6183 Bnnz++; 6184 } 6185 } 6186 /* Output */ 6187 *Aperm_ = Aperm; 6188 *Annz_ = Annz; 6189 *Atot_ = Atot; 6190 *Ajmap_ = Ajmap; 6191 *Bperm_ = Bperm; 6192 *Bnnz_ = Bnnz; 6193 *Btot_ = Btot; 6194 *Bjmap_ = Bjmap; 6195 PetscFunctionReturn(0); 6196 } 6197 6198 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6199 6200 Input Parameters: 6201 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6202 nnz: number of unique nonzeros in the merged matrix 6203 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6204 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6205 6206 Output Parameter: (memory is allocated by the caller) 6207 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6208 6209 Example: 6210 nnz1 = 4 6211 nnz = 6 6212 imap = [1,3,4,5] 6213 jmap = [0,3,5,6,7] 6214 then, 6215 jmap_new = [0,0,3,3,5,6,7] 6216 */ 6217 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[]) 6218 { 6219 PetscCount k,p; 6220 6221 PetscFunctionBegin; 6222 jmap_new[0] = 0; 6223 p = nnz; /* p loops 
over jmap_new[] backwards */ 6224 for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */ 6225 for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1]; 6226 } 6227 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6228 PetscFunctionReturn(0); 6229 } 6230 6231 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6232 { 6233 MPI_Comm comm; 6234 PetscMPIInt rank,size; 6235 PetscInt m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6236 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6237 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6238 6239 PetscFunctionBegin; 6240 PetscCall(PetscFree(mpiaij->garray)); 6241 PetscCall(VecDestroy(&mpiaij->lvec)); 6242 #if defined(PETSC_USE_CTABLE) 6243 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6244 #else 6245 PetscCall(PetscFree(mpiaij->colmap)); 6246 #endif 6247 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6248 mat->assembled = PETSC_FALSE; 6249 mat->was_assembled = PETSC_FALSE; 6250 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6251 6252 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 6253 PetscCallMPI(MPI_Comm_size(comm,&size)); 6254 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 6255 PetscCall(PetscLayoutSetUp(mat->rmap)); 6256 PetscCall(PetscLayoutSetUp(mat->cmap)); 6257 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6258 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6259 PetscCall(MatGetLocalSize(mat,&m,&n)); 6260 PetscCall(MatGetSize(mat,&M,&N)); 6261 6262 /* ---------------------------------------------------------------------------*/ 6263 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6264 /* entries come first, then local rows, then remote rows. */ 6265 /* ---------------------------------------------------------------------------*/ 6266 PetscCount n1 = coo_n,*perm1; 6267 PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 6268 PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1)); 6269 PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */ 6270 PetscCall(PetscArraycpy(j1,coo_j,n1)); 6271 for (k=0; k<n1; k++) perm1[k] = k; 6272 6273 /* Manipulate indices so that entries with negative row or col indices will have smallest 6274 row indices, local entries will have greater but negative row indices, and remote entries 6275 will have positive row indices. 
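As an illustration (the numbers are hypothetical): with rstart=100 and rend=200 on this rank, an entry flagged by a negative row or
column index becomes PETSC_MIN_INT, a local row 150 becomes 150-PETSC_MAX_INT (negative, but larger than PETSC_MIN_INT), and a
remote row 250 keeps the value 250; a single sort by row index therefore places the three groups in exactly this order.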
*/
6276
6277 for (k=0; k<n1; k++) {
6278 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */
6279 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6280 else { PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6281 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; } /* Ignore offproc entries as if they had negative indices */
6282 }
6283
6284 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6285 PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1));
6286 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */
6287 PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */
6288 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows */
6289
6290 /* ---------------------------------------------------------------------------*/
6291 /* Split local rows into diag/offdiag portions */
6292 /* ---------------------------------------------------------------------------*/
6293 PetscCount *rowBegin1,*rowMid1,*rowEnd1;
6294 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1;
6295 PetscCount Annz1,Bnnz1,Atot1,Btot1;
6296
6297 PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1));
6298 PetscCall(PetscMalloc1(n1-rem,&Cperm1));
6299 PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1));
6300
6301 /* ---------------------------------------------------------------------------*/
6302 /* Send remote rows to their owner */
6303 /* ---------------------------------------------------------------------------*/
6304 /* Find which rows should be sent to which remote ranks */
6305 PetscInt nsend = 0; /* Number of MPI ranks to send data to */
6306 PetscMPIInt *sendto; /* [nsend], storing remote ranks */
6307 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6308 const PetscInt *ranges;
6309 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6310
6311 PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges));
6312 PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries));
6313 for (k=rem; k<n1;) {
6314 PetscMPIInt owner;
6315 PetscInt firstRow,lastRow;
6316
6317 /* Locate a row range */
6318 firstRow = i1[k]; /* first row of this owner */
6319 PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner));
6320 lastRow = ranges[owner+1]-1; /* last row of this owner */
6321
6322 /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */
6323 PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p));
6324
6325 /* All entries in [k,p) belong to this remote owner */
6326 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6327 PetscMPIInt *sendto2;
6328 PetscInt *nentries2;
6329 PetscInt maxNsend2 = (maxNsend <= size/2) ? maxNsend*2 : size;
6330
6331 PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2));
6332 PetscCall(PetscArraycpy(sendto2,sendto,maxNsend));
6333 PetscCall(PetscArraycpy(nentries2,nentries,maxNsend));
6334 PetscCall(PetscFree2(sendto,nentries));
6335 sendto = sendto2;
6336 nentries = nentries2;
6337 maxNsend = maxNsend2;
6338 }
6339 sendto[nsend] = owner;
6340 nentries[nsend] = p - k;
6341 PetscCall(PetscCountCast(p-k,&nentries[nsend]));
6342 nsend++;
6343 k = p;
6344 }
6345
6346 /* Build 1st SF to know offsets on remote to send data */
6347 PetscSF sf1;
6348 PetscInt nroots = 1,nroots2 = 0;
6349 PetscInt nleaves = nsend,nleaves2 = 0;
6350 PetscInt *offsets;
6351 PetscSFNode *iremote;
6352
6353 PetscCall(PetscSFCreate(comm,&sf1));
6354 PetscCall(PetscMalloc1(nsend,&iremote));
6355 PetscCall(PetscMalloc1(nsend,&offsets));
6356 for (k=0; k<nsend; k++) {
6357 iremote[k].rank = sendto[k];
6358 iremote[k].index = 0;
6359 nleaves2 += nentries[k];
6360 PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt");
6361 }
6362 PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6363 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM));
6364 PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* In case nroots2 overflows, we check offsets[] below */
6365 PetscCall(PetscSFDestroy(&sf1));
6366 PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem);
6367
6368 /* Build 2nd SF to send remote COOs to their owner */
6369 PetscSF sf2;
6370 nroots = nroots2;
6371 nleaves = nleaves2;
6372 PetscCall(PetscSFCreate(comm,&sf2));
6373 PetscCall(PetscSFSetFromOptions(sf2));
6374 PetscCall(PetscMalloc1(nleaves,&iremote));
6375 p = 0;
6376 for (k=0; k<nsend; k++) {
6377 PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
6378 for (q=0; q<nentries[k]; q++,p++) {
6379 iremote[p].rank = sendto[k];
6380 iremote[p].index = offsets[k] + q;
6381 }
6382 }
6383 PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
6384
6385 /* sf2 only sends contiguous leafdata to contiguous rootdata.
We record the permutation which will be used to fill leafdata */ 6386 PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem)); 6387 6388 /* Send the remote COOs to their owner */ 6389 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6390 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6391 PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2)); 6392 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE)); 6393 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE)); 6394 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE)); 6395 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE)); 6396 6397 PetscCall(PetscFree(offsets)); 6398 PetscCall(PetscFree2(sendto,nentries)); 6399 6400 /* ---------------------------------------------------------------*/ 6401 /* Sort received COOs by row along with the permutation array */ 6402 /* ---------------------------------------------------------------*/ 6403 for (k=0; k<n2; k++) perm2[k] = k; 6404 PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2)); 6405 6406 /* ---------------------------------------------------------------*/ 6407 /* Split received COOs into diag/offdiag portions */ 6408 /* ---------------------------------------------------------------*/ 6409 PetscCount *rowBegin2,*rowMid2,*rowEnd2; 6410 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6411 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6412 6413 PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2)); 6414 PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2)); 6415 6416 /* --------------------------------------------------------------------------*/ 6417 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6418 /* --------------------------------------------------------------------------*/ 6419 PetscInt *Ai,*Bi; 6420 PetscInt *Aj,*Bj; 6421 6422 PetscCall(PetscMalloc1(m+1,&Ai)); 6423 PetscCall(PetscMalloc1(m+1,&Bi)); 6424 PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6425 PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj)); 6426 6427 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6428 PetscCall(PetscMalloc1(Annz1,&Aimap1)); 6429 PetscCall(PetscMalloc1(Bnnz1,&Bimap1)); 6430 PetscCall(PetscMalloc1(Annz2,&Aimap2)); 6431 PetscCall(PetscMalloc1(Bnnz2,&Bimap2)); 6432 6433 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj)); 6434 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj)); 6435 6436 /* --------------------------------------------------------------------------*/ 6437 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6438 /* expect nonzeros in A/B most likely have local contributing entries */ 6439 /* --------------------------------------------------------------------------*/ 6440 PetscInt Annz = Ai[m]; 6441 PetscInt Bnnz = Bi[m]; 6442 PetscCount *Ajmap1_new,*Bjmap1_new; 6443 6444 PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new)); 6445 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new)); 6446 6447 PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new)); 6448 
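/* likewise expand the jmap of the local off-diagonal set so that it is indexed by the nonzeros of B */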
PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new)); 6449 6450 PetscCall(PetscFree(Aimap1)); 6451 PetscCall(PetscFree(Ajmap1)); 6452 PetscCall(PetscFree(Bimap1)); 6453 PetscCall(PetscFree(Bjmap1)); 6454 PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1)); 6455 PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2)); 6456 PetscCall(PetscFree3(i1,j1,perm1)); 6457 PetscCall(PetscFree3(i2,j2,perm2)); 6458 6459 Ajmap1 = Ajmap1_new; 6460 Bjmap1 = Bjmap1_new; 6461 6462 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6463 if (Annz < Annz1 + Annz2) { 6464 PetscInt *Aj_new; 6465 PetscCall(PetscMalloc1(Annz,&Aj_new)); 6466 PetscCall(PetscArraycpy(Aj_new,Aj,Annz)); 6467 PetscCall(PetscFree(Aj)); 6468 Aj = Aj_new; 6469 } 6470 6471 if (Bnnz < Bnnz1 + Bnnz2) { 6472 PetscInt *Bj_new; 6473 PetscCall(PetscMalloc1(Bnnz,&Bj_new)); 6474 PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz)); 6475 PetscCall(PetscFree(Bj)); 6476 Bj = Bj_new; 6477 } 6478 6479 /* --------------------------------------------------------------------------------*/ 6480 /* Create new submatrices for on-process and off-process coupling */ 6481 /* --------------------------------------------------------------------------------*/ 6482 PetscScalar *Aa,*Ba; 6483 MatType rtype; 6484 Mat_SeqAIJ *a,*b; 6485 PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */ 6486 PetscCall(PetscCalloc1(Bnnz,&Ba)); 6487 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6488 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6489 PetscCall(MatDestroy(&mpiaij->A)); 6490 PetscCall(MatDestroy(&mpiaij->B)); 6491 PetscCall(MatGetRootType_Private(mat,&rtype)); 6492 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A)); 6493 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B)); 6494 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6495 6496 a = (Mat_SeqAIJ*)mpiaij->A->data; 6497 b = (Mat_SeqAIJ*)mpiaij->B->data; 6498 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6499 a->free_a = b->free_a = PETSC_TRUE; 6500 a->free_ij = b->free_ij = PETSC_TRUE; 6501 6502 /* conversion must happen AFTER multiply setup */ 6503 PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A)); 6504 PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B)); 6505 PetscCall(VecDestroy(&mpiaij->lvec)); 6506 PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL)); 6507 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec)); 6508 6509 mpiaij->coo_n = coo_n; 6510 mpiaij->coo_sf = sf2; 6511 mpiaij->sendlen = nleaves; 6512 mpiaij->recvlen = nroots; 6513 6514 mpiaij->Annz = Annz; 6515 mpiaij->Bnnz = Bnnz; 6516 6517 mpiaij->Annz2 = Annz2; 6518 mpiaij->Bnnz2 = Bnnz2; 6519 6520 mpiaij->Atot1 = Atot1; 6521 mpiaij->Atot2 = Atot2; 6522 mpiaij->Btot1 = Btot1; 6523 mpiaij->Btot2 = Btot2; 6524 6525 mpiaij->Ajmap1 = Ajmap1; 6526 mpiaij->Aperm1 = Aperm1; 6527 6528 mpiaij->Bjmap1 = Bjmap1; 6529 mpiaij->Bperm1 = Bperm1; 6530 6531 mpiaij->Aimap2 = Aimap2; 6532 mpiaij->Ajmap2 = Ajmap2; 6533 mpiaij->Aperm2 = Aperm2; 6534 6535 mpiaij->Bimap2 = Bimap2; 6536 mpiaij->Bjmap2 = Bjmap2; 6537 mpiaij->Bperm2 = Bperm2; 6538 6539 mpiaij->Cperm1 = Cperm1; 6540 6541 /* Allocate in preallocation. 
If not used, it has zero cost on host */
6542 PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf));
6543 PetscFunctionReturn(0);
6544 }
6545
6546 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
6547 {
6548 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data;
6549 Mat A = mpiaij->A,B = mpiaij->B;
6550 PetscCount Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2;
6551 PetscScalar *Aa,*Ba;
6552 PetscScalar *sendbuf = mpiaij->sendbuf;
6553 PetscScalar *recvbuf = mpiaij->recvbuf;
6554 const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2;
6555 const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2;
6556 const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
6557 const PetscCount *Cperm1 = mpiaij->Cperm1;
6558
6559 PetscFunctionBegin;
6560 PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */
6561 PetscCall(MatSeqAIJGetArray(B,&Ba));
6562
6563 /* Pack entries to be sent to remote */
6564 for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6565
6566 /* Send remote entries to their owner and overlap the communication with local computation */
6567 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE));
6568 /* Add local entries to A and B */
6569 for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6570 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */
6571 for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]];
6572 Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum;
6573 }
6574 for (PetscCount i=0; i<Bnnz; i++) {
6575 PetscScalar sum = 0.0;
6576 for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]];
6577 Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum;
6578 }
6579 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE));
6580
6581 /* Add received remote entries to A and B */
6582 for (PetscCount i=0; i<Annz2; i++) {
6583 for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6584 }
6585 for (PetscCount i=0; i<Bnnz2; i++) {
6586 for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6587 }
6588 PetscCall(MatSeqAIJRestoreArray(A,&Aa));
6589 PetscCall(MatSeqAIJRestoreArray(B,&Ba));
6590 PetscFunctionReturn(0);
6591 }
6592
6593 /* ----------------------------------------------------------------*/
6594
6595 /*MC
6596 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6597
6598 Options Database Keys:
6599 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6600
6601 Level: beginner
6602
6603 Notes:
6604 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6605 in this case the values associated with the rows and columns one passes in are set to zero
6606 in the matrix.
6607
6608 MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type.
In this no 6609 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6610 6611 .seealso: `MatCreateAIJ()` 6612 M*/ 6613 6614 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6615 { 6616 Mat_MPIAIJ *b; 6617 PetscMPIInt size; 6618 6619 PetscFunctionBegin; 6620 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 6621 6622 PetscCall(PetscNewLog(B,&b)); 6623 B->data = (void*)b; 6624 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 6625 B->assembled = PETSC_FALSE; 6626 B->insertmode = NOT_SET_VALUES; 6627 b->size = size; 6628 6629 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 6630 6631 /* build cache for off array entries formed */ 6632 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 6633 6634 b->donotstash = PETSC_FALSE; 6635 b->colmap = NULL; 6636 b->garray = NULL; 6637 b->roworiented = PETSC_TRUE; 6638 6639 /* stuff used for matrix vector multiply */ 6640 b->lvec = NULL; 6641 b->Mvctx = NULL; 6642 6643 /* stuff for MatGetRow() */ 6644 b->rowindices = NULL; 6645 b->rowvalues = NULL; 6646 b->getrowactive = PETSC_FALSE; 6647 6648 /* flexible pointer used in CUSPARSE classes */ 6649 b->spptr = NULL; 6650 6651 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6652 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ)); 6653 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ)); 6654 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ)); 6655 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ)); 6656 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ)); 6657 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6658 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ)); 6659 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM)); 6660 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL)); 6661 #if defined(PETSC_HAVE_CUDA) 6662 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6663 #endif 6664 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6665 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos)); 6666 #endif 6667 #if defined(PETSC_HAVE_MKL_SPARSE) 6668 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL)); 6669 #endif 6670 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL)); 6671 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ)); 6672 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ)); 6673 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense)); 6674 #if defined(PETSC_HAVE_ELEMENTAL) 6675 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental)); 6676 #endif 6677 #if defined(PETSC_HAVE_SCALAPACK) 6678 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK)); 6679 #endif 6680 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS)); 6681 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL)); 6682 #if defined(PETSC_HAVE_HYPRE) 6683 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE)); 6684 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6685 #endif 6686 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ)); 6687 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ)); 6688 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ)); 6689 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ)); 6690 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ)); 6691 PetscFunctionReturn(0); 6692 } 6693 6694 /*@C 6695 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6696 and "off-diagonal" part of the matrix in CSR format. 6697 6698 Collective 6699 6700 Input Parameters: 6701 + comm - MPI communicator 6702 . m - number of local rows (Cannot be PETSC_DECIDE) 6703 . n - This value should be the same as the local size used in creating the 6704 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6705 calculated if N is given) For square matrices n is almost always m. 6706 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6707 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6708 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6709 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6710 . a - matrix values 6711 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6712 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6713 - oa - matrix values 6714 6715 Output Parameter: 6716 . mat - the matrix 6717 6718 Level: advanced 6719 6720 Notes: 6721 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6722 must free the arrays once the matrix has been destroyed and not before. 6723 6724 The i and j indices are 0 based 6725 6726 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6727 6728 This sets local rows and cannot be used to set off-processor values. 6729 6730 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6731 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6732 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 6733 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6734 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6735 communication if it is known that only local entries will be set. 6736 6737 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6738 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6739 @*/ 6740 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6741 { 6742 Mat_MPIAIJ *maij; 6743 6744 PetscFunctionBegin; 6745 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6746 PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6747 PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6748 PetscCall(MatCreate(comm,mat)); 6749 PetscCall(MatSetSizes(*mat,m,n,M,N)); 6750 PetscCall(MatSetType(*mat,MATMPIAIJ)); 6751 maij = (Mat_MPIAIJ*) (*mat)->data; 6752 6753 (*mat)->preallocated = PETSC_TRUE; 6754 6755 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6756 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6757 6758 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A)); 6759 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B)); 6760 6761 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 6762 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 6763 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 6764 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 6765 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 6766 PetscFunctionReturn(0); 6767 } 6768 6769 typedef struct { 6770 Mat *mp; /* intermediate products */ 6771 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6772 PetscInt cp; /* number of intermediate products */ 6773 6774 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6775 PetscInt *startsj_s,*startsj_r; 6776 PetscScalar *bufa; 6777 Mat P_oth; 6778 6779 /* may take advantage of merging product->B */ 6780 Mat Bloc; /* B-local by merging diag and off-diag */ 6781 6782 /* cusparse does not have support to split between symbolic and numeric phases. 6783 When api_user is true, we don't need to update the numerical values 6784 of the temporary storage */ 6785 PetscBool reusesym; 6786 6787 /* support for COO values insertion */ 6788 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6789 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6790 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6791 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 6792 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6793 PetscMemType mtype; 6794 6795 /* customization */ 6796 PetscBool abmerge; 6797 PetscBool P_oth_bind; 6798 } MatMatMPIAIJBACKEND; 6799 6800 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6801 { 6802 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6803 PetscInt i; 6804 6805 PetscFunctionBegin; 6806 PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r)); 6807 PetscCall(PetscFree(mmdata->bufa)); 6808 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v)); 6809 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w)); 6810 PetscCall(MatDestroy(&mmdata->P_oth)); 6811 PetscCall(MatDestroy(&mmdata->Bloc)); 6812 PetscCall(PetscSFDestroy(&mmdata->sf)); 6813 for (i = 0; i < mmdata->cp; i++) { 6814 PetscCall(MatDestroy(&mmdata->mp[i])); 6815 } 6816 PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp)); 6817 PetscCall(PetscFree(mmdata->own[0])); 6818 PetscCall(PetscFree(mmdata->own)); 6819 PetscCall(PetscFree(mmdata->off[0])); 6820 PetscCall(PetscFree(mmdata->off)); 6821 PetscCall(PetscFree(mmdata)); 6822 PetscFunctionReturn(0); 6823 } 6824 6825 /* Copy selected n entries with indices in idx[] of A to v[]. 6826 If idx is NULL, copy the whole data array of A to v[] 6827 */ 6828 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6829 { 6830 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6831 6832 PetscFunctionBegin; 6833 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f)); 6834 if (f) { 6835 PetscCall((*f)(A,n,idx,v)); 6836 } else { 6837 const PetscScalar *vv; 6838 6839 PetscCall(MatSeqAIJGetArrayRead(A,&vv)); 6840 if (n && idx) { 6841 PetscScalar *w = v; 6842 const PetscInt *oi = idx; 6843 PetscInt j; 6844 6845 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6846 } else { 6847 PetscCall(PetscArraycpy(v,vv,n)); 6848 } 6849 PetscCall(MatSeqAIJRestoreArrayRead(A,&vv)); 6850 } 6851 PetscFunctionReturn(0); 6852 } 6853 6854 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6855 { 6856 MatMatMPIAIJBACKEND *mmdata; 6857 PetscInt i,n_d,n_o; 6858 6859 PetscFunctionBegin; 6860 MatCheckProduct(C,1); 6861 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6862 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6863 if (!mmdata->reusesym) { /* update temporary matrices */ 6864 if (mmdata->P_oth) { 6865 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 6866 } 6867 if (mmdata->Bloc) { 6868 PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc)); 6869 } 6870 } 6871 mmdata->reusesym = PETSC_FALSE; 6872 6873 for (i = 0; i < mmdata->cp; i++) { 6874 PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]); 6875 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 6876 } 6877 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6878 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6879 6880 if (mmdata->mptmp[i]) continue; 6881 if (noff) { 6882 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6883 6884 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o)); 6885 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d)); 
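/* advance the running write offsets: n_o indexes the off-process buffer coo_w, n_d the on-process buffer coo_v */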
6886 n_o += noff; 6887 n_d += nown; 6888 } else { 6889 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6890 6891 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d)); 6892 n_d += mm->nz; 6893 } 6894 } 6895 if (mmdata->hasoffproc) { /* offprocess insertion */ 6896 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6897 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6898 } 6899 PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES)); 6900 PetscFunctionReturn(0); 6901 } 6902 6903 /* Support for Pt * A, A * P, or Pt * A * P */ 6904 #define MAX_NUMBER_INTERMEDIATE 4 6905 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6906 { 6907 Mat_Product *product = C->product; 6908 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6909 Mat_MPIAIJ *a,*p; 6910 MatMatMPIAIJBACKEND *mmdata; 6911 ISLocalToGlobalMapping P_oth_l2g = NULL; 6912 IS glob = NULL; 6913 const char *prefix; 6914 char pprefix[256]; 6915 const PetscInt *globidx,*P_oth_idx; 6916 PetscInt i,j,cp,m,n,M,N,*coo_i,*coo_j; 6917 PetscCount ncoo,ncoo_d,ncoo_o,ncoo_oown; 6918 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 6919 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6920 /* a base offset; type-2: sparse with a local to global map table */ 6921 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6922 6923 MatProductType ptype; 6924 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6925 PetscMPIInt size; 6926 6927 PetscFunctionBegin; 6928 MatCheckProduct(C,1); 6929 PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6930 ptype = product->type; 6931 if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 6932 ptype = MATPRODUCT_AB; 6933 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 6934 } 6935 switch (ptype) { 6936 case MATPRODUCT_AB: 6937 A = product->A; 6938 P = product->B; 6939 m = A->rmap->n; 6940 n = P->cmap->n; 6941 M = A->rmap->N; 6942 N = P->cmap->N; 6943 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6944 break; 6945 case MATPRODUCT_AtB: 6946 P = product->A; 6947 A = product->B; 6948 m = P->cmap->n; 6949 n = A->cmap->n; 6950 M = P->cmap->N; 6951 N = A->cmap->N; 6952 hasoffproc = PETSC_TRUE; 6953 break; 6954 case MATPRODUCT_PtAP: 6955 A = product->A; 6956 P = product->B; 6957 m = P->cmap->n; 6958 n = P->cmap->n; 6959 M = P->cmap->N; 6960 N = P->cmap->N; 6961 hasoffproc = PETSC_TRUE; 6962 break; 6963 default: 6964 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6965 } 6966 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size)); 6967 if (size == 1) hasoffproc = PETSC_FALSE; 6968 6969 /* defaults */ 6970 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6971 mp[i] = NULL; 6972 mptmp[i] = PETSC_FALSE; 6973 rmapt[i] = -1; 6974 cmapt[i] = -1; 6975 rmapa[i] = NULL; 6976 cmapa[i] = NULL; 6977 } 6978 6979 /* customization */ 6980 PetscCall(PetscNew(&mmdata)); 6981 mmdata->reusesym = product->api_user; 6982 if (ptype == MATPRODUCT_AB) { 6983 if (product->api_user) { 6984 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat"); 6985 
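/* runtime knobs for this backend, registered just below; e.g. -matmatmult_backend_mergeB merges the diagonal and off-diagonal blocks of product->B before forming the local products */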
PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 6986 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6987 PetscOptionsEnd(); 6988 } else { 6989 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat"); 6990 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 6991 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6992 PetscOptionsEnd(); 6993 } 6994 } else if (ptype == MATPRODUCT_PtAP) { 6995 if (product->api_user) { 6996 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat"); 6997 PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6998 PetscOptionsEnd(); 6999 } else { 7000 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 7001 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7002 PetscOptionsEnd(); 7003 } 7004 } 7005 a = (Mat_MPIAIJ*)A->data; 7006 p = (Mat_MPIAIJ*)P->data; 7007 PetscCall(MatSetSizes(C,m,n,M,N)); 7008 PetscCall(PetscLayoutSetUp(C->rmap)); 7009 PetscCall(PetscLayoutSetUp(C->cmap)); 7010 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 7011 PetscCall(MatGetOptionsPrefix(C,&prefix)); 7012 7013 cp = 0; 7014 switch (ptype) { 7015 case MATPRODUCT_AB: /* A * P */ 7016 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7017 7018 /* A_diag * P_local (merged or not) */ 7019 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7020 /* P is product->B */ 7021 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7022 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7023 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7024 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7025 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7026 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7027 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7028 mp[cp]->product->api_user = product->api_user; 7029 PetscCall(MatProductSetFromOptions(mp[cp])); 7030 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7031 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7032 PetscCall(ISGetIndices(glob,&globidx)); 7033 rmapt[cp] = 1; 7034 cmapt[cp] = 2; 7035 cmapa[cp] = globidx; 7036 mptmp[cp] = PETSC_FALSE; 7037 cp++; 7038 } else { /* A_diag * P_diag and A_diag * P_off */ 7039 PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp])); 7040 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7041 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7042 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7043 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7044 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7045 mp[cp]->product->api_user = product->api_user; 7046 
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob,&globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob,&globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
    PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
    PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp],product->fill));
    PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
    PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob,&globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE;
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);

  PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i] = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp = cp;
  C->product->data = mmdata;
  C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt mr = mp[cp]->rmap->n;
      const PetscInt rs = C->rmap->rstart;
      const PetscInt re = C->rmap->rend;
      const PetscInt *ii = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i+1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz; /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.

     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].

     off[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert to others
     own[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert locally
     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
       E.g., coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2,*coo_i2,*coo_j2;

    PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
      PetscInt *idxoff = mmdata->off[cp];
      PetscInt *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii = mm->i;
        PetscInt *coi = coo_i + ncoo_o;
        PetscInt *coj = coo_j + ncoo_o;
        const PetscInt mr = mp[cp]->rmap->n;
        const PetscInt rs = C->rmap->rstart;
        const PetscInt re = C->rmap->rend;
        const PetscInt cs = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt gr = rmap[i];
          const PetscInt nz = ii[i+1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i+1]; j++) {
              *coi++ = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
    PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i,coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
    PetscInt *coi = coo_i + ncoo_d;
    PetscInt *coj = coo_j + ncoo_d;
    const PetscInt *jj = mm->j;
    const PetscInt *ii = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt mr = mp[cp]->rmap->n;
    const PetscInt rs = C->rmap->rstart;
    const PetscInt re = C->rmap->rend;
    const PetscInt cs = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj,jj,mm->nz));
      } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else { /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i+1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) {
    PetscCall(ISRestoreIndices(glob,&globidx));
  }
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) {
    PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
  }
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
  PetscCall(PetscFree2(coo_i,coo_j));
  PetscFunctionReturn(0);
}

PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat,1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) {
    PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
  }
  if (match) { /* we can always fall back to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}

/*
   Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
#undef PetscCall
#define PetscCall(...) do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
      return; \
    } \
  } while (0)

#undef SETERRQ
#define SETERRQ(comm,ierr,...) do { \
    *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
    return; \
  } while (0)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat mat = *mmat;
  PetscInt m = *mm, n = *mn;
  InsertMode addv = *maddv;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macro */
    Mat A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar *aa;
    PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar *ba;
    /* The variable below is only needed in the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases
     * because we cannot use "#if defined" inside a macro.
     */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A,&aa));
    PetscCall(MatSeqAIJGetArray(B,&ba));
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
      PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {
        row = im[i] - rstart;
        lastcol1 = -1;
        rp1 = aj + ai[row];
        ap1 = aa + ai[row];
        rmax1 = aimax[row];
        nrow1 = ailen[row];
        low1 = 0;
        high1 = nrow1;
        lastcol2 = -1;
        rp2 = bj + bi[row];
        ap2 = ba + bi[row];
        rmax2 = bimax[row];
        nrow2 = bilen[row];
        low2 = 0;
        high2 = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          } else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
              }
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B = aij->B;
                b = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2 = bj + bi[row];
                ap2 = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2 = 0;
                high2 = nrow2;
                bm = aij->B->rmap->n;
                ba = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A,&aa));
    PetscCall(MatSeqAIJRestoreArray(B,&ba));
  }
  PetscFunctionReturnVoid();
}
/* Undefining these here since they were redefined from their original definitions above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ
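
/*
   Illustrative usage sketch (not part of this source file): the MATPRODUCT_AB/AtB/PtAP paths
   implemented above are normally reached through the generic MatProduct interface or the
   convenience wrappers MatMatMult(), MatTransposeMatMult() and MatPtAP(). The fragment below
   assumes two already assembled MPIAIJ-based matrices A and P with compatible layouts; whether
   the MPIAIJBACKEND symbolic/numeric routines are actually selected depends on the matrix types,
   the device configuration, and options such as -matptap_backend_cpu handled in
   MatProductSetFromOptions_MPIAIJBACKEND().

      Mat C;

      PetscCall(MatProductCreate(A,P,NULL,&C));          // C will hold P^T * A * P
      PetscCall(MatProductSetType(C,MATPRODUCT_PtAP));
      PetscCall(MatProductSetFromOptions(C));            // may pick MatProductSymbolic_MPIAIJBACKEND
      PetscCall(MatProductSymbolic(C));                  // builds the intermediate products and the COO pattern
      PetscCall(MatProductNumeric(C));                   // fills the values; can be repeated after A or P change
      PetscCall(MatDestroy(&C));

   The one-call wrapper PetscCall(MatPtAP(A,P,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C)) produces the
   same result, with MAT_REUSE_MATRIX on later calls redoing only the numeric phase.
*/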